From 9088a88b16892d94bdbd06ea05b2250d0f6073e5 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 19:07:15 -0300
Subject: [PATCH 01/38] feat: Add knowledge store for persistent agent memory

Implements a tag-based XML knowledge system that replaces the lossy
summary-of-summaries merge with knowledge extraction:

- AgentMemory gains a knowledge field (persistent tagged XML string)
- Knowledge is injected into context just before the last message
- merge_compressed() now extracts tagged XML facts via LLM and applies
  them to the knowledge store using merge_context() (deterministic:
  update existing tags, delete empty tags, append new tags)
- Merged compressed steps are removed (info lives in knowledge)
- New UpdateKnowledge tool lets the agent manage its own knowledge
- Three new functions: merge_context(), list_xml_tag_names(),
  create_knowledge_extraction_prompt()

Files changed:
- memory.py: knowledge field + reset
- agents.py: knowledge injection + UpdateKnowledge tool registration
- bp_compression.py: merge_context, knowledge extraction prompt,
  modified merge_compressed to return (steps, knowledge) tuple
- bp_tools.py: UpdateKnowledge tool class
- tests/test_compression.py: updated + 13 new tests (67 pass)

Model: claude-opus-4.6

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/agents.py         |  20 ++-
 src/smolagents/bp_compression.py | 249 +++++++++++++++++++++----------
 src/smolagents/bp_tools.py       |  51 +++++++
 src/smolagents/memory.py         |   2 +
 tests/test_compression.py        | 242 ++++++++++++++++++++++--------
 5 files changed, 422 insertions(+), 142 deletions(-)
diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
index c05b5b4f7..963c8e380 100644
--- a/src/smolagents/agents.py
+++ b/src/smolagents/agents.py
@@ -31,7 +31,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal, Type, TypeAlias, TypedDict, Union
 from .bp_executors import LocalExecExecutor
-from .bp_tools import get_file_size, force_directories, remove_after_markers, PlanningTool, MoveActionStepToMemory, RetrieveActionStepFromMemory, SummarizeActionStep, GetToolDescriptionsTool
+from .bp_tools import get_file_size, force_directories, remove_after_markers, PlanningTool, MoveActionStepToMemory, RetrieveActionStepFromMemory, SummarizeActionStep, UpdateKnowledge, GetToolDescriptionsTool
 from .bp_utils import bp_parse_code_blobs, fix_nested_tags
 from .bp_utils import is_valid_python_code
 from. utils import MAX_LENGTH_TRUNCATE_CONTENT
@@ -446,6 +446,10 @@ def _bind_memory_tools(self):
             tool = SummarizeActionStep()
             tool.set_agent(self)
             self.tools["summarize_actionstep"] = tool
+        if "update_knowledge" not in self.tools:
+            tool = UpdateKnowledge()
+            tool.set_agent(self)
+            self.tools["update_knowledge"] = tool
 
     def _bind_tool_descriptions(self):
         """Add get_tool_descriptions tool and populate it with full docs from all other tools."""
@@ -855,11 +859,23 @@ def write_memory_to_messages(
         """
         Reads past llm_outputs, actions, and observations or errors from the memory into a series of messages
         that can be used as input to the LLM. Adds a number of keywords (such as PLAN, error, etc) to help
-        the LLM.
+        the LLM. If the agent has accumulated knowledge, it is injected just before the last message.
         """
         messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode)
         for memory_step in self.memory.steps:
             messages.extend(memory_step.to_messages(summary_mode=summary_mode))
+
+        # Inject knowledge near the end of context (just before the last message)
+        if self.memory.knowledge and self.memory.knowledge.strip():
+            knowledge_msg = ChatMessage(
+                role=MessageRole.USER,
+                content=[{"type": "text", "text": f"<knowledge>\n{self.memory.knowledge}\n</knowledge>"}],
+            )
+            if len(messages) > 1:
+                messages.insert(len(messages) - 1, knowledge_msg)
+            else:
+                messages.append(knowledge_msg)
+
         return messages
 
     def get_context_char_size(self) -> int:
diff --git a/src/smolagents/bp_compression.py b/src/smolagents/bp_compression.py
index 1f17a681a..083370947 100644
--- a/src/smolagents/bp_compression.py
+++ b/src/smolagents/bp_compression.py
@@ -8,6 +8,7 @@
 via LLM summarization while keeping recent steps in full detail.
 """
 
+import re
 import time
 from dataclasses import dataclass, field
 from logging import getLogger
@@ -31,6 +32,9 @@
     "estimate_step_tokens",
     "create_compression_callback",
     "create_merge_prompt",
+    "merge_context",
+    "list_xml_tag_names",
+    "create_knowledge_extraction_prompt",
 ]
 
 
@@ -312,6 +316,133 @@ def create_merge_prompt(compressed_steps: list[CompressedHistoryStep]) -> str:
 CONSOLIDATED SUMMARY:"""
 
 
+def list_xml_tag_names(text: str) -> list[str]:
+    """List the unique XML tag names that open anywhere in ``text`` (nested tags included).
+
+    Args:
+        text: String containing XML-tagged content; empty or whitespace-only yields ``[]``.
+
+    Returns:
+        Sorted list of unique tag names; closing tags (``</tag>``) never match the pattern.
+    """
+    if not text or not text.strip():
+        return []
+    names = set()  # set membership de-duplicates repeated tags
+    for m in re.finditer(r'<([\w][\w-]*)[\s>/]', text):  # "<name" followed by whitespace, ">" or "/"
+        names.add(m.group(1))
+    return sorted(names)
+
+
+def merge_context(existing: str, updates: str) -> str:
+    """Merge tagged XML updates into existing context using three rules:
+
+    1. **UPDATE**: If a tag in ``updates`` has content and exists in ``existing``, replace it.
+    2. **DELETE**: If a tag in ``updates`` is self-closing (``<tag/>``) or empty
+       (``<tag></tag>``), remove it from ``existing``.
+    3. **APPEND**: If a tag in ``updates`` has content and does NOT exist in
+       ``existing``, append it.
+
+    Args:
+        existing: The current knowledge string (tagged XML).
+        updates: New tagged XML; content is stored verbatim, backslashes included.
+
+    Returns:
+        Updated knowledge string; ``existing`` unchanged when ``updates`` is blank.
+    """
+    if not updates or not updates.strip():
+        return existing
+
+    result = existing if existing else ""
+    processed_tags = set()  # each tag name is applied at most once per call
+
+    # 1. Self-closing tags: <tag/> or <tag /> -> DELETE
+    for m in re.finditer(r'<([\w][\w-]*)\s*/>', updates):
+        tag = m.group(1)
+        if tag in processed_tags:
+            continue
+        processed_tags.add(tag)
+        result = re.sub(rf'<{re.escape(tag)}>.*?</{re.escape(tag)}>\s*', '', result, flags=re.DOTALL)
+        result = re.sub(rf'<{re.escape(tag)}\s*/>\s*', '', result, flags=re.DOTALL)
+
+    # 2. Content tags: <tag>content</tag>
+    for m in re.finditer(r'<([\w][\w-]*)>(.*?)</\1>', updates, re.DOTALL):
+        tag = m.group(1)
+        content = m.group(2)
+        if tag in processed_tags:
+            continue
+        processed_tags.add(tag)
+        full_tag = f'<{tag}>{content}</{tag}>'
+        if content.strip() == '':
+            # Empty content -> DELETE
+            result = re.sub(rf'<{re.escape(tag)}>.*?</{re.escape(tag)}>\s*', '', result, flags=re.DOTALL)
+        elif re.search(rf'<{re.escape(tag)}>.*?</{re.escape(tag)}>', result, re.DOTALL):
+            # Tag exists -> UPDATE (callable repl keeps backslashes in content literal)
+            result = re.sub(rf'<{re.escape(tag)}>.*?</{re.escape(tag)}>', lambda _m: full_tag, result, flags=re.DOTALL)
+        else:
+            # Tag doesn't exist -> APPEND
+            result = result.rstrip() + '\n' + full_tag + '\n'
+
+    return result
+
+
+def create_knowledge_extraction_prompt(
+    compressed_steps: list[CompressedHistoryStep],
+    existing_tag_names: list[str] | None = None,
+) -> str:
+    """Build the LLM prompt that turns compressed summaries into tagged XML knowledge.
+
+    The prompt requests a diff against the knowledge store (changed, new, or deleted
+    sections) rather than a full rewrite of everything already known.
+
+    Args:
+        compressed_steps: CompressedHistoryStep instances whose summaries are embedded in the prompt.
+        existing_tag_names: Tag names already in the knowledge store; ``None`` or empty means none yet.
+
+    Returns:
+        The complete prompt text for the knowledge extraction call.
+    """
+    # One labelled entry per compressed step, numbered from 1.
+    rendered = [
+        f"Summary {idx} (covering {item.original_step_count} steps, "
+        f"step numbers: {item.compressed_step_numbers}):\n{item.summary}"
+        for idx, item in enumerate(compressed_steps, 1)
+    ]
+    summaries_text = "\n\n".join(rendered)
+    total_steps = sum(item.original_step_count for item in compressed_steps)
+
+    if not existing_tag_names:
+        existing_section = """
+There are no existing knowledge sections yet. Create new tagged sections for
+the important information found in the summaries below.
+Use descriptive tag names (e.g., <plan>, <architecture>, <key_findings>, <current_status>)."""
+    else:
+        tag_list = ", ".join(existing_tag_names)
+        existing_section = f"""
+Existing knowledge sections: {tag_list}
+
+Rules:
+- To UPDATE an existing section, use the same tag name with new content
+- To DELETE a section that is no longer relevant, use an empty self-closing tag: <tagname/>
+- To ADD new information, use a new descriptive tag name
+- Only output sections that are new, changed, or should be deleted
+- Do NOT output sections that have not changed"""
+
+    return f"""Extract key knowledge from the following {len(compressed_steps)} summaries
+covering {total_steps} total steps of agent execution.
+
+Output the knowledge as XML-tagged sections. Each section should contain concise,
+factual information that would be useful for continuing the task.
+{existing_section}
+
+{COMMON_COMPRESSION_INSTRUCTIONS}
+
+<SUMMARIES>
+{summaries_text}
+</SUMMARIES>
+
+KNOWLEDGE UPDATE:"""
+
+
 class ContextCompressor:
     """Manages context compression for agent memory.
 
@@ -561,30 +692,30 @@ def should_merge_compressed(self, steps: list[MemoryStep]) -> bool:
         mergeable_count = compressed_count - self.config.keep_compressed_steps
         return mergeable_count >= 2
 
-    def merge_compressed(self, steps: list[MemoryStep]) -> list[MemoryStep]:
-        """Merge multiple CompressedHistoryStep instances into one.
+    def merge_compressed(self, steps: list[MemoryStep], knowledge: str = "") -> tuple[list[MemoryStep], str]:
+        """Merge older compressed history steps by extracting knowledge.
 
-        Collects all compressed history steps, generates a consolidated summary
-        via LLM, and replaces them with a single merged CompressedHistoryStep.
-        If keep_compressed_steps is set, the most recent N compressed steps are
-        preserved and only older ones are merged.
+        Instead of rewriting all compressed summaries into a single prose summary,
+        this method extracts tagged XML knowledge from the older compressed steps
+        and merges it into the existing knowledge store using ``merge_context()``.
+        The merged compressed steps are then removed from the step list.
 
-        Note: This is a lossy operation (summary of summaries). Information
-        fidelity decreases with each merge cycle.
+        If keep_compressed_steps is set, the most recent N compressed steps are
+        preserved and only older ones are processed.
 
         Args:
             steps: Current list of memory steps.
+            knowledge: Current knowledge string (tagged XML).
 
         Returns:
-            New list with older compressed history steps merged into one,
-            preserving the most recent keep_compressed_steps instances.
+            Tuple of (new_steps, updated_knowledge).
         """
         start_time = time.time()
 
         compressed_steps = [step for step in steps if isinstance(step, CompressedHistoryStep)]
 
         if len(compressed_steps) <= 1:
-            return steps
+            return steps, knowledge
 
         # Determine which compressed steps to keep vs merge
         keep_count = self.config.keep_compressed_steps
@@ -598,7 +729,7 @@ def merge_compressed(self, steps: list[MemoryStep]) -> list[MemoryStep]:
             steps_to_merge = compressed_steps
 
         if len(steps_to_merge) <= 1:
-            return steps  # Not enough to merge
+            return steps, knowledge  # Not enough to merge
 
         # Skip merge if combined content is too small to be worth an LLM call
         pre_merge_chars = sum(len(step.summary) for step in steps_to_merge)
@@ -611,10 +742,11 @@ def merge_compressed(self, steps: list[MemoryStep]) -> list[MemoryStep]:
                 )
             else:
                 logger.info(f"Merge skipped: combined summaries ({pre_merge_chars} chars) < min_compression_chars ({self.config.min_compression_chars})")
-            return steps
+            return steps, knowledge
 
-        # Build merge prompt and call LLM
-        merge_prompt = create_merge_prompt(steps_to_merge)
+        # Build knowledge extraction prompt and call LLM
+        existing_tag_names = list_xml_tag_names(knowledge)
+        merge_prompt = create_knowledge_extraction_prompt(steps_to_merge, existing_tag_names)
 
         try:
             merge_message = self.compression_model.generate(
@@ -625,85 +757,50 @@ def merge_compressed(self, steps: list[MemoryStep]) -> list[MemoryStep]:
                     )
                 ]
             )
-            merged_summary = merge_message.content
-            if isinstance(merged_summary, list):
-                merged_summary = " ".join(
-                    item.get("text", "") for item in merged_summary if isinstance(item, dict)
+            knowledge_updates = merge_message.content
+            if isinstance(knowledge_updates, list):
+                knowledge_updates = " ".join(
+                    item.get("text", "") for item in knowledge_updates if isinstance(item, dict)
                 )
-
-            merge_token_usage = merge_message.token_usage
         except Exception as e:
-            logger.warning(f"Compressed step merge failed, keeping original steps: {e}")
-            return steps
+            logger.warning(f"Knowledge extraction failed, keeping original steps: {e}")
+            return steps, knowledge
 
-        # Safety check: skip merge if consolidated summary is larger than combined originals
+        # Apply the tagged XML diff to the knowledge store
         original_chars = sum(len(step.summary) for step in steps_to_merge)
-        merged_chars = len(merged_summary) if merged_summary else 0
-        if merged_chars >= original_chars:
-            if self.agent_logger:
-                self.agent_logger.log_markdown(
-                    content=f"Merge skipped: consolidated summary ({merged_chars:,} chars) "
-                    f"is larger than combined originals ({original_chars:,} chars)",
-                    title="Compressed Step Merge Skipped",
-                    level=LogLevel.INFO,
-                )
-            else:
-                logger.info(
-                    f"Merge skipped: consolidated summary ({merged_chars} chars) "
-                    f">= combined originals ({original_chars} chars)"
-                )
-            return steps
+        updated_knowledge = merge_context(knowledge, knowledge_updates) if knowledge_updates else knowledge
 
-        # Accumulate metadata from merged compressed steps
-        all_step_numbers = []
-        total_original_count = 0
-        for step in steps_to_merge:
-            all_step_numbers.extend(step.compressed_step_numbers)
-            total_original_count += step.original_step_count
-
-        merged_step = CompressedHistoryStep(
-            summary=merged_summary,
-            compressed_step_numbers=sorted(set(all_step_numbers)),
-            original_step_count=total_original_count,
-            timing=Timing(start_time=start_time, end_time=time.time()),
-            compression_token_usage=merge_token_usage,
-        )
+        # Accumulate metadata
+        total_original_count = sum(step.original_step_count for step in steps_to_merge)
 
-        # Rebuild steps: replace merged CompressedHistoryStep instances with the single
-        # merged one, while preserving kept compressed steps in their original positions
+        # Remove merged compressed steps from the step list
         merge_set = set(id(step) for step in steps_to_merge)
-        new_steps = []
-        merged_inserted = False
-
-        for step in steps:
-            if isinstance(step, CompressedHistoryStep) and id(step) in merge_set:
-                if not merged_inserted:
-                    new_steps.append(merged_step)
-                    merged_inserted = True
-                # Skip subsequent merged compressed steps (replaced by merged)
-            else:
-                new_steps.append(step)
+        new_steps = [step for step in steps if not (isinstance(step, CompressedHistoryStep) and id(step) in merge_set)]
 
         # Log
         kept_count = len(compressed_steps) - len(steps_to_merge)
+        knowledge_chars = len(updated_knowledge) if updated_knowledge else 0
+        elapsed = time.time() - start_time
         if self.agent_logger:
-            compression_ratio = (1 - merged_chars / original_chars) * 100 if original_chars > 0 else 0
             kept_msg = f" Kept {kept_count} recent compressed steps." if kept_count > 0 else ""
+            tag_names = list_xml_tag_names(updated_knowledge)
             self.agent_logger.log_markdown(
-                content=f"Merged {len(steps_to_merge)} compressed steps "
-                f"({total_original_count} original steps) from {original_chars:,} chars "
-                f"to {merged_chars:,} chars ({compression_ratio:.1f}% reduction).{kept_msg}",
-                title="Compressed Step Merge",
+                content=f"Extracted knowledge from {len(steps_to_merge)} compressed steps "
+                f"({total_original_count} original steps, {original_chars:,} chars). "
+                f"Knowledge store: {knowledge_chars:,} chars, sections: {tag_names}. "
+                f"Elapsed: {elapsed:.1f}s.{kept_msg}",
+                title="Knowledge Extraction",
                 level=LogLevel.INFO,
             )
         else:
             kept_msg = f", kept {kept_count} recent" if kept_count > 0 else ""
             logger.info(
-                f"Merged {len(steps_to_merge)} compressed steps "
-                f"({total_original_count} original steps) into single summary{kept_msg}"
+                f"Extracted knowledge from {len(steps_to_merge)} compressed steps "
+                f"({total_original_count} original steps) into knowledge store "
+                f"({knowledge_chars} chars){kept_msg}"
             )
 
-        return new_steps
+        return new_steps, updated_knowledge
 
 
 def create_compression_callback(compressor: ContextCompressor) -> Callable:
@@ -730,6 +827,8 @@ def compression_callback(step: MemoryStep, agent: "MultiStepAgent") -> None:
             agent.memory.steps = compressor.compress(agent.memory.steps)
 
         if compressor.should_merge_compressed(agent.memory.steps):
-            agent.memory.steps = compressor.merge_compressed(agent.memory.steps)
+            agent.memory.steps, agent.memory.knowledge = compressor.merge_compressed(
+                agent.memory.steps, agent.memory.knowledge
+            )
 
     return compression_callback
diff --git a/src/smolagents/bp_tools.py b/src/smolagents/bp_tools.py
index 75eb9cee3..5beb62d59 100644
--- a/src/smolagents/bp_tools.py
+++ b/src/smolagents/bp_tools.py
@@ -2993,6 +2993,57 @@ def load_image_callback(memory_step, agent=None):
     )
 
 
+class UpdateKnowledge(Tool):
+    """A tool that allows the agent to update its persistent knowledge store.
+
+    The knowledge store is a tagged XML string that survives compression cycles
+    and is injected into the agent's context near the end of each turn.
+    Updates use a tag-based merge: existing tags are updated, empty/self-closing
+    tags are deleted, and new tags are appended.
+
+    Must be bound to an agent via ``set_agent`` before use.
+    """
+
+    name = "update_knowledge"
+    description = (
+        "Update your persistent knowledge store with tagged XML sections. "
+        "The knowledge store appears in your context as <knowledge>...</knowledge> and survives compression. "
+        "Use descriptive tag names. To ADD or UPDATE a section: <tagname>content</tagname>. "
+        "To DELETE a section: <tagname/>. Only include sections you want to change. "
+        "Example: update_knowledge('<current_plan>1. Do X  2. Do Y</current_plan><old_notes/>')"
+    )
+    inputs = {
+        "updates": {
+            "type": "string",
+            "description": "Tagged XML with sections to add, update, or delete. "
+            "Use <tagname>content</tagname> to add/update, <tagname/> to delete.",
+        },
+    }
+    output_type = "string"
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._agent = None  # stays None until set_agent() binds an agent
+
+    def set_agent(self, agent):
+        """Bind this tool to an agent so it can access the knowledge store."""
+        self._agent = agent
+
+    def forward(self, updates: str) -> str:  # merges ``updates`` into agent.memory.knowledge
+        if self._agent is None:  # problems are reported as "Error: ..." strings, not raised
+            return "Error: UpdateKnowledge is not bound to an agent. Call set_agent() first."
+
+        if not updates or not updates.strip():  # blank input leaves the store untouched
+            return "Error: No updates provided."
+        # Deferred import; NOTE(review): presumably avoids an import cycle with bp_compression - confirm.
+        from smolagents.bp_compression import merge_context, list_xml_tag_names
+
+        old_knowledge = self._agent.memory.knowledge or ""  # a falsy store is treated as empty
+        self._agent.memory.knowledge = merge_context(old_knowledge, updates)
+        tag_names = list_xml_tag_names(self._agent.memory.knowledge)  # names present after the merge
+        return f"Knowledge updated. Current sections: {tag_names}"
+
+
 class GetToolDescriptionsTool(Tool):
     """Tool that returns full descriptions for specified tools, enabling compact tool listings in the system prompt."""
 
diff --git a/src/smolagents/memory.py b/src/smolagents/memory.py
index 091b318c4..dfa039ee7 100644
--- a/src/smolagents/memory.py
+++ b/src/smolagents/memory.py
@@ -245,10 +245,12 @@ class AgentMemory:
     def __init__(self, system_prompt: str):
         self.system_prompt: SystemPromptStep = SystemPromptStep(system_prompt=system_prompt)
         self.steps: list[TaskStep | ActionStep | PlanningStep] = []
+        self.knowledge: str = ""
 
     def reset(self):
         """Reset the agent's memory, clearing all steps and keeping the system prompt."""
         self.steps = []
+        self.knowledge = ""
 
     def get_succinct_steps(self) -> list[dict]:
         """Return a succinct representation of the agent's steps, excluding model input messages."""
diff --git a/tests/test_compression.py b/tests/test_compression.py
index 997b54bef..ad2b1354c 100644
--- a/tests/test_compression.py
+++ b/tests/test_compression.py
@@ -372,7 +372,7 @@ def test_compress_returns_original_when_not_needed(self):
         assert result == steps
 
     def test_compress_creates_compressed_step(self):
-        config = CompressionConfig(max_uncompressed_steps=3, keep_recent_steps=2)
+        config = CompressionConfig(max_uncompressed_steps=3, keep_recent_steps=2, min_compression_chars=0)
         mock_model = MagicMock()
         mock_model.generate.return_value = ChatMessage(
             role=MessageRole.ASSISTANT,
@@ -482,15 +482,16 @@ def test_merge_compressed_returns_original_when_single(self):
             CompressedHistoryStep(summary="Only one", compressed_step_numbers=[1], original_step_count=1),
             ActionStep(step_number=5, timing=Timing(start_time=0, end_time=1)),
         ]
-        result = compressor.merge_compressed(steps)
-        assert result == steps
+        result_steps, result_knowledge = compressor.merge_compressed(steps)
+        assert result_steps == steps
+        assert result_knowledge == ""
 
-    def test_merge_compressed_creates_merged_step(self):
-        config = CompressionConfig(max_compressed_steps=2, keep_compressed_steps=0)
+    def test_merge_compressed_extracts_knowledge(self):
+        config = CompressionConfig(max_compressed_steps=2, keep_compressed_steps=0, min_compression_chars=0)
         mock_model = MagicMock()
         mock_model.generate.return_value = ChatMessage(
             role=MessageRole.ASSISTANT,
-            content="Consolidated summary.",
+            content="<findings>Useful data found and analyzed.</findings><status>Final output prepared.</status>",
             token_usage=TokenUsage(input_tokens=200, output_tokens=30),
         )
         compressor = ContextCompressor(config, mock_model)
@@ -515,19 +516,16 @@ def test_merge_compressed_creates_merged_step(self):
             ActionStep(step_number=9, timing=Timing(start_time=0, end_time=1)),
         ]
 
-        result = compressor.merge_compressed(steps)
+        result_steps, result_knowledge = compressor.merge_compressed(steps)
 
-        # Should have: TaskStep + 1 merged CompressedHistoryStep + ActionStep
-        assert len(result) == 3
-        assert isinstance(result[0], TaskStep)
-        assert isinstance(result[1], CompressedHistoryStep)
-        assert isinstance(result[2], ActionStep)
+        # Compressed steps should be removed, leaving TaskStep + ActionStep
+        assert len(result_steps) == 2
+        assert isinstance(result_steps[0], TaskStep)
+        assert isinstance(result_steps[1], ActionStep)
 
-        merged = result[1]
-        assert merged.summary == "Consolidated summary."
-        assert merged.original_step_count == 8  # 3 + 3 + 2
-        assert merged.compressed_step_numbers == [1, 2, 3, 4, 5, 6, 7, 8]
-        assert merged.compression_token_usage.input_tokens == 200
+        # Knowledge should contain the extracted tags
+        assert "<findings>" in result_knowledge
+        assert "<status>" in result_knowledge
 
         mock_model.generate.assert_called_once()
 
@@ -542,27 +540,41 @@ def test_merge_compressed_handles_model_failure(self):
             CompressedHistoryStep(summary="Summary B", compressed_step_numbers=[2], original_step_count=1),
         ]
 
-        result = compressor.merge_compressed(steps)
-        assert result == steps
+        result_steps, result_knowledge = compressor.merge_compressed(steps)
+        assert result_steps == steps
+        assert result_knowledge == ""
 
-    def test_merge_compressed_skips_when_merged_is_larger(self):
-        config = CompressionConfig(max_compressed_steps=1, keep_compressed_steps=0)
+    def test_merge_compressed_updates_existing_knowledge(self):
+        config = CompressionConfig(max_compressed_steps=1, keep_compressed_steps=0, min_compression_chars=0)
         mock_model = MagicMock()
-        # Return a summary that's longer than the combined originals
         mock_model.generate.return_value = ChatMessage(
             role=MessageRole.ASSISTANT,
-            content="This merged summary is intentionally much longer than the originals to trigger the safety check.",
+            content="<status>All tasks complete</status>",
             token_usage=TokenUsage(input_tokens=100, output_tokens=50),
         )
         compressor = ContextCompressor(config, mock_model)
 
         steps = [
-            CompressedHistoryStep(summary="Short A", compressed_step_numbers=[1], original_step_count=1),
-            CompressedHistoryStep(summary="Short B", compressed_step_numbers=[2], original_step_count=1),
+            CompressedHistoryStep(
+                summary="Agent searched for information and found useful data about the topic.",
+                compressed_step_numbers=[1],
+                original_step_count=1,
+            ),
+            CompressedHistoryStep(
+                summary="Agent analyzed the data and drew conclusions about the results.",
+                compressed_step_numbers=[2],
+                original_step_count=1,
+            ),
         ]
 
-        result = compressor.merge_compressed(steps)
-        assert result == steps  # Should keep originals
+        existing_knowledge = "<db>PostgreSQL</db>\n<status>In progress</status>"
+        result_steps, result_knowledge = compressor.merge_compressed(steps, existing_knowledge)
+        # Compressed steps removed
+        assert len(result_steps) == 0
+        # Knowledge should have updated status and kept db
+        assert "<db>PostgreSQL</db>" in result_knowledge
+        assert "<status>All tasks complete</status>" in result_knowledge
+        assert "In progress" not in result_knowledge
 
     def test_should_merge_compressed_false_when_not_enough_mergeable(self):
         """With keep_compressed_steps=2 and 3 compressed steps, only 1 is mergeable (need 2)."""
@@ -585,11 +597,11 @@ def test_should_merge_compressed_true_with_enough_mergeable(self):
         assert compressor.should_merge_compressed(steps) is True
 
     def test_merge_compressed_keeps_recent_compressed_steps(self):
-        config = CompressionConfig(max_compressed_steps=2, keep_compressed_steps=1)
+        config = CompressionConfig(max_compressed_steps=2, keep_compressed_steps=1, min_compression_chars=0)
         mock_model = MagicMock()
         mock_model.generate.return_value = ChatMessage(
             role=MessageRole.ASSISTANT,
-            content="Consolidated summary.",
+            content="<findings>Useful data about the topic.</findings>",
             token_usage=TokenUsage(input_tokens=200, output_tokens=30),
         )
         compressor = ContextCompressor(config, mock_model)
@@ -614,33 +626,29 @@ def test_merge_compressed_keeps_recent_compressed_steps(self):
             ActionStep(step_number=9, timing=Timing(start_time=0, end_time=1)),
         ]
 
-        result = compressor.merge_compressed(steps)
-
-        # Should have: TaskStep + 1 merged + 1 kept compressed + ActionStep
-        assert len(result) == 4
-        assert isinstance(result[0], TaskStep)
-        assert isinstance(result[1], CompressedHistoryStep)  # merged
-        assert isinstance(result[2], CompressedHistoryStep)  # kept (most recent)
-        assert isinstance(result[3], ActionStep)
+        result_steps, result_knowledge = compressor.merge_compressed(steps)
 
-        # The merged step should only cover the first 2 compressed steps
-        merged = result[1]
-        assert merged.summary == "Consolidated summary."
-        assert merged.original_step_count == 6  # 3 + 3
-        assert merged.compressed_step_numbers == [1, 2, 3, 4, 5, 6]
+        # Merged compressed steps removed, kept one remains: TaskStep + 1 kept compressed + ActionStep
+        assert len(result_steps) == 3
+        assert isinstance(result_steps[0], TaskStep)
+        assert isinstance(result_steps[1], CompressedHistoryStep)  # kept (most recent)
+        assert isinstance(result_steps[2], ActionStep)
 
         # The kept step should be the most recent one (unchanged)
-        kept = result[2]
+        kept = result_steps[1]
         assert kept.summary == "Agent refined the analysis and prepared the final output."
         assert kept.compressed_step_numbers == [7, 8]
 
+        # Knowledge should have the extracted info
+        assert "<findings>" in result_knowledge
+
     def test_merge_compressed_keep_zero_merges_all(self):
-        """With keep_compressed_steps=0 (default), all compressed steps are merged."""
-        config = CompressionConfig(max_compressed_steps=2, keep_compressed_steps=0)
+        """With keep_compressed_steps=0 (default), all compressed steps are removed and knowledge extracted."""
+        config = CompressionConfig(max_compressed_steps=2, keep_compressed_steps=0, min_compression_chars=0)
         mock_model = MagicMock()
         mock_model.generate.return_value = ChatMessage(
             role=MessageRole.ASSISTANT,
-            content="Consolidated summary.",
+            content="<summary>All data searched, analyzed and output prepared.</summary>",
             token_usage=TokenUsage(input_tokens=200, output_tokens=30),
         )
         compressor = ContextCompressor(config, mock_model)
@@ -663,12 +671,12 @@ def test_merge_compressed_keep_zero_merges_all(self):
             ),
         ]
 
-        result = compressor.merge_compressed(steps)
+        result_steps, result_knowledge = compressor.merge_compressed(steps)
 
-        # All should be merged into one
-        assert len(result) == 1
-        assert isinstance(result[0], CompressedHistoryStep)
-        assert result[0].original_step_count == 8  # 3 + 3 + 2
+        # All compressed steps should be removed
+        assert len(result_steps) == 0
+        # Knowledge should contain the extracted info
+        assert "<summary>" in result_knowledge
 
     def test_merge_compressed_keep_too_many_returns_original(self):
         """If keep_compressed_steps >= len-1, only 1 left to merge, so return original."""
@@ -689,16 +697,18 @@ def test_merge_compressed_keep_too_many_returns_original(self):
             ),
         ]
 
-        result = compressor.merge_compressed(steps)
-        assert result == steps  # Nothing merged
+        result_steps, result_knowledge = compressor.merge_compressed(steps)
+        assert result_steps == steps  # Nothing merged
+        assert result_knowledge == ""
         mock_model.generate.assert_not_called()
 
-    def test_merge_compressed_accumulates_overlapping_step_numbers(self):
-        config = CompressionConfig(max_compressed_steps=1, keep_compressed_steps=0)
+    def test_merge_compressed_removes_merged_steps(self):
+        """Verify that merged compressed steps are removed from the step list."""
+        config = CompressionConfig(max_compressed_steps=1, keep_compressed_steps=0, min_compression_chars=0)
         mock_model = MagicMock()
         mock_model.generate.return_value = ChatMessage(
             role=MessageRole.ASSISTANT,
-            content="Merged.",
+            content="<info>Important data</info>",
             token_usage=TokenUsage(input_tokens=50, output_tokens=10),
         )
         compressor = ContextCompressor(config, mock_model)
@@ -714,19 +724,20 @@ def test_merge_compressed_accumulates_overlapping_step_numbers(self):
                 compressed_step_numbers=[3, 4, 5],
                 original_step_count=3,
             ),
+            ActionStep(step_number=6, timing=Timing(start_time=0, end_time=1)),
         ]
 
-        result = compressor.merge_compressed(steps)
-        merged = result[0]
-        # Overlapping step number 3 should be deduplicated
-        assert merged.compressed_step_numbers == [1, 2, 3, 4, 5]
-        # Total original count is the sum (not deduplicated)
-        assert merged.original_step_count == 6
+        result_steps, result_knowledge = compressor.merge_compressed(steps)
+        # Only ActionStep should remain
+        assert len(result_steps) == 1
+        assert isinstance(result_steps[0], ActionStep)
+        # Knowledge should be populated
+        assert "<info>Important data</info>" in result_knowledge
 
 
 class TestCreateCompressionCallback:
     def test_callback_triggers_compression(self):
-        config = CompressionConfig(max_uncompressed_steps=3, keep_recent_steps=2)
+        config = CompressionConfig(max_uncompressed_steps=3, keep_recent_steps=2, min_compression_chars=0)
         mock_model = MagicMock()
         mock_model.generate.return_value = ChatMessage(
             role=MessageRole.ASSISTANT,
@@ -738,6 +749,7 @@ def test_callback_triggers_compression(self):
 
         # Create mock agent with memory (include content so original_chars > summary_chars)
         mock_agent = MagicMock()
+        mock_agent.memory.knowledge = ""
         mock_agent.memory.steps = [
             ActionStep(
                 step_number=i,
@@ -785,12 +797,13 @@ def test_callback_triggers_merge(self):
             keep_recent_steps=2,
             max_compressed_steps=1,
             keep_compressed_steps=0,
+            min_compression_chars=0,
         )
         mock_model = MagicMock()
-        # First call: compress, second call: merge
+        # First call: compress, second call: merge (knowledge extraction)
         mock_model.generate.return_value = ChatMessage(
             role=MessageRole.ASSISTANT,
-            content="Short.",
+            content="<extracted>Knowledge from merge.</extracted>",
             token_usage=TokenUsage(input_tokens=100, output_tokens=10),
         )
         compressor = ContextCompressor(config, mock_model)
@@ -798,6 +811,7 @@ def test_callback_triggers_merge(self):
 
         # Set up agent with multiple compressed steps + action steps that exceed threshold
         mock_agent = MagicMock()
+        mock_agent.memory.knowledge = ""
         mock_agent.memory.steps = [
             CompressedHistoryStep(
                 summary="This is a long enough first summary that the merge will save space versus the originals.",
@@ -840,3 +854,101 @@ def test_callback_triggers_merge(self):
 
         # The model should have been called (compression and/or merge)
         assert mock_model.generate.call_count >= 1
+
+
+class TestMergeContext:
+    def test_append_new_tag(self):
+        from smolagents.bp_compression import merge_context
+        existing = "<db>PostgreSQL</db>"
+        updates = "<auth>JWT tokens</auth>"
+        result = merge_context(existing, updates)
+        assert "<db>PostgreSQL</db>" in result
+        assert "<auth>JWT tokens</auth>" in result
+
+    def test_update_existing_tag(self):
+        from smolagents.bp_compression import merge_context
+        existing = "<db>PostgreSQL</db>\n<status>In progress</status>"
+        updates = "<status>Complete</status>"
+        result = merge_context(existing, updates)
+        assert "<db>PostgreSQL</db>" in result
+        assert "<status>Complete</status>" in result
+        assert "In progress" not in result
+
+    def test_delete_with_self_closing(self):
+        from smolagents.bp_compression import merge_context
+        existing = "<db>PostgreSQL</db>\n<old_notes>Some notes</old_notes>"
+        updates = "<old_notes/>"
+        result = merge_context(existing, updates)
+        assert "<db>PostgreSQL</db>" in result
+        assert "old_notes" not in result
+
+    def test_delete_with_empty_tag(self):
+        from smolagents.bp_compression import merge_context
+        existing = "<db>PostgreSQL</db>\n<old_notes>Some notes</old_notes>"
+        updates = "<old_notes></old_notes>"
+        result = merge_context(existing, updates)
+        assert "<db>PostgreSQL</db>" in result
+        assert "old_notes" not in result
+
+    def test_mixed_operations(self):
+        from smolagents.bp_compression import merge_context
+        existing = "<plan>Step 1</plan>\n<db>MySQL</db>\n<old>Remove me</old>"
+        updates = "<plan>Step 2</plan><old/><auth>JWT</auth>"
+        result = merge_context(existing, updates)
+        assert "<plan>Step 2</plan>" in result
+        assert "<auth>JWT</auth>" in result
+        assert "old" not in result.lower() or "<old" not in result
+        assert "<db>MySQL</db>" in result
+
+    def test_empty_existing(self):
+        from smolagents.bp_compression import merge_context
+        result = merge_context("", "<info>New data</info>")
+        assert "<info>New data</info>" in result
+
+    def test_empty_updates(self):
+        from smolagents.bp_compression import merge_context
+        result = merge_context("<db>PostgreSQL</db>", "")
+        assert "<db>PostgreSQL</db>" in result
+
+
+class TestListXmlTagNames:  # exercises list_xml_tag_names on tagged, empty and tag-free input
+    def test_basic(self):  # expected names are in alphabetical order, unlike the input order
+        from smolagents.bp_compression import list_xml_tag_names
+        text = "<db>PostgreSQL</db>\n<auth>JWT</auth>\n<plan>Steps</plan>"
+        tags = list_xml_tag_names(text)
+        assert tags == ["auth", "db", "plan"]
+
+    def test_empty(self):  # an empty string yields an empty list
+        from smolagents.bp_compression import list_xml_tag_names
+        assert list_xml_tag_names("") == []
+
+    def test_no_tags(self):  # plain text without any XML tags yields an empty list
+        from smolagents.bp_compression import list_xml_tag_names
+        assert list_xml_tag_names("plain text no tags") == []
+
+
+class TestCreateKnowledgeExtractionPrompt:
+    def test_includes_summaries(self):
+        from smolagents.bp_compression import create_knowledge_extraction_prompt, CompressedHistoryStep
+        steps = [
+            CompressedHistoryStep(summary="Found PostgreSQL", compressed_step_numbers=[1], original_step_count=1),
+        ]
+        prompt = create_knowledge_extraction_prompt(steps)
+        assert "Found PostgreSQL" in prompt
+
+    def test_includes_existing_tags(self):
+        from smolagents.bp_compression import create_knowledge_extraction_prompt, CompressedHistoryStep
+        steps = [
+            CompressedHistoryStep(summary="Some info", compressed_step_numbers=[1], original_step_count=1),
+        ]
+        prompt = create_knowledge_extraction_prompt(steps, existing_tag_names=["db", "plan"])
+        assert "db" in prompt
+        assert "plan" in prompt
+
+    def test_without_existing_tags(self):
+        from smolagents.bp_compression import create_knowledge_extraction_prompt, CompressedHistoryStep
+        steps = [
+            CompressedHistoryStep(summary="Info", compressed_step_numbers=[1], original_step_count=1),
+        ]
+        prompt = create_knowledge_extraction_prompt(steps, existing_tag_names=[])
+        assert "no existing" in prompt.lower() or "new" in prompt.lower() or len(prompt) > 0

From cca89747084a17849a8fb339930c7d730f4c7b5d Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 19:18:09 -0300
Subject: [PATCH 02/38] feat: Update session save/load and stats for knowledge
 store

- save_session and save_session_to_dict now persist agent.memory.knowledge
- load_session and load_session_from_dict restore knowledge (backward
  compatible: missing key defaults to empty string)
- /show-stats shows knowledge store size when non-empty
- /show-compression-stats shows knowledge chars and section tag names
- /show-memory-stats shows knowledge store chars
- /session-save and /session-load messages include knowledge info
- Added 3 session tests: roundtrip, empty roundtrip, backward compat

All 87 tests pass (67 compression + 20 session).

Model: claude-opus-4.6

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 commit_msg.txt               | 27 +++++++++++++++++++
 src/smolagents/bp_cli.py     | 25 +++++++++++++++--
 src/smolagents/bp_session.py |  6 +++++
 tests/test_bp_session.py     | 52 ++++++++++++++++++++++++++++++++++++
 4 files changed, 108 insertions(+), 2 deletions(-)
 create mode 100644 commit_msg.txt

diff --git a/commit_msg.txt b/commit_msg.txt
new file mode 100644
index 000000000..bcf457c17
--- /dev/null
+++ b/commit_msg.txt
@@ -0,0 +1,27 @@
+
+feat: Add knowledge store for persistent agent memory
+
+Implements a tag-based XML knowledge system that replaces the lossy
+summary-of-summaries merge with knowledge extraction:
+
+- AgentMemory gains a knowledge field (persistent tagged XML string)
+- Knowledge is injected into context just before the last message
+- merge_compressed() now extracts tagged XML facts via LLM and applies
+  them to the knowledge store using merge_context() (deterministic:
+  update existing tags, delete empty tags, append new tags)
+- Merged compressed steps are removed (info lives in knowledge)
+- New UpdateKnowledge tool lets the agent manage its own knowledge
+- Three new functions: merge_context(), list_xml_tag_names(),
+  create_knowledge_extraction_prompt()
+
+Files changed:
+- memory.py: knowledge field + reset
+- agents.py: knowledge injection + UpdateKnowledge tool registration
+- bp_compression.py: merge_context, knowledge extraction prompt,
+  modified merge_compressed to return (steps, knowledge) tuple
+- bp_tools.py: UpdateKnowledge tool class
+- tests/test_compression.py: updated + 13 new tests (67 pass)
+
+Model: claude-opus-4.6
+
+Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index f6dbfefc2..812832ffa 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -869,6 +869,11 @@ def print_stats(session_stats: dict, agent=None):
         table.add_row("System prompt", f"{breakdown['total']:,} chars")
         table.add_row("  Instructions", f"{breakdown['instructions']:,} chars")
         table.add_row("  Tool descriptions", f"{breakdown['tools']:,} chars")
+    if agent:
+        knowledge = getattr(agent.memory, "knowledge", "")
+        if knowledge:
+            table.add_row("", "")
+            table.add_row("Knowledge store", f"{len(knowledge):,} chars")
     console.print(table)
     console.print()
 
@@ -1013,6 +1018,12 @@ def cmd_compression_stats(agent):
     total_chars = sum(len(s.summary) for s in steps if isinstance(s, CompressedHistoryStep))
     compression_count = compressor._compression_count if compressor else 0
 
+    # Knowledge store info
+    knowledge = getattr(agent.memory, "knowledge", "")
+    knowledge_chars = len(knowledge)
+    from smolagents.bp_compression import list_xml_tag_names
+    knowledge_tags = list_xml_tag_names(knowledge) if knowledge else []
+
     console.print()
     console.print(Rule("[bold]Compression Stats", style="blue"))
     stats_table = Table(show_header=False, box=None, padding=(0, 2))
@@ -1023,6 +1034,8 @@ def cmd_compression_stats(agent):
     stats_table.add_row("Original steps compressed", str(compressed_original))
     stats_table.add_row("Compression runs", str(compression_count))
     stats_table.add_row("Compressed summary chars", f"{total_chars:,}")
+    stats_table.add_row("Knowledge store chars", f"{knowledge_chars:,}")
+    stats_table.add_row("Knowledge sections", f"{len(knowledge_tags)} ({', '.join(knowledge_tags)})" if knowledge_tags else "0")
     console.print(stats_table)
     console.print()
 
@@ -1061,8 +1074,12 @@ def cmd_memory_stats(agent):
     table.add_row("Total memory steps", str(total_steps))
     for type_name, count in sorted(type_counts.items()):
         table.add_row(f"  {type_name}", str(count))
+    # Knowledge store
+    knowledge = getattr(agent.memory, "knowledge", "")
+    knowledge_chars = len(knowledge)
     table.add_row("Total chars", f"{total_chars:,}")
     table.add_row("Estimated tokens", f"{total_tokens:,}")
+    table.add_row("Knowledge store chars", f"{knowledge_chars:,}")
     console.print(table)
     console.print()
 
@@ -1270,7 +1287,9 @@ def cmd_session_save(agent, session_stats: dict, args: str):
         filename += ".json"
     try:
         count = save_session(filename, agent, session_stats)
-        console.print(f"[green]Session saved to {filename} ({count} steps).[/]")
+        knowledge = getattr(agent.memory, "knowledge", "")
+        knowledge_info = f", knowledge: {len(knowledge):,} chars" if knowledge else ""
+        console.print(f"[green]Session saved to {filename} ({count} steps{knowledge_info}).[/]")
     except Exception as e:
         console.print(f"[red]Failed to save session: {e}[/]")
 
@@ -1291,7 +1310,9 @@ def cmd_session_load(agent, args: str) -> dict | None:
     try:
         stats = load_session(filename, agent)
         step_count = len(agent.memory.steps)
-        console.print(f"[green]Session loaded from {filename} ({step_count} steps).[/]")
+        knowledge = getattr(agent.memory, "knowledge", "")
+        knowledge_info = f", knowledge: {len(knowledge):,} chars" if knowledge else ""
+        console.print(f"[green]Session loaded from {filename} ({step_count} steps{knowledge_info}).[/]")
         return stats
     except Exception as e:
         console.print(f"[red]Failed to load session: {e}[/]")
diff --git a/src/smolagents/bp_session.py b/src/smolagents/bp_session.py
index c55d0e8d2..17c4d69a0 100644
--- a/src/smolagents/bp_session.py
+++ b/src/smolagents/bp_session.py
@@ -289,6 +289,7 @@ def save_session_to_dict(agent, session_stats: dict) -> dict:
             "system_prompt": agent.memory.system_prompt.system_prompt,
             "next_actionstep_id": agent._next_actionstep_id,
             "last_plan_step": agent._last_plan_step,
+            "knowledge": getattr(agent.memory, "knowledge", ""),
         },
         "session_stats": dict(session_stats),
         "monitor_state": {
@@ -322,6 +323,8 @@ def load_session_from_dict(payload: dict, agent) -> dict:
     agent_state = payload.get("agent_state", {})
     agent.memory.system_prompt = SystemPromptStep(system_prompt=agent_state.get("system_prompt", ""))
     agent.memory.steps = [deserialize_step(s) for s in payload.get("steps", [])]
+    if hasattr(agent.memory, "knowledge"):
+        agent.memory.knowledge = agent_state.get("knowledge", "")
 
     # Restore agent counters
     agent._next_actionstep_id = agent_state.get("next_actionstep_id", 1)
@@ -360,6 +363,7 @@ def save_session(filepath: str, agent, session_stats: dict) -> int:
             "system_prompt": agent.memory.system_prompt.system_prompt,
             "next_actionstep_id": agent._next_actionstep_id,
             "last_plan_step": agent._last_plan_step,
+            "knowledge": getattr(agent.memory, "knowledge", ""),
         },
         "session_stats": dict(session_stats),
         "monitor_state": {
@@ -399,6 +403,8 @@ def load_session(filepath: str, agent) -> dict:
     agent_state = payload.get("agent_state", {})
     agent.memory.system_prompt = SystemPromptStep(system_prompt=agent_state.get("system_prompt", ""))
     agent.memory.steps = [deserialize_step(s) for s in payload.get("steps", [])]
+    if hasattr(agent.memory, "knowledge"):
+        agent.memory.knowledge = agent_state.get("knowledge", "")
 
     # Restore agent counters
     agent._next_actionstep_id = agent_state.get("next_actionstep_id", 1)
diff --git a/tests/test_bp_session.py b/tests/test_bp_session.py
index f5b5e14f9..042b2b94f 100644
--- a/tests/test_bp_session.py
+++ b/tests/test_bp_session.py
@@ -481,3 +481,55 @@ def test_minimal_action_step(self, tmp_path):
         assert loaded.observations_images is None
         assert loaded.token_usage is None
         assert loaded.is_final_answer is False
+
+
+class TestKnowledgeRoundTrip:
+    def test_knowledge_saved_and_restored(self, tmp_path):
+        """Knowledge store should survive save/load roundtrip."""
+        filepath = str(tmp_path / "knowledge.json")
+        agent = FakeAgent()
+        agent.memory.knowledge = "<db>PostgreSQL 15</db>\n<auth>JWT tokens</auth>"
+        stats = {"turns": 5, "total_time": 10.0, "total_input_tokens": 100, "total_output_tokens": 50}
+
+        save_session(filepath, agent, stats)
+
+        agent2 = FakeAgent()
+        assert agent2.memory.knowledge == ""
+        load_session(filepath, agent2)
+        assert agent2.memory.knowledge == "<db>PostgreSQL 15</db>\n<auth>JWT tokens</auth>"
+
+    def test_empty_knowledge_roundtrip(self, tmp_path):
+        """Empty knowledge should remain empty after roundtrip."""
+        filepath = str(tmp_path / "no_knowledge.json")
+        agent = FakeAgent()
+        stats = {"turns": 0, "total_time": 0.0, "total_input_tokens": 0, "total_output_tokens": 0}
+
+        save_session(filepath, agent, stats)
+        agent2 = FakeAgent()
+        load_session(filepath, agent2)
+        assert agent2.memory.knowledge == ""
+
+    def test_backward_compatible_load(self, tmp_path):
+        """Loading a session saved without knowledge field should set knowledge to empty."""
+        import json
+        filepath = str(tmp_path / "old_session.json")
+        # Simulate old session format without knowledge key
+        old_payload = {
+            "version": 1,
+            "saved_at": "2025-01-01T00:00:00+00:00",
+            "agent_state": {
+                "system_prompt": "You are a helpful assistant.",
+                "next_actionstep_id": 1,
+                "last_plan_step": 0,
+            },
+            "session_stats": {"turns": 0, "total_time": 0.0, "total_input_tokens": 0, "total_output_tokens": 0},
+            "monitor_state": {"total_input_token_count": 0, "total_output_token_count": 0},
+            "steps": [],
+        }
+        with open(filepath, "w") as f:
+            json.dump(old_payload, f)
+
+        agent = FakeAgent()
+        agent.memory.knowledge = "should be cleared"
+        load_session(filepath, agent)
+        assert agent.memory.knowledge == ""

From 34cbf95ea3fa3dd26ec917fe49ef545b95abae51 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 19:40:41 -0300
Subject: [PATCH 03/38] feat: Add
 should_add_tool_description_into_system_prompt flag

Add a boolean property to the Tool base class (default False). When
True, the tool's full description is injected into the system prompt
after the compact tool listing, giving the agent richer guidance for
complex tools.

- Tool.should_add_tool_description_into_system_prompt added (tools.py)
- UpdateKnowledge sets flag to True with enhanced description
- All 3 prompt templates updated (code_agent, structured_code_agent,
  toolcalling_agent) with Jinja loop for flagged tools
- Removed temp commit_msg.txt from repo

Model: claude-opus-4.6

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 commit_msg.txt                                | 27 -------------------
 src/smolagents/bp_tools.py                    | 19 +++++++++----
 src/smolagents/prompts/code_agent.yaml        |  4 +++
 .../prompts/structured_code_agent.yaml        |  4 +++
 src/smolagents/prompts/toolcalling_agent.yaml |  4 +++
 src/smolagents/tools.py                       |  1 +
 6 files changed, 27 insertions(+), 32 deletions(-)
 delete mode 100644 commit_msg.txt

diff --git a/commit_msg.txt b/commit_msg.txt
deleted file mode 100644
index bcf457c17..000000000
--- a/commit_msg.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-
-feat: Add knowledge store for persistent agent memory
-
-Implements a tag-based XML knowledge system that replaces the lossy
-summary-of-summaries merge with knowledge extraction:
-
-- AgentMemory gains a knowledge field (persistent tagged XML string)
-- Knowledge is injected into context just before the last message
-- merge_compressed() now extracts tagged XML facts via LLM and applies
-  them to the knowledge store using merge_context() (deterministic:
-  update existing tags, delete empty tags, append new tags)
-- Merged compressed steps are removed (info lives in knowledge)
-- New UpdateKnowledge tool lets the agent manage its own knowledge
-- Three new functions: merge_context(), list_xml_tag_names(),
-  create_knowledge_extraction_prompt()
-
-Files changed:
-- memory.py: knowledge field + reset
-- agents.py: knowledge injection + UpdateKnowledge tool registration
-- bp_compression.py: merge_context, knowledge extraction prompt,
-  modified merge_compressed to return (steps, knowledge) tuple
-- bp_tools.py: UpdateKnowledge tool class
-- tests/test_compression.py: updated + 13 new tests (67 pass)
-
-Model: claude-opus-4.6
-
-Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
diff --git a/src/smolagents/bp_tools.py b/src/smolagents/bp_tools.py
index 5beb62d59..fdf1b0be1 100644
--- a/src/smolagents/bp_tools.py
+++ b/src/smolagents/bp_tools.py
@@ -3005,12 +3005,21 @@ class UpdateKnowledge(Tool):
     """
 
     name = "update_knowledge"
+    should_add_tool_description_into_system_prompt = True
     description = (
-        "Update your persistent knowledge store with tagged XML sections. "
-        "The knowledge store appears in your context as <knowledge>...</knowledge> and survives compression. "
-        "Use descriptive tag names. To ADD or UPDATE a section: <tagname>content</tagname>. "
-        "To DELETE a section: <tagname/>. Only include sections you want to change. "
-        "Example: update_knowledge('<current_plan>1. Do X  2. Do Y</current_plan><old_notes/>')"
+        "Update your persistent knowledge store with tagged XML sections.\n\n"
+        "You have a <knowledge> section in your context containing your long-term notes. "
+        "It survives context compression and is always visible to you.\n\n"
+        "Usage: update_knowledge(updates='<tag_name>content</tag_name>')\n\n"
+        "Rules:\n"
+        "- To ADD a new section: use a new descriptive tag name\n"
+        "- To UPDATE an existing section: use the same tag name with new content\n"
+        "- To DELETE a section no longer relevant: use a self-closing tag <tag_name/>\n"
+        "- Tag names are free: use descriptive names like <plan>, <findings>, <codebase>, <decisions>\n"
+        "- Only include sections you want to change\n\n"
+        "Example:\n"
+        "  update_knowledge('<plan>1. Setup done\\n2. Now implementing API</plan><old_notes/>')\n\n"
+        "Use this to note important discoveries, track your plan, or remove stale information."
     )
     inputs = {
         "updates": {
diff --git a/src/smolagents/prompts/code_agent.yaml b/src/smolagents/prompts/code_agent.yaml
index 4d6c1e85c..9686391d8 100644
--- a/src/smolagents/prompts/code_agent.yaml
+++ b/src/smolagents/prompts/code_agent.yaml
@@ -112,6 +112,10 @@ system_prompt: |-
     {{ tool.to_compact_prompt() }}
     {%- endfor %}
     ```
+    {%- for tool in tools.values() if tool.should_add_tool_description_into_system_prompt %}
+
+    {{ tool.name }}: {{ tool.description }}
+    {%- endfor %}
 
     {%- if managed_agents and managed_agents.values() | list %}
     You can also give tasks to team members.
diff --git a/src/smolagents/prompts/structured_code_agent.yaml b/src/smolagents/prompts/structured_code_agent.yaml
index 6d77bd8ba..1a66b624a 100644
--- a/src/smolagents/prompts/structured_code_agent.yaml
+++ b/src/smolagents/prompts/structured_code_agent.yaml
@@ -81,6 +81,10 @@ system_prompt: |-
   {{ tool.to_compact_prompt() }}
   {%- endfor %}
   ```
+  {%- for tool in tools.values() if tool.should_add_tool_description_into_system_prompt %}
+
+  {{ tool.name }}: {{ tool.description }}
+  {%- endfor %}
 
   {%- if managed_agents and managed_agents.values() | list %}
   You can also give tasks to team members.
diff --git a/src/smolagents/prompts/toolcalling_agent.yaml b/src/smolagents/prompts/toolcalling_agent.yaml
index be3162571..82e24bb0d 100644
--- a/src/smolagents/prompts/toolcalling_agent.yaml
+++ b/src/smolagents/prompts/toolcalling_agent.yaml
@@ -93,6 +93,10 @@ system_prompt: |-
   {%- for tool in tools.values() %}
   - {{ tool.to_compact_prompt() }}
   {%- endfor %}
+  {%- for tool in tools.values() if tool.should_add_tool_description_into_system_prompt %}
+
+  {{ tool.name }}: {{ tool.description }}
+  {%- endfor %}
 
   {%- if managed_agents and managed_agents.values() | list %}
   You can also give tasks to team members.
diff --git a/src/smolagents/tools.py b/src/smolagents/tools.py
index b5dad0a04..c9ad0b036 100644
--- a/src/smolagents/tools.py
+++ b/src/smolagents/tools.py
@@ -133,6 +133,7 @@ class Tool(BaseTool):
     inputs: dict[str, dict[str, str | type | bool]]
     output_type: str
     output_schema: dict[str, Any] | None = None
+    should_add_tool_description_into_system_prompt: bool = False
 
     def __init__(self, *args, **kwargs):
         self.is_initialized = False

From 0d85775b665e5de445061eb9f370a94afb274c54 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 20:01:52 -0300
Subject: [PATCH 04/38] feat: Show context and knowledge size in turn summary
 and system prompt log

- bp_cli.py: print_turn_summary now shows Knowledge chars when non-empty
- agents.py: System prompt log line now includes Context and Knowledge chars

Example output:
  [System prompt: 16,301 chars | Instructions: 11,547 chars | Tool descriptions: 4,754 chars | Context: 27,595 chars | Knowledge: 2,450 chars]
  Turn 3 | 32.0s | In: 22,882 | Out: 386 | Total: 23,268 | Memory: 17 steps | Context: 27,595 chars | Knowledge: 2,450 chars

Model: claude-opus-4.6

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/agents.py | 11 ++++++++++-
 src/smolagents/bp_cli.py |  3 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
index 963c8e380..267696f86 100644
--- a/src/smolagents/agents.py
+++ b/src/smolagents/agents.py
@@ -562,11 +562,20 @@ def run(
             title=self.name if hasattr(self, "name") else None,
         )
         breakdown = self.get_prompt_char_breakdown()
+        ctx_chars = self.get_context_char_size()
+        knowledge = getattr(self.memory, "knowledge", "")
+        knowledge_chars = len(knowledge) if knowledge else 0
+        extras = ""
+        if ctx_chars > 0:
+            extras += f" | Context: {ctx_chars:,} chars"
+        if knowledge_chars > 0:
+            extras += f" | Knowledge: {knowledge_chars:,} chars"
         self.logger.log(
             Text(
                 f"[System prompt: {breakdown['total']:,} chars"
                 f" | Instructions: {breakdown['instructions']:,} chars"
-                f" | Tool descriptions: {breakdown['tools']:,} chars]",
+                f" | Tool descriptions: {breakdown['tools']:,} chars"
+                f"{extras}]",
                 style="dim",
             ),
             level=LogLevel.INFO,
diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index 812832ffa..c630f5df2 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -530,6 +530,9 @@ def print_turn_summary(turn_num: int, elapsed: float, input_tokens: int, output_
         ctx_chars = agent.get_context_char_size()
         if ctx_chars > 0:
             line += f" | Context: {format_tokens(ctx_chars)} chars"
+        knowledge = getattr(agent.memory, "knowledge", "")
+        if knowledge:
+            line += f" | Knowledge: {format_tokens(len(knowledge))} chars"
     line += f" | Auto-approve: {'on' if _auto_approve else 'off'}"
     line += "[/]"
     console.print(line)

From 8af881e3abbfa57313583d0d8ca1474610375ecf Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 20:11:51 -0300
Subject: [PATCH 05/38] docs: Update compression-plan.md to reflect implemented
 code

Rewrote the document from an initial implementation plan to accurate
documentation of the current two-phase compression pipeline:

- Phase 1: Step compression into CompressedHistoryStep
- Phase 2: Knowledge extraction into memory.knowledge (tagged XML)
- All CompressionConfig fields with actual defaults
- All functions and classes in bp_compression.py
- Knowledge store mechanics (merge_context, update_knowledge tool)
- BPSA CLI environment variables with defaults
- Updated file list, usage examples, and design decisions

Model: claude-opus-4.6

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 docs/compression-plan.md | 184 +++++++++++++++++++++++++++++----------
 1 file changed, 137 insertions(+), 47 deletions(-)

diff --git a/docs/compression-plan.md b/docs/compression-plan.md
index 32eba32da..7b03dc007 100644
--- a/docs/compression-plan.md
+++ b/docs/compression-plan.md
@@ -1,81 +1,159 @@
-# Context Compression Implementation Plan
+
+# Context Compression & Knowledge Extraction
 
 ## Overview
-Add a hybrid rolling summarization system to smolagents that compresses older memory steps via LLM summarization while keeping recent steps in full detail.
+A hybrid rolling summarization system for smolagents that compresses older memory steps via LLM summarization while keeping recent steps in full detail. When compressed summaries accumulate, they are further distilled into a persistent knowledge store using tagged XML.
+
+## Architecture
+
+### Two-Phase Compression Pipeline
+
+**Phase 1 — Step Compression:** Older action steps are summarized by the LLM into `CompressedHistoryStep` instances. Recent steps are kept in full detail.
 
-## Files to Create/Modify
+**Phase 2 — Knowledge Extraction:** When compressed steps accumulate beyond a threshold, older ones are merged into a persistent `memory.knowledge` store as tagged XML. The merged compressed steps are then removed entirely.
+
+```
+Steps accumulate → compress older steps → CompressedHistoryStep summaries
+                                              ↓ (when too many accumulate)
+                                    Extract knowledge via LLM
+                                              ↓
+                                    merge_context() into memory.knowledge
+                                              ↓
+                                    Old compressed steps removed
+                                              ↓
+                                    Knowledge injected into LLM context
+                                    as <knowledge>...</knowledge> message
+```
 
-### 1. CREATE: `src/smolagents/bp_compression.py`
-New module containing all compression logic:
+## Files
+
+### `src/smolagents/bp_compression.py`
+All compression and knowledge logic:
 
 ```python
 @dataclass
 class CompressionConfig:
     enabled: bool = True
-    keep_recent_steps: int = 5          # Recent steps to keep in full
+    keep_recent_steps: int = 5            # Recent steps to keep in full
     max_uncompressed_steps: int = 10      # Compress when exceeds this
-    estimated_token_threshold: int = 0  # Token-based trigger (0=disabled)
-    compression_model: Model | None = None  # Cheaper model for compression
-    preserve_error_steps: bool = True
-    preserve_final_answer_steps: bool = True
-    max_compressed_steps: int = 0       # Merge compressed summaries (0=disabled)
+    estimated_token_threshold: int = 0    # Token-based trigger (0=disabled)
+    compression_model: Model | None = None  # Separate model for compression (None=use main)
+    max_summary_tokens: int = 50000       # Max tokens for generated summary
+    preserve_error_steps: bool = False    # Keep error steps uncompressed
+    preserve_final_answer_steps: bool = True  # Keep final_answer steps uncompressed
+    max_compressed_steps: int = 32        # Merge compressed steps when exceeds this
+    keep_compressed_steps: int = 22       # Recent compressed steps to keep during merge
+    min_compression_chars: int = 4096     # Skip compression if content below this
 
 @dataclass
 class CompressedHistoryStep(MemoryStep):
     summary: str
     compressed_step_numbers: list[int]
     original_step_count: int
-    # Implements to_messages() returning summary as USER message
+    timing: Timing | None
+    compression_token_usage: TokenUsage | None
+    # to_messages() renders as [COMPRESSED HISTORY - N steps summarized]
 
 class ContextCompressor:
     def should_compress(steps) -> bool
-    def compress(steps) -> list[MemoryStep]  # Returns new list with compressed history
+    def compress(steps) -> list[MemoryStep]
+    def should_merge_compressed(steps) -> bool
+    def merge_compressed(steps, knowledge) -> tuple[list[MemoryStep], str]
 ```
 
 Key functions:
-- `estimate_tokens(text)` - Character-based heuristic (~4 chars/token)
-- `should_preserve_step(step, config)` - Check if step must be kept
-- `create_compression_prompt(steps)` - Build LLM prompt for summarization
-- `create_compression_callback(compressor)` - Callback for automatic triggering
-
-### 2. MODIFY: `src/smolagents/agents.py`
-Add to `MultiStepAgent.__init__` (around line 360):
-- New parameter: `compression_config: CompressionConfig | None = None`
-- New method: `_setup_compression()` that registers compression callback
-
-Integration via existing callback system (lines 425-443) - no changes to core agent loop.
-
-### 3. MODIFY: `src/smolagents/__init__.py`
-Add exports:
-```python
-from .bp_compression import CompressionConfig, CompressedHistoryStep, ContextCompressor
-```
-
-### 4. CREATE: `tests/test_compression.py`
+- `estimate_tokens(text)` — Character-based heuristic (~4 chars/token)
+- `estimate_step_tokens(step)` — Token estimate for a memory step
+- `should_preserve_step(step, config)` — Check if step must be kept
+- `create_compression_prompt(steps)` — Build LLM prompt for step summarization
+- `create_knowledge_extraction_prompt(steps, tag_names)` — Build LLM prompt for knowledge extraction
+- `create_merge_prompt(steps)` — Build prompt for merging compressed steps
+- `list_xml_tag_names(text)` — Extract XML tag names from a string
+- `merge_context(existing, updates)` — Apply tagged XML diff (add/update/delete)
+- `create_compression_callback(compressor)` — Callback for automatic triggering
+
+### `src/smolagents/agents.py`
+Integration in `MultiStepAgent`:
+- `__init__` accepts `compression_config: CompressionConfig | None = None`
+- `_setup_compression()` registers the compression callback
+- `write_memory_to_messages()` injects `memory.knowledge` as a `<knowledge>` message just before the last message in context
+- System prompt log line shows Context and Knowledge char counts
+
+### `src/smolagents/memory.py`
+- `AgentMemory.knowledge: str = ""` — Persistent knowledge store (tagged XML)
+- Reset on `memory.reset()`
+
+### `src/smolagents/bp_tools.py`
+- `UpdateKnowledge` tool — Allows the agent to explicitly update its knowledge store via `update_knowledge(updates='<tag>content</tag>')`
+
+### `src/smolagents/bp_cli.py`
+- `print_turn_summary()` shows Context and Knowledge char counts
+- Environment variable configuration (see below)
+
+### `tests/test_compression.py`
 Tests for:
 - `CompressedHistoryStep.to_messages()` and `dict()` serialization
 - Token estimation functions
 - `should_preserve_step()` logic
 - `ContextCompressor.should_compress()` threshold behavior
+- `merge_context()` add/update/delete operations
+- `list_xml_tag_names()` extraction
 - Integration test with mock model
 
-## Implementation Sequence
+## Knowledge Store
+
+The knowledge store (`memory.knowledge`) is a plain string of tagged XML:
+
+```xml
+<plan>1. Setup done
+2. Now implementing API</plan>
+<key_findings>The database uses PostgreSQL 14 with pgvector extension</key_findings>
+<current_status>API endpoints implemented, testing in progress</current_status>
+```
+
+**Two sources of updates:**
+1. **Automatic:** `merge_compressed()` extracts knowledge from old compressed summaries (Phase 2)
+2. **Manual:** The `update_knowledge` tool lets the agent explicitly add/update/delete sections
+
+**`merge_context(existing, updates)` applies three operations:**
+- `<tag>content</tag>` where tag exists → **UPDATE** (replace content)
+- `<tag>content</tag>` where tag is new → **APPEND**
+- `<tag/>` or `<tag></tag>` (self-closing/empty) → **DELETE**
+
+**Injection:** Knowledge is inserted as a `<knowledge>...</knowledge>` USER message just before the last message in the LLM context, giving it high attention weight.
 
-1. Create `bp_compression.py` with all classes and functions
-2. Modify `MultiStepAgent.__init__` to accept `compression_config`
-3. Add `_setup_compression()` method to register callback
-4. Update `__init__.py` exports
-5. Create test file
-6. Run tests to verify
+## BPSA CLI Configuration
+
+Environment variables (with defaults used by the CLI):
+
+| Variable | Default | Description |
+|---|---|---|
+| `BPSA_COMPRESSION_ENABLED` | `1` | Enable compression |
+| `BPSA_COMPRESSION_KEEP_RECENT_STEPS` | `40` | Recent steps to keep uncompressed |
+| `BPSA_COMPRESSION_MAX_UNCOMPRESSED_STEPS` | `50` | Trigger threshold for compression |
+| `BPSA_COMPRESSION_KEEP_COMPRESSED_STEPS` | `80` | Compressed steps to keep on merge |
+| `BPSA_COMPRESSION_MAX_COMPRESSED_STEPS` | `120` | Trigger threshold for merge |
+| `BPSA_COMPRESSION_TOKEN_THRESHOLD` | `0` | Token-based trigger (0=disabled) |
+| `BPSA_COMPRESSION_MODEL` | same as main | Model ID for compression |
+| `BPSA_COMPRESSION_MAX_SUMMARY_TOKENS` | `50000` | Max tokens in summary |
+| `BPSA_COMPRESSION_PRESERVE_ERROR_STEPS` | `0` | Keep error steps uncompressed |
+| `BPSA_COMPRESSION_PRESERVE_FINAL_ANSWER_STEPS` | `1` | Keep final_answer steps |
+| `BPSA_COMPRESSION_MIN_CHARS` | `4096` | Min chars before compressing |
+
+Note: The CLI defaults differ from `CompressionConfig` defaults to suit interactive use (more steps kept).
 
 ## Usage Example
+
+### Programmatic
 ```python
 from smolagents import CodeAgent, CompressionConfig, LiteLLMModel
 
 config = CompressionConfig(
     keep_recent_steps=5,
-    max_uncompressed_steps=8,
-    compression_model=LiteLLMModel(model_id="gpt-4o-mini"),  # Cheap model
+    max_uncompressed_steps=10,
+    compression_model=LiteLLMModel(model_id="gpt-4o-mini"),  # Cheaper model
+    max_compressed_steps=32,
+    keep_compressed_steps=22,
 )
 
 agent = CodeAgent(
@@ -85,13 +163,25 @@ agent = CodeAgent(
 )
 ```
 
+### BPSA CLI
+```bash
+export BPSA_COMPRESSION_ENABLED=1
+export BPSA_COMPRESSION_KEEP_RECENT_STEPS=40
+export BPSA_COMPRESSION_MAX_UNCOMPRESSED_STEPS=50
+bpsa
+```
+
 ## Design Decisions
-- **New file vs existing**: New `bp_compression.py` keeps related logic together, follows pattern of `monitoring.py`
-- **Callback-based**: Uses existing callback system for clean integration without modifying agent loop
-- **Token estimation**: Character heuristic (4 chars/token) since no proactive token counting exists
-- **Graceful fallback**: If compression LLM call fails, keep original steps and log warning
+- **New file vs existing:** `bp_compression.py` keeps all compression/knowledge logic together, follows pattern of `monitoring.py`
+- **Callback-based:** Uses existing callback system for clean integration without modifying the agent loop
+- **Token estimation:** Character heuristic (4 chars/token) since no proactive token counting exists
+- **Graceful fallback:** If the compression or knowledge-extraction LLM call fails, the original steps are kept and a warning is logged
+- **Two-phase design:** Step compression (lossy but retains prose summaries) feeds into knowledge extraction (structured XML) for long-term retention
+- **Tagged XML for knowledge:** Simple, parseable format that supports incremental updates via diff operations
+- **Knowledge placement:** Injected near end of context for high attention weight in transformer models
+- **Min chars threshold:** Avoids wasting LLM calls on already-concise content
 
 ## Verification
 1. Run existing tests: `pytest tests/test_memory.py tests/test_agents.py`
-2. Run new tests: `pytest tests/test_compression.py`
-3. Manual test: Create agent with compression enabled, run multi-step task, verify memory gets compressed
+2. Run compression tests: `pytest tests/test_compression.py`
+3. Manual test: Create agent with compression enabled, run multi-step task, verify memory gets compressed and knowledge accumulates

From 0213cec58587212b40871def29f589d55e33dc34 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 20:24:56 -0300
Subject: [PATCH 06/38] feat: Knowledge-aware compression (Option B)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 1 step compression now knows which topics are already in the
persistent knowledge store. The compression prompt tells the LLM:
"These topics are already captured in knowledge: <tags>. Do NOT
repeat them. Focus on new findings, changes, or corrections."

This avoids redundant information in compressed summaries without
any extra LLM calls — the existing compression call simply gets
a smarter prompt.

Changes:
- create_compression_prompt() accepts optional knowledge_tag_names
- compress() accepts optional knowledge string, extracts tag names
- compression_callback passes agent.memory.knowledge to compress()
- Fixed pre-existing test (Planning step assertion) and added 3 new
  tests for knowledge-aware prompt generation

Model: claude-opus-4.6

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_compression.py | 29 +++++++++++++++-----
 tests/test_compression.py        | 46 +++++++++++++++++++++++++++++++-
 2 files changed, 68 insertions(+), 7 deletions(-)

diff --git a/src/smolagents/bp_compression.py b/src/smolagents/bp_compression.py
index 083370947..4063a8948 100644
--- a/src/smolagents/bp_compression.py
+++ b/src/smolagents/bp_compression.py
@@ -234,14 +234,22 @@ def should_preserve_step(step: MemoryStep, config: CompressionConfig) -> bool:
     return False
 
 
-def create_compression_prompt(steps_to_compress: list[MemoryStep]) -> str:
+def create_compression_prompt(
+    steps_to_compress: list[MemoryStep],
+    knowledge_tag_names: list[str] | None = None,
+) -> str:
     """Create the prompt for the compression LLM call.
 
     Builds a structured representation of the steps to compress and asks
     the LLM to generate a concise summary preserving key information.
 
+    When knowledge_tag_names is provided, the prompt tells the LLM which
+    topics are already captured in the persistent knowledge store so it
+    can avoid redundantly summarizing them and focus on new/changed info.
+
     Args:
         steps_to_compress: List of memory steps to summarize.
+        knowledge_tag_names: Optional list of tag names already in the knowledge store.
 
     Returns:
         The prompt string for the compression LLM call.
@@ -269,9 +277,17 @@ def create_compression_prompt(steps_to_compress: list[MemoryStep]) -> str:
 
     steps_text = "<\n>".join(step_descriptions)
 
+    knowledge_hint = ""
+    if knowledge_tag_names:
+        tag_list = ", ".join(knowledge_tag_names)
+        knowledge_hint = f"""
+The following topics are already captured in the persistent knowledge store: {tag_list}
+Do NOT repeat information that is already in knowledge. Focus on new findings, changes, or corrections.
+"""
+
     return f"""Summarize the following agent execution history (<execution_history></execution_history>) into a concise summary.
 {COMMON_COMPRESSION_INSTRUCTIONS}
-
+{knowledge_hint}
 This is the execution history:
 <execution_history>
 {steps_text}
@@ -501,7 +517,7 @@ def should_compress(self, steps: list[MemoryStep]) -> bool:
 
         return False
 
-    def compress(self, steps: list[MemoryStep]) -> list[MemoryStep]:
+    def compress(self, steps: list[MemoryStep], knowledge: str = "") -> list[MemoryStep]:
         """Compress older steps while preserving recent and critical steps.
 
         This method:
@@ -576,8 +592,9 @@ def compress(self, steps: list[MemoryStep]) -> list[MemoryStep]:
                 logger.info(f"Compression skipped: content ({original_chars} chars) < min_compression_chars ({self.config.min_compression_chars})")
             return steps
 
-        # Generate summary using LLM
-        compression_prompt = create_compression_prompt(steps_to_compress)
+        # Generate summary using LLM (knowledge-aware: skip topics already in knowledge)
+        knowledge_tag_names = list_xml_tag_names(knowledge) if knowledge else None
+        compression_prompt = create_compression_prompt(steps_to_compress, knowledge_tag_names)
 
         try:
             summary_message = self.compression_model.generate(
@@ -824,7 +841,7 @@ def compression_callback(step: MemoryStep, agent: "MultiStepAgent") -> None:
             return
 
         if compressor.should_compress(agent.memory.steps):
-            agent.memory.steps = compressor.compress(agent.memory.steps)
+            agent.memory.steps = compressor.compress(agent.memory.steps, agent.memory.knowledge)
 
         if compressor.should_merge_compressed(agent.memory.steps):
             agent.memory.steps, agent.memory.knowledge = compressor.merge_compressed(
diff --git a/tests/test_compression.py b/tests/test_compression.py
index ad2b1354c..19c26b26a 100644
--- a/tests/test_compression.py
+++ b/tests/test_compression.py
@@ -275,9 +275,53 @@ def test_creates_prompt_for_planning_steps(self):
             ),
         ]
         prompt = create_compression_prompt(steps)
-        assert "Planning step:" in prompt
+        assert "<plan>" in prompt
         assert "First step" in prompt
 
+    def test_creates_prompt_with_knowledge_tags(self):
+        steps = [
+            ActionStep(
+                step_number=1,
+                model_input_messages=[],
+                model_output="Found the config file",
+                observations="config.yaml loaded",
+                model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Found config"),
+                timing=Timing(start_time=0, end_time=1),
+            ),
+        ]
+        prompt = create_compression_prompt(steps, knowledge_tag_names=["plan", "architecture"])
+        assert "plan, architecture" in prompt
+        assert "already captured in the persistent knowledge store" in prompt
+        assert "Do NOT repeat" in prompt
+
+    def test_creates_prompt_without_knowledge_tags(self):
+        steps = [
+            ActionStep(
+                step_number=1,
+                model_input_messages=[],
+                model_output="Found the config file",
+                observations="config.yaml loaded",
+                model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Found config"),
+                timing=Timing(start_time=0, end_time=1),
+            ),
+        ]
+        prompt = create_compression_prompt(steps, knowledge_tag_names=None)
+        assert "already captured in the persistent knowledge store" not in prompt
+
+    def test_creates_prompt_with_empty_knowledge_tags(self):
+        steps = [
+            ActionStep(
+                step_number=1,
+                model_input_messages=[],
+                model_output="Found the config file",
+                observations="config.yaml loaded",
+                model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Found config"),
+                timing=Timing(start_time=0, end_time=1),
+            ),
+        ]
+        prompt = create_compression_prompt(steps, knowledge_tag_names=[])
+        assert "already captured in the persistent knowledge store" not in prompt
+
 
 class TestCreateMergePrompt:
     def test_creates_prompt_from_compressed_steps(self):

From aaca159bc222f1078866cde101b8b77bd1375246 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 20:43:35 -0300
Subject: [PATCH 07/38] feat: Combined summary + knowledge update in Phase 1
 compression (Option 2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 1 step compression now produces both a summary AND knowledge
updates in a single LLM call. The compression prompt asks the LLM to
output structured format:

  <summary>Concise summary...</summary>
  <knowledge_updates>
  <tag>content</tag>
  </knowledge_updates>

When the LLM includes knowledge_updates, they are applied via the
existing merge_context() function. If the LLM doesn't follow the
structured format (no <summary> tags), the text before any
<knowledge_updates> section (or the entire output when there is none)
is used as the summary — fully backwards compatible.

This means knowledge starts accumulating from the FIRST compression
cycle, not after Phase 2 triggers (which requires 32+ compressed steps).
No extra LLM calls — same single compression call, smarter prompt.

Changes to bp_compression.py:
- create_compression_prompt() now takes full knowledge string (not tag names)
  and includes it as <current_knowledge> context in the prompt
- New parse_compression_output() parses <summary> + <knowledge_updates>
  with graceful fallback for unstructured output
- compress() returns tuple[list[MemoryStep], str] (steps, knowledge)
- compress() applies knowledge_updates via merge_context() when present
- compression_callback unpacks tuple return
- parse_compression_output added to __all__

Changes to bp_cli.py:
- /compress command handles tuple return from compress()

Changes to tests/test_compression.py:
- 6 new tests for parse_compression_output (structured, summary-only,
  fallback, no-summary-with-knowledge, empty, None)
- 2 new tests for compress (knowledge extraction, fallback no tags)
- Updated existing compress tests for tuple return type
- Updated prompt tests for new knowledge parameter

Model: claude-opus-4.6

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_cli.py         |   2 +-
 src/smolagents/bp_compression.py | 152 ++++++++++++++++++++++++-------
 tests/test_compression.py        | 150 +++++++++++++++++++++++++-----
 3 files changed, 246 insertions(+), 58 deletions(-)

diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index c630f5df2..093ee3c56 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -1155,7 +1155,7 @@ def cmd_compress(agent, args: str):
         old_threshold = compressor.config.max_uncompressed_steps
         compressor.config.max_uncompressed_steps = 0  # Force trigger
         original_len = len(agent.memory.steps)
-        agent.memory.steps = compressor.compress(agent.memory.steps)
+        agent.memory.steps, agent.memory.knowledge = compressor.compress(agent.memory.steps, agent.memory.knowledge)
         compressor.config.max_uncompressed_steps = old_threshold
         new_len = len(agent.memory.steps)
         if new_len < original_len:
diff --git a/src/smolagents/bp_compression.py b/src/smolagents/bp_compression.py
index 4063a8948..6943c9ca5 100644
--- a/src/smolagents/bp_compression.py
+++ b/src/smolagents/bp_compression.py
@@ -35,6 +35,7 @@
     "merge_context",
     "list_xml_tag_names",
     "create_knowledge_extraction_prompt",
+    "parse_compression_output",
 ]
 
 
@@ -236,20 +237,20 @@ def should_preserve_step(step: MemoryStep, config: CompressionConfig) -> bool:
 
 def create_compression_prompt(
     steps_to_compress: list[MemoryStep],
-    knowledge_tag_names: list[str] | None = None,
+    knowledge: str = "",
 ) -> str:
     """Create the prompt for the compression LLM call.
 
     Builds a structured representation of the steps to compress and asks
     the LLM to generate a concise summary preserving key information.
 
-    When knowledge_tag_names is provided, the prompt tells the LLM which
-    topics are already captured in the persistent knowledge store so it
-    can avoid redundantly summarizing them and focus on new/changed info.
+    When knowledge is provided, the full knowledge store is included so the
+    LLM can avoid redundancy and also propose knowledge updates (corrections,
+    new findings) as part of its output.
 
     Args:
         steps_to_compress: List of memory steps to summarize.
-        knowledge_tag_names: Optional list of tag names already in the knowledge store.
+        knowledge: Current knowledge store content (tagged XML). Empty string if none.
 
     Returns:
         The prompt string for the compression LLM call.
@@ -277,23 +278,55 @@ def create_compression_prompt(
 
     steps_text = "<\n>".join(step_descriptions)
 
-    knowledge_hint = ""
-    if knowledge_tag_names:
-        tag_list = ", ".join(knowledge_tag_names)
-        knowledge_hint = f"""
-The following topics are already captured in the persistent knowledge store: {tag_list}
-Do NOT repeat information that is already in knowledge. Focus on new findings, changes, or corrections.
+    if knowledge and knowledge.strip():
+        knowledge_section = f"""
+The agent has a persistent knowledge store with the following content:
+<current_knowledge>
+{knowledge}
+</current_knowledge>
+
+Your summary should NOT repeat information already in knowledge.
+If the execution history contains corrections or important new information that
+should update the knowledge store, include a <knowledge_updates> section after
+your summary. Use XML tags to add, update, or delete knowledge sections:
+- To ADD or UPDATE: <tag_name>new content</tag_name>
+- To DELETE an obsolete section: <tag_name/>
+
+If no knowledge updates are needed, omit the <knowledge_updates> section entirely.
+
+Output format:
+<summary>
+Your concise summary here...
+</summary>
+<knowledge_updates>
+...tagged updates if any...
+</knowledge_updates>
+"""
+    else:
+        knowledge_section = """
+If the execution history contains important information worth remembering
+long-term, include a <knowledge_updates> section after your summary with
+tagged XML sections (e.g., <plan>...</plan>, <architecture>...</architecture>).
+
+If no knowledge is worth extracting yet, omit the <knowledge_updates> section.
+
+Output format:
+<summary>
+Your concise summary here...
+</summary>
+<knowledge_updates>
+...tagged sections if any...
+</knowledge_updates>
 """
 
-    return f"""Summarize the following agent execution history (<execution_history></execution_history>) into a concise summary.
+    return f"""Summarize the following agent execution history into a concise summary.
 {COMMON_COMPRESSION_INSTRUCTIONS}
-{knowledge_hint}
+{knowledge_section}
 This is the execution history:
 <execution_history>
 {steps_text}
 </execution_history>
-
-SUMMARY:"""
+"""
 
 
 def create_merge_prompt(compressed_steps: list[CompressedHistoryStep]) -> str:
@@ -332,6 +365,45 @@ def create_merge_prompt(compressed_steps: list[CompressedHistoryStep]) -> str:
 CONSOLIDATED SUMMARY:"""
 
 
+
+
+
+def parse_compression_output(raw_output: str) -> tuple[str, str]:
+    """Parse structured compression output into summary and knowledge updates.
+
+    Expects output in the format:
+        <summary>...</summary>
+        <knowledge_updates>...</knowledge_updates>
+
+    Falls back gracefully: if no <summary> tags are found, the text before
+    <knowledge_updates> (or the entire output if that is absent too) is the summary.
+
+    Args:
+        raw_output: Raw LLM output from the compression call.
+
+    Returns:
+        Tuple of (summary, knowledge_updates). knowledge_updates may be empty string.
+    """
+    if not raw_output:
+        return "", ""
+
+    # Try to extract <summary>...</summary>
+    summary_match = re.search(r'<summary>(.*?)</summary>', raw_output, re.DOTALL)
+    if summary_match:
+        summary = summary_match.group(1).strip()
+    else:
+        # Fallback: no <summary> tags, use everything before <knowledge_updates> or entire output
+        knowledge_start = raw_output.find('<knowledge_updates>')
+        if knowledge_start >= 0:
+            summary = raw_output[:knowledge_start].strip()
+        else:
+            summary = raw_output.strip()
+
+    # Try to extract <knowledge_updates>...</knowledge_updates>
+    knowledge_match = re.search(r'<knowledge_updates>(.*?)</knowledge_updates>', raw_output, re.DOTALL)
+    knowledge_updates = knowledge_match.group(1).strip() if knowledge_match else ""
+
+    return summary, knowledge_updates
 def list_xml_tag_names(text: str) -> list[str]:
     """List unique top-level XML tag names found in text.
 
@@ -517,25 +589,27 @@ def should_compress(self, steps: list[MemoryStep]) -> bool:
 
         return False
 
-    def compress(self, steps: list[MemoryStep], knowledge: str = "") -> list[MemoryStep]:
+    def compress(self, steps: list[MemoryStep], knowledge: str = "") -> tuple[list[MemoryStep], str]:
         """Compress older steps while preserving recent and critical steps.
 
         This method:
         1. Identifies which steps must be preserved (TaskStep, errors, etc.)
         2. Keeps the most recent N compressible steps in full detail
         3. Compresses remaining old steps into a summary via LLM
-        4. Returns a new list with compressed history
+        4. Optionally extracts knowledge updates from the same LLM call
+        5. Returns a new step list and updated knowledge
 
         Args:
             steps: Current list of memory steps.
+            knowledge: Current knowledge store content (tagged XML).
 
         Returns:
-            New list with older steps compressed into a CompressedHistoryStep.
+            Tuple of (new_steps, updated_knowledge).
         """
         start_time = time.time()
 
         if not self.should_compress(steps):
-            return steps
+            return steps, knowledge
 
         # Separate preserved steps and compressible steps
         preserved_indices = set()
@@ -558,7 +632,7 @@ def compress(self, steps: list[MemoryStep], knowledge: str = "") -> list[MemoryS
         compressible_indices = [i for i in range(len(steps)) if i not in preserved_indices]
 
         if len(compressible_indices) <= self.config.keep_recent_steps:
-            return steps  # Nothing to compress
+            return steps, knowledge  # Nothing to compress
 
         # Steps to keep in full detail (most recent compressible ones)
         recent_to_keep = set(compressible_indices[-self.config.keep_recent_steps :])
@@ -567,7 +641,7 @@ def compress(self, steps: list[MemoryStep], knowledge: str = "") -> list[MemoryS
         to_compress_indices = [i for i in compressible_indices if i not in recent_to_keep]
 
         if not to_compress_indices:
-            return steps
+            return steps, knowledge
 
         steps_to_compress = [steps[i] for i in to_compress_indices]
 
@@ -590,11 +664,10 @@ def compress(self, steps: list[MemoryStep], knowledge: str = "") -> list[MemoryS
                 )
             else:
                 logger.info(f"Compression skipped: content ({original_chars} chars) < min_compression_chars ({self.config.min_compression_chars})")
-            return steps
+            return steps, knowledge
 
-        # Generate summary using LLM (knowledge-aware: skip topics already in knowledge)
-        knowledge_tag_names = list_xml_tag_names(knowledge) if knowledge else None
-        compression_prompt = create_compression_prompt(steps_to_compress, knowledge_tag_names)
+        # Generate summary using LLM (knowledge-aware: also extracts knowledge updates)
+        compression_prompt = create_compression_prompt(steps_to_compress, knowledge)
 
         try:
             summary_message = self.compression_model.generate(
@@ -605,14 +678,15 @@ def compress(self, steps: list[MemoryStep], knowledge: str = "") -> list[MemoryS
                     )
                 ]
             )
-            summary = summary_message.content
-            if isinstance(summary, list):
-                summary = " ".join(item.get("text", "") for item in summary if isinstance(item, dict))
+            raw_output = summary_message.content
+            if isinstance(raw_output, list):
+                raw_output = " ".join(item.get("text", "") for item in raw_output if isinstance(item, dict))
 
+            summary, knowledge_updates = parse_compression_output(raw_output)
             compression_token_usage = summary_message.token_usage
         except Exception as e:
             logger.warning(f"Compression failed, keeping original steps: {e}")
-            return steps
+            return steps, knowledge
 
         # Safety check: skip compression if summary is larger than original
         summary_chars = len(summary) if summary else 0
@@ -625,7 +699,7 @@ def compress(self, steps: list[MemoryStep], knowledge: str = "") -> list[MemoryS
                 )
             else:
                 logger.info(f"Compression skipped: summary ({summary_chars} chars) >= original ({original_chars} chars)")
-            return steps
+            return steps, knowledge
 
         # Build compressed step
         compressed_step_numbers = []
@@ -681,7 +755,21 @@ def compress(self, steps: list[MemoryStep], knowledge: str = "") -> list[MemoryS
                 f"(kept {len(new_steps)} steps total, compression #{self._compression_count})"
             )
 
-        return new_steps
+        # Apply knowledge updates if any were extracted
+        updated_knowledge = knowledge
+        if knowledge_updates:
+            updated_knowledge = merge_context(knowledge, knowledge_updates)
+            knowledge_chars = len(updated_knowledge) if updated_knowledge else 0
+            if self.agent_logger:
+                tag_names = list_xml_tag_names(updated_knowledge)
+                self.agent_logger.log_markdown(
+                    content=f"Knowledge updated during compression. "
+                    f"Store: {knowledge_chars:,} chars, sections: {tag_names}.",
+                    title="Knowledge Update (Phase 1)",
+                    level=LogLevel.INFO,
+                )
+
+        return new_steps, updated_knowledge
 
     def should_merge_compressed(self, steps: list[MemoryStep]) -> bool:
         """Check if compressed steps should be merged.
@@ -841,7 +929,7 @@ def compression_callback(step: MemoryStep, agent: "MultiStepAgent") -> None:
             return
 
         if compressor.should_compress(agent.memory.steps):
-            agent.memory.steps = compressor.compress(agent.memory.steps, agent.memory.knowledge)
+            agent.memory.steps, agent.memory.knowledge = compressor.compress(agent.memory.steps, agent.memory.knowledge)
 
         if compressor.should_merge_compressed(agent.memory.steps):
             agent.memory.steps, agent.memory.knowledge = compressor.merge_compressed(
diff --git a/tests/test_compression.py b/tests/test_compression.py
index 19c26b26a..82b5578d5 100644
--- a/tests/test_compression.py
+++ b/tests/test_compression.py
@@ -9,6 +9,7 @@
     ContextCompressor,
     create_compression_callback,
     create_compression_prompt,
+    parse_compression_output,
     create_merge_prompt,
     estimate_tokens,
     estimate_step_tokens,
@@ -278,7 +279,7 @@ def test_creates_prompt_for_planning_steps(self):
         assert "<plan>" in prompt
         assert "First step" in prompt
 
-    def test_creates_prompt_with_knowledge_tags(self):
+    def test_creates_prompt_with_knowledge(self):
         steps = [
             ActionStep(
                 step_number=1,
@@ -289,12 +290,15 @@ def test_creates_prompt_with_knowledge_tags(self):
                 timing=Timing(start_time=0, end_time=1),
             ),
         ]
-        prompt = create_compression_prompt(steps, knowledge_tag_names=["plan", "architecture"])
-        assert "plan, architecture" in prompt
-        assert "already captured in the persistent knowledge store" in prompt
-        assert "Do NOT repeat" in prompt
+        knowledge = "<plan>Step 1 done</plan>\n<architecture>REST API</architecture>"
+        prompt = create_compression_prompt(steps, knowledge=knowledge)
+        assert "<current_knowledge>" in prompt
+        assert "plan" in prompt
+        assert "architecture" in prompt
+        assert "should NOT repeat" in prompt
+        assert "<knowledge_updates>" in prompt
 
-    def test_creates_prompt_without_knowledge_tags(self):
+    def test_creates_prompt_without_knowledge(self):
         steps = [
             ActionStep(
                 step_number=1,
@@ -305,10 +309,12 @@ def test_creates_prompt_without_knowledge_tags(self):
                 timing=Timing(start_time=0, end_time=1),
             ),
         ]
-        prompt = create_compression_prompt(steps, knowledge_tag_names=None)
-        assert "already captured in the persistent knowledge store" not in prompt
+        prompt = create_compression_prompt(steps, knowledge="")
+        assert "<current_knowledge>" not in prompt
+        # Should still mention knowledge_updates as optional output
+        assert "<knowledge_updates>" in prompt
 
-    def test_creates_prompt_with_empty_knowledge_tags(self):
+    def test_creates_prompt_with_empty_knowledge(self):
         steps = [
             ActionStep(
                 step_number=1,
@@ -319,9 +325,46 @@ def test_creates_prompt_with_empty_knowledge_tags(self):
                 timing=Timing(start_time=0, end_time=1),
             ),
         ]
-        prompt = create_compression_prompt(steps, knowledge_tag_names=[])
-        assert "already captured in the persistent knowledge store" not in prompt
+        prompt = create_compression_prompt(steps, knowledge="   ")
+        assert "<current_knowledge>" not in prompt
+
+
+
+
+class TestParseCompressionOutput:
+    def test_parses_structured_output(self):
+        raw = "<summary>My summary here.</summary>\n<knowledge_updates>\n<plan>Step 1</plan>\n</knowledge_updates>"
+        summary, updates = parse_compression_output(raw)
+        assert summary == "My summary here."
+        assert "<plan>Step 1</plan>" in updates
+
+    def test_parses_summary_only(self):
+        raw = "<summary>Just a summary.</summary>"
+        summary, updates = parse_compression_output(raw)
+        assert summary == "Just a summary."
+        assert updates == ""
+
+    def test_fallback_no_tags(self):
+        raw = "Plain text summary without any tags."
+        summary, updates = parse_compression_output(raw)
+        assert summary == "Plain text summary without any tags."
+        assert updates == ""
 
+    def test_fallback_no_summary_tag_with_knowledge(self):
+        raw = "Some summary text\n<knowledge_updates>\n<plan>Do X</plan>\n</knowledge_updates>"
+        summary, updates = parse_compression_output(raw)
+        assert summary == "Some summary text"
+        assert "<plan>Do X</plan>" in updates
+
+    def test_empty_input(self):
+        summary, updates = parse_compression_output("")
+        assert summary == ""
+        assert updates == ""
+
+    def test_none_input(self):
+        summary, updates = parse_compression_output(None)
+        assert summary == ""
+        assert updates == ""
 
 class TestCreateMergePrompt:
     def test_creates_prompt_from_compressed_steps(self):
@@ -412,15 +455,16 @@ def test_compress_returns_original_when_not_needed(self):
         config = CompressionConfig(max_uncompressed_steps=20, keep_recent_steps=5)
         compressor = ContextCompressor(config, MagicMock())
         steps = [ActionStep(step_number=i, timing=Timing(start_time=0, end_time=1)) for i in range(5)]
-        result = compressor.compress(steps)
-        assert result == steps
+        new_steps, new_knowledge = compressor.compress(steps)
+        assert new_steps == steps
+        assert new_knowledge == ""
 
     def test_compress_creates_compressed_step(self):
         config = CompressionConfig(max_uncompressed_steps=3, keep_recent_steps=2, min_compression_chars=0)
         mock_model = MagicMock()
         mock_model.generate.return_value = ChatMessage(
             role=MessageRole.ASSISTANT,
-            content="Summary of steps 0-5.",
+            content="<summary>Summary of steps 0-5.</summary>",
             token_usage=TokenUsage(input_tokens=100, output_tokens=50),
         )
         compressor = ContextCompressor(config, mock_model)
@@ -436,22 +480,77 @@ def test_compress_creates_compressed_step(self):
             for i in range(8)
         ]
 
-        result = compressor.compress(steps)
+        new_steps, new_knowledge = compressor.compress(steps)
 
         # Should have compressed step + 2 recent steps
-        assert len(result) < len(steps)
+        assert len(new_steps) < len(steps)
         # First step should be CompressedHistoryStep
-        assert isinstance(result[0], CompressedHistoryStep)
-        assert "Summary of steps" in result[0].summary
+        assert isinstance(new_steps[0], CompressedHistoryStep)
+        assert "Summary of steps" in new_steps[0].summary
         # Model should have been called
         mock_model.generate.assert_called_once()
+        # No knowledge updates in this case
+        assert new_knowledge == ""
+
+    def test_compress_extracts_knowledge_updates(self):
+        config = CompressionConfig(max_uncompressed_steps=3, keep_recent_steps=2, min_compression_chars=0)
+        mock_model = MagicMock()
+        mock_model.generate.return_value = ChatMessage(
+            role=MessageRole.ASSISTANT,
+            content="<summary>Summary of work done.</summary>\n<knowledge_updates>\n<plan>Step 1 complete</plan>\n</knowledge_updates>",
+            token_usage=TokenUsage(input_tokens=100, output_tokens=50),
+        )
+        compressor = ContextCompressor(config, mock_model)
+
+        steps = [
+            ActionStep(
+                step_number=i,
+                timing=Timing(start_time=0, end_time=1),
+                model_output=f"Output {i}",
+                observations=f"Observation {i}",
+            )
+            for i in range(8)
+        ]
+
+        new_steps, new_knowledge = compressor.compress(steps)
+
+        assert isinstance(new_steps[0], CompressedHistoryStep)
+        assert "Summary of work done" in new_steps[0].summary
+        assert "<plan>Step 1 complete</plan>" in new_knowledge
+
+    def test_compress_fallback_no_summary_tags(self):
+        """When LLM doesn't use <summary> tags, entire output becomes the summary."""
+        config = CompressionConfig(max_uncompressed_steps=3, keep_recent_steps=2, min_compression_chars=0)
+        mock_model = MagicMock()
+        mock_model.generate.return_value = ChatMessage(
+            role=MessageRole.ASSISTANT,
+            content="Plain text summary without tags.",
+            token_usage=TokenUsage(input_tokens=100, output_tokens=50),
+        )
+        compressor = ContextCompressor(config, mock_model)
+
+        steps = [
+            ActionStep(
+                step_number=i,
+                timing=Timing(start_time=0, end_time=1),
+                model_output=f"Output {i}",
+                observations=f"Observation {i}",
+            )
+            for i in range(8)
+        ]
+
+        new_steps, new_knowledge = compressor.compress(steps)
+
+        assert isinstance(new_steps[0], CompressedHistoryStep)
+        assert "Plain text summary without tags" in new_steps[0].summary
+        assert new_knowledge == ""
 
     def test_compress_preserves_task_step(self):
         config = CompressionConfig(max_uncompressed_steps=3, keep_recent_steps=2)
         mock_model = MagicMock()
         mock_model.generate.return_value = ChatMessage(
             role=MessageRole.ASSISTANT,
-            content="Summary",
+            content="<summary>Summary</summary>",
             token_usage=TokenUsage(input_tokens=100, output_tokens=50),
         )
         compressor = ContextCompressor(config, mock_model)
@@ -462,11 +561,11 @@ def test_compress_preserves_task_step(self):
             for i in range(10)
         ])
 
-        result = compressor.compress(steps)
+        new_steps, _ = compressor.compress(steps)
 
         # TaskStep should be first
-        assert isinstance(result[0], TaskStep)
-        assert result[0].task == "Original task"
+        assert isinstance(new_steps[0], TaskStep)
+        assert new_steps[0].task == "Original task"
 
     def test_compress_handles_model_failure_gracefully(self):
         config = CompressionConfig(max_uncompressed_steps=3, keep_recent_steps=2)
@@ -479,9 +578,10 @@ def test_compress_handles_model_failure_gracefully(self):
             for i in range(10)
         ]
 
-        # Should return original steps when compression fails
-        result = compressor.compress(steps)
-        assert result == steps
+        # Should return original steps and knowledge when compression fails
+        new_steps, new_knowledge = compressor.compress(steps)
+        assert new_steps == steps
+        assert new_knowledge == ""
 
     def test_should_merge_compressed_false_when_disabled(self):
         config = CompressionConfig(max_compressed_steps=0)

From 8890ec2dfc5f72f9fbc0d48ccf44e93fffa75733 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 20:50:51 -0300
Subject: [PATCH 08/38] docs: Update compression-plan.md for Phase 1 knowledge
 extraction

Reflects the combined summary + knowledge update feature:
- Phase 1 now extracts knowledge alongside summarization
- Updated architecture diagram showing both phases
- compress() signature updated: returns tuple[list, str]
- New parse_compression_output() documented
- create_compression_prompt() takes full knowledge string
- Knowledge store section: 3 sources (Phase 1, Phase 2, manual)
- New subsection: Phase 1 Knowledge Extraction with format details
- Updated design decisions for combined approach
- Updated test descriptions

Model: claude-opus-4.6

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 docs/compression-plan.md | 83 +++++++++++++++++++++++++++++-----------
 1 file changed, 61 insertions(+), 22 deletions(-)

diff --git a/docs/compression-plan.md b/docs/compression-plan.md
index 7b03dc007..e9f2aaff3 100644
--- a/docs/compression-plan.md
+++ b/docs/compression-plan.md
@@ -1,28 +1,35 @@
 
+
 # Context Compression & Knowledge Extraction
 
 ## Overview
-A hybrid rolling summarization system for smolagents that compresses older memory steps via LLM summarization while keeping recent steps in full detail. When compressed summaries accumulate, they are further distilled into a persistent knowledge store using tagged XML.
+A hybrid rolling summarization system for smolagents that compresses older memory steps via LLM summarization while keeping recent steps in full detail. Knowledge is extracted incrementally during compression and further refined when compressed summaries accumulate.
 
 ## Architecture
 
 ### Two-Phase Compression Pipeline
 
-**Phase 1 — Step Compression:** Older action steps are summarized by the LLM into `CompressedHistoryStep` instances. Recent steps are kept in full detail.
+**Phase 1 — Step Compression + Knowledge Extraction:** Older action steps are summarized by the LLM into `CompressedHistoryStep` instances. The same LLM call also extracts knowledge updates, which are applied to the persistent knowledge store immediately. The LLM receives the full current knowledge so it can avoid redundancy and propose corrections. Recent steps are kept in full detail.
 
-**Phase 2 — Knowledge Extraction:** When compressed steps accumulate beyond a threshold, older ones are merged into a persistent `memory.knowledge` store as tagged XML. The merged compressed steps are then removed entirely.
+**Phase 2 — Knowledge Refinement:** When compressed steps accumulate beyond a threshold, older ones are merged into the knowledge store via a separate LLM call. The merged compressed steps are then removed entirely. This phase refines and consolidates knowledge that may have been partially captured in Phase 1.
 
 ```
-Steps accumulate → compress older steps → CompressedHistoryStep summaries
-                                              ↓ (when too many accumulate)
-                                    Extract knowledge via LLM
-                                              ↓
-                                    merge_context() into memory.knowledge
-                                              ↓
-                                    Old compressed steps removed
-                                              ↓
-                                    Knowledge injected into LLM context
-                                    as <knowledge>...</knowledge> message
+Steps accumulate → Phase 1: compress older steps
+                     ↓
+                   LLM produces <summary> + optional <knowledge_updates>
+                     ↓                              ↓
+                   CompressedHistoryStep    merge_context() → memory.knowledge
+                     ↓
+                   (when too many compressed steps accumulate)
+                     ↓
+                   Phase 2: extract knowledge from old compressed steps
+                     ↓
+                   merge_context() → memory.knowledge
+                     ↓
+                   Old compressed steps removed
+                     ↓
+                   Knowledge injected into LLM context
+                   as <knowledge>...</knowledge> message
 ```
 
 ## Files
@@ -56,7 +63,7 @@ class CompressedHistoryStep(MemoryStep):
 
 class ContextCompressor:
     def should_compress(steps) -> bool
-    def compress(steps) -> list[MemoryStep]
+    def compress(steps, knowledge) -> tuple[list[MemoryStep], str]
     def should_merge_compressed(steps) -> bool
     def merge_compressed(steps, knowledge) -> tuple[list[MemoryStep], str]
 ```
@@ -65,8 +72,9 @@ Key functions:
 - `estimate_tokens(text)` — Character-based heuristic (~4 chars/token)
 - `estimate_step_tokens(step)` — Token estimate for a memory step
 - `should_preserve_step(step, config)` — Check if step must be kept
-- `create_compression_prompt(steps)` — Build LLM prompt for step summarization
-- `create_knowledge_extraction_prompt(steps, tag_names)` — Build LLM prompt for knowledge extraction
+- `create_compression_prompt(steps, knowledge)` — Build LLM prompt for step summarization with knowledge-aware context; requests structured `<summary>` + optional `<knowledge_updates>` output
+- `parse_compression_output(raw_output)` — Parse structured LLM output into `(summary, knowledge_updates)` with graceful fallback for unstructured output
+- `create_knowledge_extraction_prompt(steps, tag_names)` — Build LLM prompt for Phase 2 knowledge extraction
 - `create_merge_prompt(steps)` — Build prompt for merging compressed steps
 - `list_xml_tag_names(text)` — Extract XML tag names from a string
 - `merge_context(existing, updates)` — Apply tagged XML diff (add/update/delete)
@@ -88,6 +96,7 @@ Integration in `MultiStepAgent`:
 
 ### `src/smolagents/bp_cli.py`
 - `print_turn_summary()` shows Context and Knowledge char counts
+- `/compress` command handles tuple return from `compress()`
 - Environment variable configuration (see below)
 
 ### `tests/test_compression.py`
@@ -96,6 +105,8 @@ Tests for:
 - Token estimation functions
 - `should_preserve_step()` logic
 - `ContextCompressor.should_compress()` threshold behavior
+- `ContextCompressor.compress()` — tuple return, knowledge extraction, fallback for unstructured output
+- `parse_compression_output()` — structured output, summary-only, fallback, empty/None input
 - `merge_context()` add/update/delete operations
 - `list_xml_tag_names()` extraction
 - Integration test with mock model
@@ -111,9 +122,10 @@ The knowledge store (`memory.knowledge`) is a plain string of tagged XML:
 <current_status>API endpoints implemented, testing in progress</current_status>
 ```
 
-**Two sources of updates:**
-1. **Automatic:** `merge_compressed()` extracts knowledge from old compressed summaries (Phase 2)
-2. **Manual:** The `update_knowledge` tool lets the agent explicitly add/update/delete sections
+**Three sources of updates:**
+1. **Phase 1 (automatic):** `compress()` extracts `<knowledge_updates>` from the same LLM call that produces the summary — knowledge starts accumulating from the very first compression cycle
+2. **Phase 2 (automatic):** `merge_compressed()` extracts knowledge from old compressed summaries when they accumulate beyond the threshold — refines and consolidates
+3. **Manual:** The `update_knowledge` tool lets the agent explicitly add/update/delete sections at any time
 
 **`merge_context(existing, updates)` applies three operations:**
 - `<tag>content</tag>` where tag exists → **UPDATE** (replace content)
@@ -122,6 +134,32 @@ The knowledge store (`memory.knowledge`) is a plain string of tagged XML:
 
 **Injection:** Knowledge is inserted as a `<knowledge>...</knowledge>` USER message just before the last message in the LLM context, giving it high attention weight.
 
+### Phase 1 Knowledge Extraction
+
+During Phase 1 compression, the LLM receives:
+- The full current knowledge store as `<current_knowledge>` context
+- Instructions to output structured format:
+
+```
+<summary>
+Concise summary of compressed steps...
+</summary>
+<knowledge_updates>
+<tag>new or updated content</tag>
+<obsolete_tag/>
+</knowledge_updates>
+```
+
+The `parse_compression_output()` function handles parsing with graceful fallback:
+- If `<summary>` tags present → extract summary and knowledge_updates separately
+- If no `<summary>` tags → the output, minus any `<knowledge_updates>` block, becomes the summary (backwards compatible)
+- If no `<knowledge_updates>` → no knowledge changes applied
+
+This design means:
+- **Zero extra LLM calls** — knowledge extraction piggybacks on the existing compression call
+- **Higher fidelity** — Phase 1 has access to full original steps (not lossy summaries)
+- **Immediate availability** — knowledge accumulates from the first compression, not after 32+ steps
+
 ## BPSA CLI Configuration
 
 Environment variables (with defaults used by the CLI):
@@ -175,8 +213,9 @@ bpsa
 - **New file vs existing:** `bp_compression.py` keeps all compression/knowledge logic together, follows pattern of `monitoring.py`
 - **Callback-based:** Uses existing callback system for clean integration without modifying the agent loop
 - **Token estimation:** Character heuristic (4 chars/token) since no proactive token counting exists
-- **Graceful fallback:** If compression or knowledge extraction LLM call fails, keep original steps and log warning
-- **Two-phase design:** Step compression (lossy but retains prose summaries) feeds into knowledge extraction (structured XML) for long-term retention
+- **Graceful fallback:** If the compression LLM call fails, keep the original steps and knowledge and log a warning. If the LLM doesn't follow the structured format, the untagged output becomes the summary, and knowledge changes only if a `<knowledge_updates>` block is present.
+- **Combined summary + knowledge in Phase 1:** Single LLM call produces both summary and knowledge updates. The LLM sees the full knowledge store so it can avoid redundancy and propose corrections. Zero extra cost.
+- **Two-phase design:** Phase 1 extracts knowledge from full original steps (high fidelity). Phase 2 refines/consolidates from compressed summaries when they accumulate. Both phases use `merge_context()` for consistent tagged XML operations.
 - **Tagged XML for knowledge:** Simple, parseable format that supports incremental updates via diff operations
 - **Knowledge placement:** Injected near end of context for high attention weight in transformer models
 - **Min chars threshold:** Avoids wasting LLM calls on already-concise content
@@ -184,4 +223,4 @@ bpsa
 ## Verification
 1. Run existing tests: `pytest tests/test_memory.py tests/test_agents.py`
 2. Run compression tests: `pytest tests/test_compression.py`
-3. Manual test: Create agent with compression enabled, run multi-step task, verify memory gets compressed and knowledge accumulates
+3. Manual test: Create agent with compression enabled, run multi-step task, verify memory gets compressed and knowledge accumulates from Phase 1

From 3eb86e23c866133549f1c488465f2dda2056122d Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 21:04:51 -0300
Subject: [PATCH 09/38] feat: Include compressed history in compression prompt
 for deduplication

The Phase 1 compression prompt now includes ALL existing compressed
history steps alongside the knowledge store. This gives the LLM
full context to avoid duplication:

- **Compressed history** = chronological record of past events/changes.
  The LLM is told not to repeat any already-summarized events.
- **Knowledge** = current beliefs/facts. The LLM can propose updates
  when the execution history reveals corrections or new information.

The prompt clearly explains the distinction between the two stores:
history captures what happened over time, knowledge captures current
truth. The new summary goes into history; knowledge updates are
extracted separately.

Changes to bp_compression.py:
- create_compression_prompt() now accepts existing_summaries parameter
- Prompt includes <compressed_history> section when summaries exist
- Deduplication instructions reference both history and knowledge
- compress() collects existing CompressedHistoryStep instances and
  passes them to the prompt

Changes to tests/test_compression.py:
- New test: prompt with existing summaries
- New test: prompt with both history and knowledge
- Updated assertion for knowledge-aware prompt

Model: claude-opus-4.6

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_compression.py | 98 ++++++++++++++++++++++----------
 tests/test_compression.py        | 51 ++++++++++++++++-
 2 files changed, 118 insertions(+), 31 deletions(-)

diff --git a/src/smolagents/bp_compression.py b/src/smolagents/bp_compression.py
index 6943c9ca5..9024a0e03 100644
--- a/src/smolagents/bp_compression.py
+++ b/src/smolagents/bp_compression.py
@@ -238,19 +238,24 @@ def should_preserve_step(step: MemoryStep, config: CompressionConfig) -> bool:
 def create_compression_prompt(
     steps_to_compress: list[MemoryStep],
     knowledge: str = "",
+    existing_summaries: list["CompressedHistoryStep"] | None = None,
 ) -> str:
     """Create the prompt for the compression LLM call.
 
     Builds a structured representation of the steps to compress and asks
     the LLM to generate a concise summary preserving key information.
 
-    When knowledge is provided, the full knowledge store is included so the
-    LLM can avoid redundancy and also propose knowledge updates (corrections,
-    new findings) as part of its output.
+    The prompt provides two types of existing context to avoid duplication:
+    - **Compressed history** (existing_summaries): chronological record of past events
+      and changes. The new summary should complement, not repeat, this history.
+    - **Knowledge** (knowledge): current beliefs and facts. The LLM can propose
+      updates to knowledge when the execution history reveals corrections or
+      important new information.
 
     Args:
         steps_to_compress: List of memory steps to summarize.
         knowledge: Current knowledge store content (tagged XML). Empty string if none.
+        existing_summaries: Already-compressed history steps to avoid duplicating.
 
     Returns:
         The prompt string for the compression LLM call.
@@ -278,17 +283,61 @@ def create_compression_prompt(
 
     steps_text = "<\n>".join(step_descriptions)
 
-    if knowledge and knowledge.strip():
+    # Build compressed history section
+    history_section = ""
+    if existing_summaries:
+        history_parts = []
+        for s in existing_summaries:
+            history_parts.append(s.summary)
+        history_text = "\n---\n".join(history_parts)
+        history_section = f"""
+The following is the compressed history of earlier work (events and changes over time).
+Do NOT repeat any information already captured in the compressed history.
+Your summary should only describe NEW events, actions, and changes from the execution history below.
+
+<compressed_history>
+{history_text}
+</compressed_history>
+"""
+
+    # Build knowledge section
+    has_knowledge = knowledge and knowledge.strip()
+    has_history = bool(existing_summaries)
+
+    if has_knowledge:
         knowledge_section = f"""
-The agent has a persistent knowledge store with the following content:
+The agent has a persistent knowledge store containing current beliefs and facts:
 <current_knowledge>
 {knowledge}
 </current_knowledge>
+"""
+    else:
+        knowledge_section = ""
+
+    # Build deduplication and output instructions
+    dedup_parts = []
+    if has_history:
+        dedup_parts.append("the compressed history (past events)")
+    if has_knowledge:
+        dedup_parts.append("the knowledge store (current facts)")
+
+    if dedup_parts:
+        dedup_instruction = f"Do NOT repeat information already in {' or '.join(dedup_parts)}."
+    else:
+        dedup_instruction = ""
+
+    output_instruction = f"""
+{dedup_instruction}
+
+There are two distinct stores:
+- **Compressed history** captures events, changes, and what happened over time.
+- **Knowledge** captures current beliefs, facts, and the latest state of things.
+
+Your summary will be added to the compressed history. It should describe what happened
+(events, actions, outcomes, changes) without repeating prior history entries.
 
-Your summary should NOT repeat information already in knowledge.
-If the execution history contains corrections or important new information that
-should update the knowledge store, include a <knowledge_updates> section after
-your summary. Use XML tags to add, update, or delete knowledge sections:
+If the execution history reveals important new facts or corrections to existing knowledge,
+include a <knowledge_updates> section. Use XML tags to add, update, or delete sections:
 - To ADD or UPDATE: <tag_name>new content</tag_name>
 - To DELETE an obsolete section: <tag_name/>
 
@@ -296,33 +345,17 @@ def create_compression_prompt(
 
 Output format:
 <summary>
-Your concise summary here...
+Your concise summary of new events and changes...
 </summary>
 <knowledge_updates>
 ...tagged updates if any...
 </knowledge_updates>
-"""
-    else:
-        knowledge_section = """
-If the execution history contains important information worth remembering
-long-term, include a <knowledge_updates> section after your summary with
-tagged XML sections (e.g., <plan>...</plan>, <architecture>...</architecture>).
-
-If no knowledge is worth extracting yet, omit the <knowledge_updates> section.
-
-Output format:
-<summary>
-Your concise summary here...
-</summary>
-<knowledge_updates>
-...tagged sections if any...
-</knowledge_updates>
 """
 
     return f"""Summarize the following agent execution history into a concise summary.
 {COMMON_COMPRESSION_INSTRUCTIONS}
-{knowledge_section}
-This is the execution history:
+{history_section}{knowledge_section}{output_instruction}
+This is the execution history to summarize:
 <execution_history>
 {steps_text}
 </execution_history>
@@ -666,8 +699,13 @@ def compress(self, steps: list[MemoryStep], knowledge: str = "") -> tuple[list[M
                 logger.info(f"Compression skipped: content ({original_chars} chars) < min_compression_chars ({self.config.min_compression_chars})")
             return steps, knowledge
 
-        # Generate summary using LLM (knowledge-aware: also extracts knowledge updates)
-        compression_prompt = create_compression_prompt(steps_to_compress, knowledge)
+        # Collect existing compressed history for deduplication
+        existing_summaries = [s for s in steps if isinstance(s, CompressedHistoryStep)]
+
+        # Generate summary using LLM (history + knowledge aware)
+        compression_prompt = create_compression_prompt(
+            steps_to_compress, knowledge, existing_summaries
+        )
 
         try:
             summary_message = self.compression_model.generate(
diff --git a/tests/test_compression.py b/tests/test_compression.py
index 82b5578d5..59a797a1b 100644
--- a/tests/test_compression.py
+++ b/tests/test_compression.py
@@ -295,7 +295,7 @@ def test_creates_prompt_with_knowledge(self):
         assert "<current_knowledge>" in prompt
         assert "plan" in prompt
         assert "architecture" in prompt
-        assert "should NOT repeat" in prompt
+        assert "Do NOT repeat" in prompt
         assert "<knowledge_updates>" in prompt
 
     def test_creates_prompt_without_knowledge(self):
@@ -330,6 +330,55 @@ def test_creates_prompt_with_empty_knowledge(self):
 
 
 
+    def test_creates_prompt_with_existing_summaries(self):
+        steps = [
+            ActionStep(
+                step_number=5,
+                model_input_messages=[],
+                model_output="Implemented API endpoint",
+                observations="Tests pass",
+                model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Done"),
+                timing=Timing(start_time=0, end_time=1),
+            ),
+        ]
+        summaries = [
+            CompressedHistoryStep(
+                summary="Set up database and created schema.",
+                compressed_step_numbers=[1, 2],
+                original_step_count=2,
+            ),
+        ]
+        prompt = create_compression_prompt(steps, knowledge="", existing_summaries=summaries)
+        assert "<compressed_history>" in prompt
+        assert "Set up database" in prompt
+        assert "Do NOT repeat" in prompt
+
+    def test_creates_prompt_with_both_history_and_knowledge(self):
+        steps = [
+            ActionStep(
+                step_number=5,
+                model_input_messages=[],
+                model_output="Fixed the bug",
+                observations="All tests pass",
+                model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Done"),
+                timing=Timing(start_time=0, end_time=1),
+            ),
+        ]
+        summaries = [
+            CompressedHistoryStep(
+                summary="Explored codebase and found the bug.",
+                compressed_step_numbers=[1, 2, 3],
+                original_step_count=3,
+            ),
+        ]
+        knowledge = "<architecture>REST API with PostgreSQL</architecture>"
+        prompt = create_compression_prompt(steps, knowledge=knowledge, existing_summaries=summaries)
+        assert "<compressed_history>" in prompt
+        assert "<current_knowledge>" in prompt
+        assert "compressed history" in prompt.lower()
+        assert "knowledge" in prompt.lower()
+
+
 
 class TestParseCompressionOutput:
     def test_parses_structured_output(self):

From 227a54b69974095f666e0c2f1bef89e0c2b168f9 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 21:05:11 -0300
Subject: [PATCH 10/38] docs: Update compression-plan.md for compressed history
 inclusion in prompt

Model: claude-opus-4.6

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 docs/compression-plan.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/compression-plan.md b/docs/compression-plan.md
index e9f2aaff3..3a7bf2a67 100644
--- a/docs/compression-plan.md
+++ b/docs/compression-plan.md
@@ -9,7 +9,7 @@ A hybrid rolling summarization system for smolagents that compresses older memor
 
 ### Two-Phase Compression Pipeline
 
-**Phase 1 — Step Compression + Knowledge Extraction:** Older action steps are summarized by the LLM into `CompressedHistoryStep` instances. The same LLM call also extracts knowledge updates, which are applied to the persistent knowledge store immediately. The LLM receives the full current knowledge so it can avoid redundancy and propose corrections. Recent steps are kept in full detail.
+**Phase 1 — Step Compression + Knowledge Extraction:** Older action steps are summarized by the LLM into `CompressedHistoryStep` instances. The same LLM call also extracts knowledge updates, which are applied to the persistent knowledge store immediately. The LLM receives both the full compressed history (past events) and the full knowledge store (current facts) so it can avoid all duplication and propose corrections. Recent steps are kept in full detail.
 
 **Phase 2 — Knowledge Refinement:** When compressed steps accumulate beyond a threshold, older ones are merged into the knowledge store via a separate LLM call. The merged compressed steps are then removed entirely. This phase refines and consolidates knowledge that may have been partially captured in Phase 1.
 
@@ -72,7 +72,7 @@ Key functions:
 - `estimate_tokens(text)` — Character-based heuristic (~4 chars/token)
 - `estimate_step_tokens(step)` — Token estimate for a memory step
 - `should_preserve_step(step, config)` — Check if step must be kept
-- `create_compression_prompt(steps, knowledge)` — Build LLM prompt for step summarization with knowledge-aware context; requests structured `<summary>` + optional `<knowledge_updates>` output
+- `create_compression_prompt(steps, knowledge, existing_summaries)` — Build LLM prompt for step summarization with full context: existing compressed history (to avoid duplicating events) and knowledge store (current facts, updatable). Requests structured `<summary>` + optional `<knowledge_updates>` output
 - `parse_compression_output(raw_output)` — Parse structured LLM output into `(summary, knowledge_updates)` with graceful fallback for unstructured output
 - `create_knowledge_extraction_prompt(steps, tag_names)` — Build LLM prompt for Phase 2 knowledge extraction
 - `create_merge_prompt(steps)` — Build prompt for merging compressed steps
@@ -214,7 +214,7 @@ bpsa
 - **Callback-based:** Uses existing callback system for clean integration without modifying the agent loop
 - **Token estimation:** Character heuristic (4 chars/token) since no proactive token counting exists
 - **Graceful fallback:** If compression LLM call fails, keep original steps and log warning. If LLM doesn't follow structured format, entire output becomes the summary with no knowledge changes.
-- **Combined summary + knowledge in Phase 1:** Single LLM call produces both summary and knowledge updates. The LLM sees the full knowledge store so it can avoid redundancy and propose corrections. Zero extra cost.
+- **Combined summary + knowledge in Phase 1:** Single LLM call produces both summary and knowledge updates. The LLM sees the full compressed history AND knowledge store so it can avoid all duplication. The prompt explains the distinction: compressed history = events/changes over time, knowledge = current beliefs/facts. Zero extra cost.
 - **Two-phase design:** Phase 1 extracts knowledge from full original steps (high fidelity). Phase 2 refines/consolidates from compressed summaries when they accumulate. Both phases use `merge_context()` for consistent tagged XML operations.
 - **Tagged XML for knowledge:** Simple, parseable format that supports incremental updates via diff operations
 - **Knowledge placement:** Injected near end of context for high attention weight in transformer models

From b34cf90704241805502f1b0065cb7017e49c7307 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 21:27:10 -0300
Subject: [PATCH 11/38] Add full system prompt descriptions for plan,
 summarize_actionstep, move_actionstep_to_memory, and
 move_actionstep_from_memory tools

Set should_add_tool_description_into_system_prompt = True for these four tools
so their full descriptions are included in the system prompt, matching the
behavior already present for update_knowledge.

Model: claude-opus-4.6

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_tools.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/smolagents/bp_tools.py b/src/smolagents/bp_tools.py
index fdf1b0be1..b52d3de2d 100644
--- a/src/smolagents/bp_tools.py
+++ b/src/smolagents/bp_tools.py
@@ -2541,6 +2541,7 @@ class PlanningTool(Tool):
     """
 
     name = "plan"
+    should_add_tool_description_into_system_prompt = True
     description = (
         "Call this tool whenever you need help to create or update your plan. "
         "Use it when starting a complex task, when your current approach is failing, "
@@ -2647,6 +2648,7 @@ class MoveActionStepToMemory(Tool):
     """
 
     name = "move_actionstep_to_memory"
+    should_add_tool_description_into_system_prompt = True
     description = (
         "Move content from a specific ActionStep out of the active context into memory. "
         "This reduces context size while preserving the original content for later retrieval. "
@@ -2726,6 +2728,7 @@ class RetrieveActionStepFromMemory(Tool):
     """
 
     name = "move_actionstep_from_memory"
+    should_add_tool_description_into_system_prompt = True
     description = (
         "Restore content that was previously moved to memory back into the active context. "
         "Use this when you need to re-examine a step's response or model_output that was archived. "
@@ -2802,6 +2805,7 @@ class SummarizeActionStep(Tool):
     """
 
     name = "summarize_actionstep"
+    should_add_tool_description_into_system_prompt = True
     description = (
         "Summarize content from a specific ActionStep using custom instructions. "
         "This replaces the content with an LLM-generated summary while archiving the original for later retrieval. "

From a897fd2f3d94cb06c45000ce51702377230c411d Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 22:18:39 -0300
Subject: [PATCH 12/38] feat: show knowledge char count in step summary

Add knowledge char count to the per-step summary line shown in the console.

Changes:
- monitoring.py: Monitor.__init__ now accepts optional memory parameter
- monitoring.py: update_metrics appends knowledge char count when present
- agents.py: passes self.memory to Monitor

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/agents.py     | 2 +-
 src/smolagents/monitoring.py | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
index 267696f86..5c4052df6 100644
--- a/src/smolagents/agents.py
+++ b/src/smolagents/agents.py
@@ -366,7 +366,7 @@ def __init__(
         else:
             self.logger = logger
 
-        self.monitor = Monitor(self.model, self.logger)
+        self.monitor = Monitor(self.model, self.logger, memory=self.memory)
         self._setup_step_callbacks(step_callbacks)
         self._setup_compression(compression_config)
         self.stream_outputs = False
diff --git a/src/smolagents/monitoring.py b/src/smolagents/monitoring.py
index 3e54ad0a6..8e5289103 100644
--- a/src/smolagents/monitoring.py
+++ b/src/smolagents/monitoring.py
@@ -79,10 +79,11 @@ def __repr__(self) -> str:
 
 
 class Monitor:
-    def __init__(self, tracked_model, logger):
+    def __init__(self, tracked_model, logger, memory=None):
         self.step_durations = []
         self.tracked_model = tracked_model
         self.logger = logger
+        self.memory = memory
         self.total_input_token_count = 0
         self.total_output_token_count = 0
 
@@ -121,6 +122,9 @@ def update_metrics(self, step_log):
             step_log.context_chars = ctx_chars
             console_outputs += f"| Context: {ctx_chars:,} chars"
 
+        knowledge = getattr(self.memory, "knowledge", "") if self.memory else ""
+        if knowledge:
+            console_outputs += f"| Knowledge: {len(knowledge):,} chars"
         console_outputs += "]"
         self.logger.log(Text(console_outputs, style="dim"), level=1)
 

From 8d404cb690901adf260494c3d4659f20ad1afba0 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 22:30:32 -0300
Subject: [PATCH 13/38] Show knowledge char count in /show-stats even when
 empty

The Knowledge store row in print_stats() was previously only shown
when knowledge was non-empty. Now it always displays the character
count (showing 0 when empty) so users can always see the knowledge
store size in /show-stats.

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_cli.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index 093ee3c56..b00369080 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -874,9 +874,9 @@ def print_stats(session_stats: dict, agent=None):
         table.add_row("  Tool descriptions", f"{breakdown['tools']:,} chars")
     if agent:
         knowledge = getattr(agent.memory, "knowledge", "")
-        if knowledge:
-            table.add_row("", "")
-            table.add_row("Knowledge store", f"{len(knowledge):,} chars")
+        knowledge_chars = len(knowledge) if knowledge else 0
+        table.add_row("", "")
+        table.add_row("Knowledge store", f"{knowledge_chars:,} chars")
     console.print(table)
     console.print()
 

From a2f3610e373090623d85d634639e1a1b4a5ba509 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 23:26:03 -0300
Subject: [PATCH 14/38] UI: rename 'Knowledge store' to 'Knowledge' in user
 interface labels

Generated-by: claude-sonnet-4.6
Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_cli.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index b00369080..78d841c5d 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -876,7 +876,7 @@ def print_stats(session_stats: dict, agent=None):
         knowledge = getattr(agent.memory, "knowledge", "")
         knowledge_chars = len(knowledge) if knowledge else 0
         table.add_row("", "")
-        table.add_row("Knowledge store", f"{knowledge_chars:,} chars")
+        table.add_row("Knowledge", f"{knowledge_chars:,} chars")
     console.print(table)
     console.print()
 
@@ -1037,7 +1037,7 @@ def cmd_compression_stats(agent):
     stats_table.add_row("Original steps compressed", str(compressed_original))
     stats_table.add_row("Compression runs", str(compression_count))
     stats_table.add_row("Compressed summary chars", f"{total_chars:,}")
-    stats_table.add_row("Knowledge store chars", f"{knowledge_chars:,}")
+    stats_table.add_row("Knowledge chars", f"{knowledge_chars:,}")
     stats_table.add_row("Knowledge sections", f"{len(knowledge_tags)} ({', '.join(knowledge_tags)})" if knowledge_tags else "0")
     console.print(stats_table)
     console.print()
@@ -1082,7 +1082,7 @@ def cmd_memory_stats(agent):
     knowledge_chars = len(knowledge)
     table.add_row("Total chars", f"{total_chars:,}")
     table.add_row("Estimated tokens", f"{total_tokens:,}")
-    table.add_row("Knowledge store chars", f"{knowledge_chars:,}")
+    table.add_row("Knowledge chars", f"{knowledge_chars:,}")
     console.print(table)
     console.print()
 

From f488299ac2fe6202b5ded8a988078c803f7ddd09 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 23:28:42 -0300
Subject: [PATCH 15/38] Add /show-knowledge command to display full knowledge
 store content

Generated-by: claude-sonnet-4.6
Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_cli.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index 78d841c5d..ffc361631 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -608,7 +608,7 @@ def _save_aliases(aliases: dict):
     "/load-instructions", "/plan", "/pwd", "/redo", "/repeat", "/repeat-prompt", "/run-prompt", "/run-py", "/save",
     "/session-load", "/session-save",
     "/show-compression-stats", "/show-memory-stats", "/show-stats",
-    "/save-step", "/set-max-steps", "/show-step", "/show-steps", "/show-tools", "/undo-steps", "/verbose",
+    "/save-step", "/set-max-steps", "/show-knowledge", "/show-step", "/show-steps", "/show-tools", "/undo-steps", "/verbose",
 ]
 
 
@@ -647,6 +647,7 @@ def print_help():
     table.add_row("/show-memory-stats", "Show memory breakdown: steps, tokens, compressed vs uncompressed")
     table.add_row("/show-step <N>", "Show full content of a specific step")
     table.add_row("/show-steps", "Show one-line summary of all memory steps")
+    table.add_row("/show-knowledge", "Show the full content of the knowledge store")
     table.add_row("/show-stats", "Show session statistics")
     table.add_row("/set-max-steps <N>", "Change max_steps for the agent")
     table.add_row("/show-tools", "List all loaded tools")
@@ -1502,6 +1503,23 @@ def cmd_show_steps(agent):
     console.print()
 
 
+
+def cmd_show_knowledge(agent):
+    """Show the full content of the knowledge store."""
+    from smolagents.bp_compression import list_xml_tag_names
+
+    knowledge = getattr(agent.memory, "knowledge", "")
+    if not knowledge:
+        console.print("[yellow]Knowledge is empty.[/]")
+        return
+
+    knowledge_tags = list_xml_tag_names(knowledge)
+    sections_info = f"{len(knowledge_tags)} section(s): {', '.join(knowledge_tags)}" if knowledge_tags else "no sections"
+    console.print(Rule(f"[bold]Knowledge[/] [dim]({len(knowledge):,} chars, {sections_info})[/]", style="blue"))
+    console.print(knowledge)
+    console.print()
+
+
 def cmd_undo(agent, args: str):
     """Remove the last N steps from agent memory. Default N=1."""
     from smolagents.memory import SystemPromptStep
@@ -2015,6 +2033,9 @@ def get_input():
             elif cmd == "/show-steps":
                 cmd_show_steps(agent)
                 continue
+            elif cmd == "/show-knowledge":
+                cmd_show_knowledge(agent)
+                continue
             elif cmd == "/undo-steps":
                 cmd_undo(agent, cmd_args)
                 continue

From f3b3f00fd1ade4e522749498558ed421e67ca94b Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 23:33:47 -0300
Subject: [PATCH 16/38] Fix SyntaxWarning: use raw strings for \[ in help table
 rows

Generated-by: claude-sonnet-4.6
Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_cli.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index ffc361631..d8bbcca28 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -620,11 +620,11 @@ def print_help():
     table.add_row("!!<command>", "Run an OS command; output is appended to the next prompt sent to the agent")
     table.add_row("!!!<command>", "Run an OS command and immediately send the output to the agent for analysis")
     table.add_row("/alias <name> <value>", "Define alias (saved to ~/.bpsa_aliases). No args=list, -d <name>=delete")
-    table.add_row("/auto-approve \[on|off]", "Toggle or set auto-approve for tag execution")
+    table.add_row(r"/auto-approve \[on|off]", "Toggle or set auto-approve for tag execution")
     table.add_row("/cd <dir>", "Change working directory")
     table.add_row("/clear", "Clear screen, reset agent and conversation history")
-    table.add_row("/compress \[N]", "Force compression now, or compress a specific step N")
-    table.add_row("/compression \[on|off]", "Toggle compression on/off")
+    table.add_row(r"/compress \[N]", "Force compression now, or compress a specific step N")
+    table.add_row(r"/compression \[on|off]", "Toggle compression on/off")
     table.add_row("/compression-keep-recent-steps <N>", "Change keep_recent_steps")
     table.add_row("/compression-max-uncompressed-steps <N>", "Change max_uncompressed_steps")
     table.add_row("/compression-model <model>", "Switch compression model")
@@ -632,7 +632,7 @@ def print_help():
     table.add_row("/exit", "Exit the REPL")
     table.add_row("/help", "Show this help message")
     table.add_row("/load-instructions", "Load agent instruction files into next prompt")
-    table.add_row("/plan \[on|off|N]", "Toggle or set planning interval (default: 22)")
+    table.add_row(r"/plan \[on|off|N]", "Toggle or set planning interval (default: 22)")
     table.add_row("/pwd", "Show current working directory")
     table.add_row("/redo", "Re-run the last prompt (undo last steps and run again)")
     table.add_row("/repeat <N> <prompt>", "Run the same prompt N times, each on a fresh agent with current context")
@@ -651,7 +651,7 @@ def print_help():
     table.add_row("/show-stats", "Show session statistics")
     table.add_row("/set-max-steps <N>", "Change max_steps for the agent")
     table.add_row("/show-tools", "List all loaded tools")
-    table.add_row("/undo-steps \[N]", "Remove last N steps from memory (default: 1)")
+    table.add_row(r"/undo-steps \[N]", "Remove last N steps from memory (default: 1)")
     table.add_row("/verbose", "Toggle verbose output")
     console.print(table)
     console.print()

From 19fd51455aee58e6ba062fac0d38008b3a89fe3b Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 23:43:47 -0300
Subject: [PATCH 17/38] Add /compression-keep-compressed-steps and
 /compression-max-compressed-steps commands

Completes the compression CLI command set by adding runtime control over
the two remaining parameters: keep_compressed_steps and max_compressed_steps.

Changes:
- cmd_compression_keep_compressed(): new function to change keep_compressed_steps
- cmd_compression_max_compressed(): new function to change max_compressed_steps
- Added both commands to SLASH_COMMANDS autocomplete list
- Added both commands to /help table
- Added both dispatch handlers in main REPL loop

Model: claude-sonnet-4-6
Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_cli.py | 51 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index d8bbcca28..9a086bd13 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -603,7 +603,8 @@ def _save_aliases(aliases: dict):
 
 SLASH_COMMANDS = [
     "/alias", "/auto-approve", "/cd", "/clear", "/compress", "/compression",
-    "/compression-keep-recent-steps", "/compression-max-uncompressed-steps",
+    "/compression-keep-recent-steps", "/compression-keep-compressed-steps",
+    "/compression-max-uncompressed-steps", "/compression-max-compressed-steps",
     "/compression-model", "/dictation", "/exit", "/help",
     "/load-instructions", "/plan", "/pwd", "/redo", "/repeat", "/repeat-prompt", "/run-prompt", "/run-py", "/save",
     "/session-load", "/session-save",
@@ -627,6 +628,8 @@ def print_help():
     table.add_row(r"/compression \[on|off]", "Toggle compression on/off")
     table.add_row("/compression-keep-recent-steps <N>", "Change keep_recent_steps")
     table.add_row("/compression-max-uncompressed-steps <N>", "Change max_uncompressed_steps")
+    table.add_row("/compression-keep-compressed-steps <N>", "Change keep_compressed_steps")
+    table.add_row("/compression-max-compressed-steps <N>", "Change max_compressed_steps")
     table.add_row("/compression-model <model>", "Switch compression model")
     table.add_row(r"/dictation \[on|off]", "Toggle dictation (requires BPSA_DICTATION_TRANSCRIBER)")
     table.add_row("/exit", "Exit the REPL")
@@ -1224,6 +1227,46 @@ def cmd_compression_max_uncompressed(agent, args: str):
         console.print("[red]Invalid number. Usage: /compression-max-uncompressed-steps <N>[/]")
 
 
+def cmd_compression_keep_compressed(agent, args: str):
+    """Change keep_compressed_steps."""
+    config = _get_compression_config(agent)
+    if config is None:
+        return
+    args = args.strip()
+    if not args:
+        console.print(f"[cyan]Current keep_compressed_steps: {config.keep_compressed_steps}[/]")
+        console.print("[dim]Usage: /compression-keep-compressed-steps <N>[/]")
+        return
+    try:
+        n = int(args)
+        if n < 0:
+            raise ValueError
+        config.keep_compressed_steps = n
+        console.print(f"[green]keep_compressed_steps set to {n}[/]")
+    except ValueError:
+        console.print("[red]Invalid number. Usage: /compression-keep-compressed-steps <N>[/]")
+
+
+def cmd_compression_max_compressed(agent, args: str):
+    """Change max_compressed_steps."""
+    config = _get_compression_config(agent)
+    if config is None:
+        return
+    args = args.strip()
+    if not args:
+        console.print(f"[cyan]Current max_compressed_steps: {config.max_compressed_steps}[/]")
+        console.print("[dim]Usage: /compression-max-compressed-steps <N>[/]")
+        return
+    try:
+        n = int(args)
+        if n < 0:
+            raise ValueError
+        config.max_compressed_steps = n
+        console.print(f"[green]max_compressed_steps set to {n}[/]")
+    except ValueError:
+        console.print("[red]Invalid number. Usage: /compression-max-compressed-steps <N>[/]")
+
+
 def cmd_compression_model(agent, args: str):
     """Switch compression model."""
     config = _get_compression_config(agent)
@@ -2021,6 +2064,12 @@ def get_input():
             elif cmd == "/compression-max-uncompressed-steps":
                 cmd_compression_max_uncompressed(agent, cmd_args)
                 continue
+            elif cmd == "/compression-keep-compressed-steps":
+                cmd_compression_keep_compressed(agent, cmd_args)
+                continue
+            elif cmd == "/compression-max-compressed-steps":
+                cmd_compression_max_compressed(agent, cmd_args)
+                continue
             elif cmd == "/compression-model":
                 cmd_compression_model(agent, cmd_args)
                 continue

From e0622be9d7b3ac8862c1aa0cfbb861704be60e85 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 23:49:56 -0300
Subject: [PATCH 18/38] Auto-try .json extension in /session-load when file not
 found

If the user types /session-load mysession and the file is not found,
and no extension was given, automatically retry with mysession.json.
If the .json fallback also fails, the error message says:
  File not found: mysession (also tried mysession.json)

Model: claude-sonnet-4-6
Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_cli.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index 9a086bd13..843709ace 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -1352,8 +1352,15 @@ def cmd_session_load(agent, args: str) -> dict | None:
         console.print("[yellow]Usage: /session-load <filename>[/]")
         return None
     if not os.path.isfile(filename):
-        console.print(f"[red]File not found: {filename}[/]")
-        return None
+        # Try appending .json if no extension was given
+        if not os.path.splitext(filename)[1] and os.path.isfile(filename + ".json"):
+            filename = filename + ".json"
+        else:
+            if not os.path.splitext(filename)[1]:
+                console.print(f"[red]File not found: {filename} (also tried {filename}.json)[/]")
+            else:
+                console.print(f"[red]File not found: {filename}[/]")
+            return None
     try:
         stats = load_session(filename, agent)
         step_count = len(agent.memory.steps)

From 5c849748bef983c6c7e7ba0af6bdfa173c7f407d Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Sun, 1 Mar 2026 23:57:13 -0300
Subject: [PATCH 19/38] Add /compression-set-high, /compression-set-normal,
 /compression-set-low presets

Three new commands that set all 4 compression parameters at once:

/compression-set-high:   keep_recent=10, max_uncompressed=13, keep_compressed=10, max_compressed=13
/compression-set-normal: keep_recent=40, max_uncompressed=50, keep_compressed=10, max_compressed=20
/compression-set-low:    keep_recent=90, max_uncompressed=100, keep_compressed=20, max_compressed=40

Each command prints a confirmation table of all 4 applied values.

Model: claude-sonnet-4-6
Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_cli.py | 73 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index 843709ace..e3540964f 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -605,6 +605,7 @@ def _save_aliases(aliases: dict):
     "/alias", "/auto-approve", "/cd", "/clear", "/compress", "/compression",
     "/compression-keep-recent-steps", "/compression-keep-compressed-steps",
     "/compression-max-uncompressed-steps", "/compression-max-compressed-steps",
+    "/compression-set-high", "/compression-set-low", "/compression-set-normal",
     "/compression-model", "/dictation", "/exit", "/help",
     "/load-instructions", "/plan", "/pwd", "/redo", "/repeat", "/repeat-prompt", "/run-prompt", "/run-py", "/save",
     "/session-load", "/session-save",
@@ -630,6 +631,9 @@ def print_help():
     table.add_row("/compression-max-uncompressed-steps <N>", "Change max_uncompressed_steps")
     table.add_row("/compression-keep-compressed-steps <N>", "Change keep_compressed_steps")
     table.add_row("/compression-max-compressed-steps <N>", "Change max_compressed_steps")
+    table.add_row("/compression-set-high", "Set compression preset: HIGH (aggressive)")
+    table.add_row("/compression-set-normal", "Set compression preset: NORMAL (balanced)")
+    table.add_row("/compression-set-low", "Set compression preset: LOW (conservative)")
     table.add_row("/compression-model <model>", "Switch compression model")
     table.add_row(r"/dictation \[on|off]", "Toggle dictation (requires BPSA_DICTATION_TRANSCRIBER)")
     table.add_row("/exit", "Exit the REPL")
@@ -1267,6 +1271,66 @@ def cmd_compression_max_compressed(agent, args: str):
         console.print("[red]Invalid number. Usage: /compression-max-compressed-steps <N>[/]")
 
 
+def cmd_compression_set_high(agent):
+    """Set compression to HIGH preset (aggressive)."""
+    config = _get_compression_config(agent)
+    if config is None:
+        return
+    config.keep_recent_steps = 10
+    config.max_uncompressed_steps = 13
+    config.keep_compressed_steps = 10
+    config.max_compressed_steps = 13
+    table = Table(show_header=False, box=None)
+    table.add_column(style="cyan", no_wrap=True)
+    table.add_column(style="green")
+    table.add_row("Compression preset", "HIGH")
+    table.add_row("keep_recent_steps", "10")
+    table.add_row("max_uncompressed_steps", "13")
+    table.add_row("keep_compressed_steps", "10")
+    table.add_row("max_compressed_steps", "13")
+    console.print(table)
+
+
+def cmd_compression_set_normal(agent):
+    """Set compression to NORMAL preset (balanced)."""
+    config = _get_compression_config(agent)
+    if config is None:
+        return
+    config.keep_recent_steps = 40
+    config.max_uncompressed_steps = 50
+    config.keep_compressed_steps = 10
+    config.max_compressed_steps = 20
+    table = Table(show_header=False, box=None)
+    table.add_column(style="cyan", no_wrap=True)
+    table.add_column(style="green")
+    table.add_row("Compression preset", "NORMAL")
+    table.add_row("keep_recent_steps", "40")
+    table.add_row("max_uncompressed_steps", "50")
+    table.add_row("keep_compressed_steps", "10")
+    table.add_row("max_compressed_steps", "20")
+    console.print(table)
+
+
+def cmd_compression_set_low(agent):
+    """Set compression to LOW preset (conservative)."""
+    config = _get_compression_config(agent)
+    if config is None:
+        return
+    config.keep_recent_steps = 90
+    config.max_uncompressed_steps = 100
+    config.keep_compressed_steps = 20
+    config.max_compressed_steps = 40
+    table = Table(show_header=False, box=None)
+    table.add_column(style="cyan", no_wrap=True)
+    table.add_column(style="green")
+    table.add_row("Compression preset", "LOW")
+    table.add_row("keep_recent_steps", "90")
+    table.add_row("max_uncompressed_steps", "100")
+    table.add_row("keep_compressed_steps", "20")
+    table.add_row("max_compressed_steps", "40")
+    console.print(table)
+
+
 def cmd_compression_model(agent, args: str):
     """Switch compression model."""
     config = _get_compression_config(agent)
@@ -2077,6 +2141,15 @@ def get_input():
             elif cmd == "/compression-max-compressed-steps":
                 cmd_compression_max_compressed(agent, cmd_args)
                 continue
+            elif cmd == "/compression-set-high":
+                cmd_compression_set_high(agent)
+                continue
+            elif cmd == "/compression-set-normal":
+                cmd_compression_set_normal(agent)
+                continue
+            elif cmd == "/compression-set-low":
+                cmd_compression_set_low(agent)
+                continue
             elif cmd == "/compression-model":
                 cmd_compression_model(agent, cmd_args)
                 continue

From 03ef5f50b545c0a07fb508b1ccb3bf5f9f13630a Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 00:14:50 -0300
Subject: [PATCH 20/38] Better compression prompt.

---
 src/smolagents/bp_compression.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/smolagents/bp_compression.py b/src/smolagents/bp_compression.py
index 9024a0e03..339d03fdd 100644
--- a/src/smolagents/bp_compression.py
+++ b/src/smolagents/bp_compression.py
@@ -341,6 +341,8 @@ def create_compression_prompt(
 - To ADD or UPDATE: <tag_name>new content</tag_name>
 - To DELETE an obsolete section: <tag_name/>
 
+You can add/update/delete as many <tag_name>s as you see fit. 
+
 If no knowledge updates are needed, omit the <knowledge_updates> section entirely.
 
 Output format:
@@ -352,7 +354,8 @@ def create_compression_prompt(
 </knowledge_updates>
 """
 
-    return f"""Summarize the following agent execution history into a concise summary.
+    return f"""Hello super-intelligence!
+To your own benefit, please summarize the following agent execution history into a concise summary.
 {COMMON_COMPRESSION_INSTRUCTIONS}
 {history_section}{knowledge_section}{output_instruction}
 This is the execution history to summarize:

From c92ce3af5e1dc81a77b3efbe3784c1517df39884 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 13:49:33 -0300
Subject: [PATCH 21/38] =?UTF-8?q?Rename=20REPL=20commands:=20/compression-?=
 =?UTF-8?q?set-normal=E2=86=92medium,=20/load-instructions=E2=86=92/instru?=
 =?UTF-8?q?ctions-load?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rename /compression-set-normal to /compression-set-medium in bp_cli.py
  (autocomplete list, help table, command handler, description text)
- Rename /load-instructions to /instructions-load in bp_cli.py
  (autocomplete list, help table, command handler)
- Update CLI.md to reflect /instructions-load rename
- Alphabetical order verified and maintained in all affected lists

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 CLI.md                   |  2 +-
 src/smolagents/bp_cli.py | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/CLI.md b/CLI.md
index 6ac629f84..3d02c5bee 100644
--- a/CLI.md
+++ b/CLI.md
@@ -164,7 +164,7 @@ Use `prompt_toolkit` for:
 | `/compression-model <model>` | Switch compression model |
 | `/exit` | Exit the REPL |
 | `/help` | Show available commands and brief descriptions |
-| `/load-instructions` | Load agent instruction files into next prompt |
+| `/instructions-load` | Load agent instruction files into next prompt |
 | `/plan [on\|off\|N]` | Toggle or set planning interval (default: 22) |
 | `/pwd` | Show current working directory |
 | `/repeat <N> <prompt>` | Run the same prompt N times, each on a fresh agent with current context |
diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index e3540964f..68ebfc1d8 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -605,9 +605,9 @@ def _save_aliases(aliases: dict):
     "/alias", "/auto-approve", "/cd", "/clear", "/compress", "/compression",
     "/compression-keep-recent-steps", "/compression-keep-compressed-steps",
     "/compression-max-uncompressed-steps", "/compression-max-compressed-steps",
-    "/compression-set-high", "/compression-set-low", "/compression-set-normal",
+    "/compression-set-high", "/compression-set-low", "/compression-set-medium",
     "/compression-model", "/dictation", "/exit", "/help",
-    "/load-instructions", "/plan", "/pwd", "/redo", "/repeat", "/repeat-prompt", "/run-prompt", "/run-py", "/save",
+    "/instructions-load", "/plan", "/pwd", "/redo", "/repeat", "/repeat-prompt", "/run-prompt", "/run-py", "/save",
     "/session-load", "/session-save",
     "/show-compression-stats", "/show-memory-stats", "/show-stats",
     "/save-step", "/set-max-steps", "/show-knowledge", "/show-step", "/show-steps", "/show-tools", "/undo-steps", "/verbose",
@@ -632,13 +632,13 @@ def print_help():
     table.add_row("/compression-keep-compressed-steps <N>", "Change keep_compressed_steps")
     table.add_row("/compression-max-compressed-steps <N>", "Change max_compressed_steps")
     table.add_row("/compression-set-high", "Set compression preset: HIGH (aggressive)")
-    table.add_row("/compression-set-normal", "Set compression preset: NORMAL (balanced)")
+    table.add_row("/compression-set-medium", "Set compression preset: MEDIUM (balanced)")
     table.add_row("/compression-set-low", "Set compression preset: LOW (conservative)")
     table.add_row("/compression-model <model>", "Switch compression model")
     table.add_row(r"/dictation \[on|off]", "Toggle dictation (requires BPSA_DICTATION_TRANSCRIBER)")
     table.add_row("/exit", "Exit the REPL")
     table.add_row("/help", "Show this help message")
-    table.add_row("/load-instructions", "Load agent instruction files into next prompt")
+    table.add_row("/instructions-load", "Load agent instruction files into next prompt")
     table.add_row(r"/plan \[on|off|N]", "Toggle or set planning interval (default: 22)")
     table.add_row("/pwd", "Show current working directory")
     table.add_row("/redo", "Re-run the last prompt (undo last steps and run again)")
@@ -2086,7 +2086,7 @@ def get_input():
             elif cmd == "/pwd":
                 console.print(f"[cyan]{os.getcwd()}[/]")
                 continue
-            elif cmd == "/load-instructions":
+            elif cmd == "/instructions-load":
                 console.print("[dim]Loading agent instructions...[/]")
                 instructions = load_agent_instructions()
                 if instructions:
@@ -2144,7 +2144,7 @@ def get_input():
             elif cmd == "/compression-set-high":
                 cmd_compression_set_high(agent)
                 continue
-            elif cmd == "/compression-set-normal":
+            elif cmd == "/compression-set-medium":
                 cmd_compression_set_normal(agent)
                 continue
             elif cmd == "/compression-set-low":

From eb50bca5696092e2d75e64cc1f8ceb4ccc369233 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 14:08:09 -0300
Subject: [PATCH 22/38] feat: add MCP server support to bp_cli.py

- Add _parse_mcp_servers() helper: parses URL (HTTP) or shell command (stdio) strings
- Add _shutdown_mcp() helper: safe teardown of MCPClient context manager
- build_agent() gains mcp_servers=None param; creates MCPClient, extends tools, stores as agent._mcp_client
- run_one_shot() and run_repl() gain mcp_servers=None param, passed through to build_agent()
- _shutdown_mcp() called at all exit points: run_one_shot() cleanup, REPL EOF, /exit, and /clear (which then reconnects via build_agent())
- main() parses --mcp URL_OR_CMD (repeatable) and wires mcp_servers through

Usage:
  bpsa --mcp http://localhost:8000/mcp
  bpsa --mcp 'npx @mcp/server-filesystem /tmp'
  bpsa --mcp URL1 --mcp CMD2  (multiple servers)

Requires: pip install 'bpsa[mcp]'

Generated by claude-sonnet-4-6
Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_cli.py | 68 ++++++++++++++++++++++++++++++++++------
 1 file changed, 59 insertions(+), 9 deletions(-)

diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index 68ebfc1d8..71a98e950 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -325,7 +325,7 @@ def build_model(override_model_id=None):
     return model
 
 
-def build_agent(model, approval_callback=None, browser_enabled=False, gui_enabled=False):
+def build_agent(model, approval_callback=None, browser_enabled=False, gui_enabled=False, mcp_servers=None):
     from smolagents import CodeAgent
     from smolagents.bp_thinkers import (
         DEFAULT_THINKER_COMPRESSION, DEFAULT_THINKER_MAX_STEPS,
@@ -337,6 +337,7 @@ def build_agent(model, approval_callback=None, browser_enabled=False, gui_enable
     tools = list(DEFAULT_THINKER_TOOLS)
     browser_manager = None
     gui_manager = None
+    mcp_client = None
 
     # Image tools — always available (Pillow only; tesseract optional for OCR)
     from smolagents.bp_tools import LoadImageTool, load_image_callback
@@ -356,6 +357,11 @@ def build_agent(model, approval_callback=None, browser_enabled=False, gui_enable
         gui_manager, gui_tools = create_gui_tools()
         tools.extend(gui_tools)
 
+    if mcp_servers:
+        from smolagents import MCPClient
+        mcp_client = MCPClient(mcp_servers, structured_output=True)
+        tools.extend(mcp_client.__enter__())
+
     step_cbs = [_compact_step_callback, load_image_callback]
     if gui_manager:
         from smolagents.bp_tools_gui import gui_screenshot_callback
@@ -392,6 +398,9 @@ def build_agent(model, approval_callback=None, browser_enabled=False, gui_enable
     if gui_manager:
         agent._gui_manager = gui_manager
 
+    if mcp_client:
+        agent._mcp_client = mcp_client
+
     return agent
 
 
@@ -1777,6 +1786,38 @@ def _shutdown_gui(agent):
         manager.shutdown()
 
 
+def _parse_mcp_servers(mcp_list: list[str]):
+    """Parse a list of MCP server strings into server_parameters dicts/objects.
+
+    Each entry is either:
+      - An HTTP URL: {"url": "...", "transport": "streamable-http"}
+      - A command string: StdioServerParameters(command, args=[...])
+    """
+    import shlex
+    from mcp import StdioServerParameters
+    result = []
+    for spec in mcp_list:
+        spec = spec.strip()
+        if not spec:
+            continue
+        if spec.startswith("http://") or spec.startswith("https://"):
+            result.append({"url": spec, "transport": "streamable-http"})
+        else:
+            parts = shlex.split(spec)
+            result.append(StdioServerParameters(command=parts[0], args=parts[1:]))
+    return result
+
+
+def _shutdown_mcp(agent):
+    """Disconnect MCP client if one exists on the agent."""
+    client = getattr(agent, "_mcp_client", None)
+    if client:
+        try:
+            client.__exit__(None, None, None)
+        except Exception:
+            pass
+
+
 def prepend_instructions(task: str, instructions: str | None) -> str:
     if instructions:
         return instructions+"""
@@ -1785,13 +1826,13 @@ def prepend_instructions(task: str, instructions: str | None) -> str:
     return task
 
 
-def run_one_shot(task: str, skip_instructions: bool = False, auto_approve: bool = True, browser_enabled: bool = False, gui_enabled: bool = False):
+def run_one_shot(task: str, skip_instructions: bool = False, auto_approve: bool = True, browser_enabled: bool = False, gui_enabled: bool = False, mcp_servers=None):
     global _auto_approve
     _auto_approve = auto_approve
     try_load_dotenv()
     check_required_env()
     model = build_model()
-    agent = build_agent(model, approval_callback=interactive_approval_callback, browser_enabled=browser_enabled, gui_enabled=gui_enabled)
+    agent = build_agent(model, approval_callback=interactive_approval_callback, browser_enabled=browser_enabled, gui_enabled=gui_enabled, mcp_servers=mcp_servers)
     instructions = None
     if not skip_instructions:
         console.print("[dim]Loading agent instructions...[/]")
@@ -1813,16 +1854,17 @@ def run_one_shot(task: str, skip_instructions: bool = False, auto_approve: bool
         if manager:
             manager.shutdown()
         _shutdown_gui(agent)
+        _shutdown_mcp(agent)
 
 
-def run_repl(skip_instructions: bool = False, auto_approve: bool = True, browser_enabled: bool = False, gui_enabled: bool = False):
+def run_repl(skip_instructions: bool = False, auto_approve: bool = True, browser_enabled: bool = False, gui_enabled: bool = False, mcp_servers=None):
     global _auto_approve
     _auto_approve = auto_approve
     try_load_dotenv()
     check_required_env()
 
     model = build_model()
-    agent = build_agent(model, approval_callback=interactive_approval_callback, browser_enabled=browser_enabled, gui_enabled=gui_enabled)
+    agent = build_agent(model, approval_callback=interactive_approval_callback, browser_enabled=browser_enabled, gui_enabled=gui_enabled, mcp_servers=mcp_servers)
     model_id = get_env("BPSA_MODEL_ID")
     server_model = get_env("BPSA_SERVER_MODEL", default="OpenAIServerModel")
     tool_count = count_tools(agent)
@@ -1944,6 +1986,7 @@ def get_input():
             _shutdown_voice()
             _shutdown_browser(agent)
             _shutdown_gui(agent)
+            _shutdown_mcp(agent)
             console.print("[dim]Goodbye![/]")
             break
 
@@ -1999,6 +2042,7 @@ def get_input():
                 _shutdown_voice()
                 _shutdown_browser(agent)
                 _shutdown_gui(agent)
+                _shutdown_mcp(agent)
                 console.print("[dim]Goodbye![/]")
                 break
             elif cmd == "/help":
@@ -2040,7 +2084,8 @@ def get_input():
             elif cmd == "/clear":
                 _shutdown_browser(agent)
                 _shutdown_gui(agent)
-                agent = build_agent(model, approval_callback=interactive_approval_callback, browser_enabled=browser_enabled, gui_enabled=gui_enabled)
+                _shutdown_mcp(agent)
+                agent = build_agent(model, approval_callback=interactive_approval_callback, browser_enabled=browser_enabled, gui_enabled=gui_enabled, mcp_servers=mcp_servers)
                 session_stats = {
                     "turns": 0,
                     "total_time": 0.0,
@@ -2347,6 +2392,10 @@ def main():
         "--gui-x11", action="store_true",
         help="Enable native GUI interaction tools (screenshot, click, type, key via xdotool/ImageMagick on X11)",
     )
+    parser.add_argument(
+        "--mcp", action="append", metavar="URL_OR_CMD", dest="mcp",
+        help="Connect an MCP server. Use a URL for HTTP servers or a shell command for stdio servers. Can be repeated for multiple servers.",
+    )
     subparsers = parser.add_subparsers(dest="command")
 
     run_parser = subparsers.add_parser("run", help="Run a one-shot task")
@@ -2358,20 +2407,21 @@ def main():
     from smolagents.bp_utils import get_env_bool
     browser_enabled = args.browser or get_env_bool("BPSA_BROWSER")
     gui_enabled = args.gui_x11 or get_env_bool("BPSA_GUI")
+    mcp_servers = _parse_mcp_servers(args.mcp or []) or None
 
     # Piped input detection
     if not sys.stdin.isatty() and args.command is None:
         task = sys.stdin.read().strip()
         if task:
-            run_one_shot(task, skip_instructions=skip_instructions, auto_approve=auto_approve, browser_enabled=browser_enabled, gui_enabled=gui_enabled)
+            run_one_shot(task, skip_instructions=skip_instructions, auto_approve=auto_approve, browser_enabled=browser_enabled, gui_enabled=gui_enabled, mcp_servers=mcp_servers)
         else:
             fail("No input provided via pipe.")
         return
 
     if args.command == "run":
-        run_one_shot(args.task, skip_instructions=skip_instructions, auto_approve=auto_approve, browser_enabled=browser_enabled, gui_enabled=gui_enabled)
+        run_one_shot(args.task, skip_instructions=skip_instructions, auto_approve=auto_approve, browser_enabled=browser_enabled, gui_enabled=gui_enabled, mcp_servers=mcp_servers)
     else:
-        run_repl(skip_instructions=skip_instructions, auto_approve=auto_approve, browser_enabled=browser_enabled, gui_enabled=gui_enabled)
+        run_repl(skip_instructions=skip_instructions, auto_approve=auto_approve, browser_enabled=browser_enabled, gui_enabled=gui_enabled, mcp_servers=mcp_servers)
 
 
 if __name__ == "__main__":

From 36430241d4ec0cbbb631f0fbfa3db6652ba9e30c Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 14:11:00 -0300
Subject: [PATCH 23/38] feat: add MCP server support to bp_ad_infinitum.py

Mirror the MCP integration from bp_cli.py into the autonomous loop:

- run_loop() gains mcp_servers=None param, passed to build_agent()
- _shutdown_mcp(agent) called in finally: block after each prompt task
- main() gains --mcp URL_OR_CMD (repeatable) CLI argument
- _parse_mcp_servers() imported from bp_cli to parse server strings
- mcp_servers wired through main() -> run_loop()
- print_banner() shows MCP server count in the startup panel

Usage:
  ad-infinitum tasks/ --mcp http://localhost:8000/mcp
  ad-infinitum tasks/ --mcp 'npx @mcp/server-filesystem /tmp'

Requires: pip install 'bpsa[mcp]'

Generated by claude-sonnet-4-6
Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_ad_infinitum.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/smolagents/bp_ad_infinitum.py b/src/smolagents/bp_ad_infinitum.py
index 9c8055aca..1a82b974b 100644
--- a/src/smolagents/bp_ad_infinitum.py
+++ b/src/smolagents/bp_ad_infinitum.py
@@ -197,6 +197,8 @@ def print_banner(config: dict):
 
     browser_str = "[green]on[/]" if config.get("browser") else "off"
     gui_str = "[green]on[/]" if config.get("gui") else "off"
+    mcp_count = len(config.get("mcp") or [])
+    mcp_str = f"[green]{mcp_count} server(s)[/]" if mcp_count else "off"
 
     console.print(
         Panel.fit(
@@ -209,7 +211,8 @@ def print_banner(config: dict):
             f"Inject folder: {tree_str} | "
             f"Cooldown: {config['cooldown']}s\n"
             f"Browser: {browser_str} | "
-            f"GUI: {gui_str}",
+            f"GUI: {gui_str} | "
+            f"MCP: {mcp_str}",
             border_style="blue",
         )
     )
@@ -226,9 +229,9 @@ def print_banner(config: dict):
 
 
 def run_loop(model, tasks, cycles, max_steps, plan_interval, tree_folder, cooldown,
-             browser_enabled=False, gui_enabled=False):
+             browser_enabled=False, gui_enabled=False, mcp_servers=None):
     """Core autonomous loop: cycles x tasks, fresh agent per task."""
-    from smolagents.bp_cli import _shutdown_browser, _shutdown_gui, build_agent
+    from smolagents.bp_cli import _shutdown_browser, _shutdown_gui, _shutdown_mcp, build_agent
 
     original_dir = os.getcwd()
     total_start = time.time()
@@ -259,7 +262,7 @@ def run_loop(model, tasks, cycles, max_steps, plan_interval, tree_folder, cooldo
                 if tree_folder:
                     prompt += inject_tree(tree_folder)
 
-                agent = build_agent(model, browser_enabled=browser_enabled, gui_enabled=gui_enabled)
+                agent = build_agent(model, browser_enabled=browser_enabled, gui_enabled=gui_enabled, mcp_servers=mcp_servers)
                 if plan_interval:
                     agent.planning_interval = plan_interval
 
@@ -287,6 +290,7 @@ def run_loop(model, tasks, cycles, max_steps, plan_interval, tree_folder, cooldo
                     total_tasks_run += 1
                     console.print(f"[red]FAIL[/] {task_label} | {elapsed:.1f}s | {e}")
                 finally:
+                    _shutdown_mcp(agent)
                     _shutdown_browser(agent)
                     _shutdown_gui(agent)
 
@@ -355,6 +359,10 @@ def main():
         "--gui-x11", action="store_true", default=None,
         help="Enable native GUI interaction tools (overrides BPSA_GUI)",
     )
+    parser.add_argument(
+        "--mcp", action="append", metavar="URL_OR_CMD", dest="mcp",
+        help="MCP server to connect (URL or shell command); repeatable",
+    )
     args = parser.parse_args()
 
     # Install Ctrl+C handler
@@ -381,6 +389,8 @@ def main():
 
     browser_enabled = args.browser if args.browser else get_env_bool("BPSA_BROWSER")
     gui_enabled = args.gui_x11 if args.gui_x11 else get_env_bool("BPSA_GUI")
+    from smolagents.bp_cli import _parse_mcp_servers
+    mcp_servers = _parse_mcp_servers(args.mcp or []) or None
 
     # Load tasks
     console.print("[dim]Loading tasks...[/]")
@@ -397,6 +407,7 @@ def main():
         "cooldown": cooldown,
         "browser": browser_enabled,
         "gui": gui_enabled,
+        "mcp": mcp_servers,
     }
     print_banner(config)
 
@@ -405,7 +416,7 @@ def main():
 
     # Run the loop
     run_loop(model, tasks, cycles, max_steps, plan_interval, tree_folder, cooldown,
-             browser_enabled=browser_enabled, gui_enabled=gui_enabled)
+             browser_enabled=browser_enabled, gui_enabled=gui_enabled, mcp_servers=mcp_servers)
 
 
 if __name__ == "__main__":

From d9d4b73620535cca1123e09dfe77d32cba8b0f12 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 14:31:24 -0300
Subject: [PATCH 24/38] feat: pass post-batch steps to compressor to prevent
 stale knowledge updates

The compressor is now given the steps that occurred AFTER the batch being
compressed (post_steps). This allows the compressor to see what is still
current vs already superseded, preventing it from writing stale knowledge
that was already overridden by more recent steps.

Changes:
- create_compression_prompt(): added post_steps param (list[MemoryStep] | None)
- Builds <subsequent_steps> section in prompt (truncated per step: model output to 500 chars, observations to 300 chars, plans to 400 chars)
- compress(): computes post_steps as steps after max(to_compress_indices)
  (excluding TaskStep and CompressedHistoryStep) and passes to prompt builder

Generated by claude-sonnet-4-6

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_compression.py | 47 ++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/src/smolagents/bp_compression.py b/src/smolagents/bp_compression.py
index 339d03fdd..5462d5b38 100644
--- a/src/smolagents/bp_compression.py
+++ b/src/smolagents/bp_compression.py
@@ -239,6 +239,7 @@ def create_compression_prompt(
     steps_to_compress: list[MemoryStep],
     knowledge: str = "",
     existing_summaries: list["CompressedHistoryStep"] | None = None,
+    post_steps: list[MemoryStep] | None = None,
 ) -> str:
     """Create the prompt for the compression LLM call.
 
@@ -256,6 +257,8 @@ def create_compression_prompt(
         steps_to_compress: List of memory steps to summarize.
         knowledge: Current knowledge store content (tagged XML). Empty string if none.
         existing_summaries: Already-compressed history steps to avoid duplicating.
+        post_steps: Steps that occurred AFTER the batch being compressed. Shown to the
+            compressor so it can see what is still current vs already superseded.
 
     Returns:
         The prompt string for the compression LLM call.
@@ -314,6 +317,37 @@ def create_compression_prompt(
     else:
         knowledge_section = ""
 
+    # Build subsequent steps section (steps AFTER the batch being compressed)
+    if post_steps:
+        post_step_descs = []
+        for step in post_steps:
+            if isinstance(step, ActionStep):
+                desc = f"Step {step.step_number}:"
+                if step.model_output:
+                    output = str(step.model_output)[:500]  # keep brief
+                    desc += f"\n<model_output>{output}</model_output>"
+                if step.observations:
+                    obs = str(step.observations)[:300]
+                    desc += f"\n<result>{obs}</result>"
+                post_step_descs.append('<step>' + desc + '</step>')
+            elif isinstance(step, PlanningStep):
+                plan = (step.plan or "")[:400]
+                post_step_descs.append('<step><plan>' + plan + '</plan></step>')
+        if post_step_descs:
+            post_steps_text = "<\n>".join(post_step_descs)
+            post_steps_section = f"""
+The following steps occurred AFTER the batch you are summarizing. Use them to understand
+what is still current and what has already been superseded. Do NOT summarize these steps —
+they will remain in full detail. Only use them as context to avoid writing stale knowledge.
+<subsequent_steps>
+{post_steps_text}
+</subsequent_steps>
+"""
+        else:
+            post_steps_section = ""
+    else:
+        post_steps_section = ""
+
     # Build deduplication and output instructions
     dedup_parts = []
     if has_history:
@@ -357,7 +391,7 @@ def create_compression_prompt(
     return f"""Hello super-intelligence!
 To your own benefit, please summarize the following agent execution history into a concise summary.
 {COMMON_COMPRESSION_INSTRUCTIONS}
-{history_section}{knowledge_section}{output_instruction}
+{history_section}{knowledge_section}{post_steps_section}{output_instruction}
 This is the execution history to summarize:
 <execution_history>
 {steps_text}
@@ -705,9 +739,18 @@ def compress(self, steps: list[MemoryStep], knowledge: str = "") -> tuple[list[M
         # Collect existing compressed history for deduplication
         existing_summaries = [s for s in steps if isinstance(s, CompressedHistoryStep)]
 
+        # Steps occurring AFTER the batch being compressed (kept in full detail).
+        # Pass these to the compressor so it can see what is still current and
+        # avoid writing knowledge that was already superseded by later steps.
+        max_to_compress_index = max(to_compress_indices)
+        post_steps = [
+            steps[i] for i in range(max_to_compress_index + 1, len(steps))
+            if not isinstance(steps[i], (TaskStep, CompressedHistoryStep))
+        ]
+
         # Generate summary using LLM (history + knowledge aware)
         compression_prompt = create_compression_prompt(
-            steps_to_compress, knowledge, existing_summaries
+            steps_to_compress, knowledge, existing_summaries, post_steps=post_steps
         )
 
         try:

From 03e79b639ee17c57cd533b0b5c45c205ccdbd372 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 14:41:44 -0300
Subject: [PATCH 25/38] feat: pass post-batch steps to Phase 2
 (merge_compressed) to prevent stale knowledge

Extends the post-batch context pattern (from d9d4b73) to Phase 2 compression:
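- _build_post_steps_section() extracted as shared standalone helper (used by both phases); it now also renders CompressedHistoryStep summaries and joins steps with a plain newline instead of "<\n>"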
- create_knowledge_extraction_prompt() gains post_steps param
- merge_compressed() computes steps not being merged and passes as post_steps
- LLM now sees subsequent steps before updating knowledge in both phases

This prevents the compressor from writing stale knowledge entries that have
already been superseded by later agent activity.

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_compression.py | 93 +++++++++++++++++++++-----------
 1 file changed, 62 insertions(+), 31 deletions(-)

diff --git a/src/smolagents/bp_compression.py b/src/smolagents/bp_compression.py
index 5462d5b38..98933c273 100644
--- a/src/smolagents/bp_compression.py
+++ b/src/smolagents/bp_compression.py
@@ -235,6 +235,53 @@ def should_preserve_step(step: MemoryStep, config: CompressionConfig) -> bool:
     return False
 
 
+def _build_post_steps_section(post_steps: list["MemoryStep"] | None) -> str:
+    """Build a <subsequent_steps> prompt section from steps that follow the compressed batch.
+
+    These steps are shown to the compressor as read-only context so it can avoid
+    writing stale knowledge that has already been superseded by later activity.
+
+    Args:
+        post_steps: Steps occurring after the batch being compressed. May be None or empty.
+
+    Returns:
+        A formatted prompt section string, or empty string if nothing to show.
+    """
+    if not post_steps:
+        return ""
+
+    post_step_descs = []
+    for step in post_steps:
+        if isinstance(step, ActionStep):
+            desc = f"Step {step.step_number}:"
+            if step.model_output:
+                output = str(step.model_output)[:500]
+                desc += f"\n<model_output>{output}</model_output>"
+            if step.observations:
+                obs = str(step.observations)[:300]
+                desc += f"\n<result>{obs}</result>"
+            post_step_descs.append("<step>" + desc + "</step>")
+        elif isinstance(step, PlanningStep):
+            plan = (step.plan or "")[:400]
+            post_step_descs.append("<step><plan>" + plan + "</plan></step>")
+        elif isinstance(step, CompressedHistoryStep):
+            summary = (step.summary or "")[:400]
+            post_step_descs.append(f"<step><compressed_summary>{summary}</compressed_summary></step>")
+
+    if not post_step_descs:
+        return ""
+
+    post_steps_text = "\n".join(post_step_descs)
+    return f"""
+The following steps occurred AFTER the batch you are summarizing. Use them to understand
+what is still current and what has already been superseded. Do NOT summarize these steps --
+they will remain in full detail. Only use them as context to avoid writing stale knowledge.
+<subsequent_steps>
+{post_steps_text}
+</subsequent_steps>
+"""
+
+
 def create_compression_prompt(
     steps_to_compress: list[MemoryStep],
     knowledge: str = "",
@@ -318,35 +365,7 @@ def create_compression_prompt(
         knowledge_section = ""
 
     # Build subsequent steps section (steps AFTER the batch being compressed)
-    if post_steps:
-        post_step_descs = []
-        for step in post_steps:
-            if isinstance(step, ActionStep):
-                desc = f"Step {step.step_number}:"
-                if step.model_output:
-                    output = str(step.model_output)[:500]  # keep brief
-                    desc += f"\n<model_output>{output}</model_output>"
-                if step.observations:
-                    obs = str(step.observations)[:300]
-                    desc += f"\n<result>{obs}</result>"
-                post_step_descs.append('<step>' + desc + '</step>')
-            elif isinstance(step, PlanningStep):
-                plan = (step.plan or "")[:400]
-                post_step_descs.append('<step><plan>' + plan + '</plan></step>')
-        if post_step_descs:
-            post_steps_text = "<\n>".join(post_step_descs)
-            post_steps_section = f"""
-The following steps occurred AFTER the batch you are summarizing. Use them to understand
-what is still current and what has already been superseded. Do NOT summarize these steps —
-they will remain in full detail. Only use them as context to avoid writing stale knowledge.
-<subsequent_steps>
-{post_steps_text}
-</subsequent_steps>
-"""
-        else:
-            post_steps_section = ""
-    else:
-        post_steps_section = ""
+    post_steps_section = _build_post_steps_section(post_steps)
 
     # Build deduplication and output instructions
     dedup_parts = []
@@ -546,6 +565,7 @@ def merge_context(existing: str, updates: str) -> str:
 def create_knowledge_extraction_prompt(
     compressed_steps: list[CompressedHistoryStep],
     existing_tag_names: list[str] | None = None,
+    post_steps: list[MemoryStep] | None = None,
 ) -> str:
     """Create a prompt for extracting knowledge from compressed summaries.
 
@@ -555,6 +575,8 @@ def create_knowledge_extraction_prompt(
     Args:
         compressed_steps: List of CompressedHistoryStep instances to extract knowledge from.
         existing_tag_names: List of tag names currently in the knowledge store.
+        post_steps: Steps occurring after the compressed batch. Passed as read-only context
+            so the LLM avoids writing knowledge that has already been superseded.
 
     Returns:
         The prompt string for the knowledge extraction LLM call.
@@ -585,6 +607,8 @@ def create_knowledge_extraction_prompt(
 the important information found in the summaries below.
 Use descriptive tag names (e.g., <plan>, <architecture>, <key_findings>, <current_status>)."""
 
+    post_steps_section = _build_post_steps_section(post_steps)
+
     return f"""Extract key knowledge from the following {len(compressed_steps)} summaries
 covering {total_steps} total steps of agent execution.
 
@@ -593,7 +617,7 @@ def create_knowledge_extraction_prompt(
 {existing_section}
 
 {COMMON_COMPRESSION_INSTRUCTIONS}
-
+{post_steps_section}
 <SUMMARIES>
 {summaries_text}
 </SUMMARIES>
@@ -934,8 +958,15 @@ def merge_compressed(self, steps: list[MemoryStep], knowledge: str = "") -> tupl
             return steps, knowledge
 
         # Build knowledge extraction prompt and call LLM
+        # post_steps: everything NOT being merged (kept compressed + live recent steps)
+        # so the extractor knows what is still current vs already superseded
+        merge_set_ids = set(id(s) for s in steps_to_merge)
+        post_steps = [
+            s for s in steps
+            if id(s) not in merge_set_ids and not isinstance(s, (TaskStep, SystemPromptStep))
+        ]
         existing_tag_names = list_xml_tag_names(knowledge)
-        merge_prompt = create_knowledge_extraction_prompt(steps_to_merge, existing_tag_names)
+        merge_prompt = create_knowledge_extraction_prompt(steps_to_merge, existing_tag_names, post_steps)
 
         try:
             merge_message = self.compression_model.generate(

From 8797da9edf5bfe4af89dbe967a2aba0d3d2d49d0 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 14:47:26 -0300
Subject: [PATCH 26/38] Small instruction update.

---
 src/smolagents/prompts/code_agent.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/smolagents/prompts/code_agent.yaml b/src/smolagents/prompts/code_agent.yaml
index 9686391d8..b93c0dade 100644
--- a/src/smolagents/prompts/code_agent.yaml
+++ b/src/smolagents/prompts/code_agent.yaml
@@ -177,6 +177,7 @@ system_prompt: |-
   22. If starting a completely new task unrelated to the previous, using summarization/memory or similar tools is a must.
   23. Before you start coding, please search in the existing code for similar functions to those that you intend to implement. Avoid creating
    replicated code.
+  24. If you have a tool to update knowledge, you must keep the knowledge updated.
 
   Any final output that you would like to give such as "my name is Assistant" should be done via a python code block with final_answer("my name is Assistant").
 

From 7ebeefe866237656375a385500d05f77d38aee26 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 15:00:20 -0300
Subject: [PATCH 27/38] Minor prompt fixes.

---
 src/smolagents/bp_compression.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/smolagents/bp_compression.py b/src/smolagents/bp_compression.py
index 98933c273..297489856 100644
--- a/src/smolagents/bp_compression.py
+++ b/src/smolagents/bp_compression.py
@@ -397,6 +397,7 @@ def create_compression_prompt(
 You can add/update/delete as many <tag_name>s as you see fit. 
 
 If no knowledge updates are needed, omit the <knowledge_updates> section entirely.
+In the case that you spot any other error in the knowledge, you can fix as you see fit.
 
 Output format:
 <summary>
@@ -609,8 +610,12 @@ def create_knowledge_extraction_prompt(
 
     post_steps_section = _build_post_steps_section(post_steps)
 
-    return f"""Extract key knowledge from the following {len(compressed_steps)} summaries
+    return f"""Hello super-intelligence!
+Please extract key knowledge from the following {len(compressed_steps)} summaries
 covering {total_steps} total steps of agent execution.
+These summaries are about to be removed from the context. Therefore, updating the knowledge
+with any relevant information is important. In the case that you spot any other error in
+the knowledge, you can fix as you see fit.
 
 Output the knowledge as XML-tagged sections. Each section should contain concise,
 factual information that would be useful for continuing the task.

From 8671f6da319a6a3f53ad2cf3c95c1d102509ba60 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 15:11:25 -0300
Subject: [PATCH 28/38] docs: add Inspirations from Biology section to
 compression-plan.md
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Documents 9 parallels between BPSA two-phase compression and the Standard Model
of Memory Consolidation in neuroscience:
- Working memory vs long-term memory (prefrontal cortex / hippocampus)
- Sleep consolidation phases (SWS → Phase 1, REM → Phase 2)
- Episodic vs semantic memory (CompressedHistoryStep vs knowledge store)
- Reconstruction vs recording (Bartlett 1932)
- Schemas / semantic networks → tagged XML knowledge
- Attention weight / recency effect → knowledge placement
- Forgetting curve → min_compression_chars threshold
- Motivated forgetting → preserve_final_answer_steps
- Metacognition → agent-driven update_knowledge

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 docs/compression-plan.md | 89 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/docs/compression-plan.md b/docs/compression-plan.md
index 3a7bf2a67..d97bcf2a3 100644
--- a/docs/compression-plan.md
+++ b/docs/compression-plan.md
@@ -224,3 +224,92 @@ bpsa
 1. Run existing tests: `pytest tests/test_memory.py tests/test_agents.py`
 2. Run compression tests: `pytest tests/test_compression.py`
 3. Manual test: Create agent with compression enabled, run multi-step task, verify memory gets compressed and knowledge accumulates from Phase 1
+
+---
+
+## Inspirations from Biology
+
+The two-phase compression pipeline was designed from first principles, yet it converges
+remarkably closely on the **Standard Model of Memory Consolidation** — the dominant
+neuroscientific theory of how biological brains move experiences from short-term storage
+into long-term knowledge. The parallels are not superficial; they reflect deep structural
+constraints that any system managing finite working memory over unbounded experience must
+eventually solve.
+
+### The Deepest Parallel
+
+The entire two-phase design mirrors the **Standard Model of Memory Consolidation**:
+
+```
+Experience → Hippocampus (short-lived, detailed)
+                ↓  (sleep / Phase 1)
+           Compressed replay → early neocortex
+                ↓  (deeper sleep / Phase 2)
+           Abstract semantic knowledge → late neocortex
+                ↓
+           Hippocampus no longer needed for retrieval
+```
+
+Replace hippocampus with "action steps", early neocortex with "CompressedHistoryStep",
+late neocortex with "knowledge store" — and you have BPSA's compression pipeline almost
+exactly.
+
+---
+
+### 1. Working Memory vs. Long-Term Memory
+**BPSA:** Recent steps kept in **full detail** (`keep_recent_steps`). Older steps compressed into summaries.
+**Human mind:** The **prefrontal cortex** holds a small working memory buffer (~7±2 items, Miller 1956) in full resolution. Older experiences are consolidated and compressed by the hippocampus over time.
+
+> *"Keep 40 recent steps in full" is literally what your brain does right now — you remember today in detail, last Tuesday as a blur.*
+
+---
+
+### 2. Sleep Consolidation → Phase 1 + Phase 2
+**BPSA:** Two-phase pipeline — Phase 1 compresses live steps + extracts knowledge; Phase 2 merges accumulated compressed steps into deeper knowledge.
+**Human mind:** Sleep has **two consolidation phases** — slow-wave sleep (SWS) replays episodic memories from hippocampus to neocortex (Phase 1 analog), and REM sleep abstracts and integrates those replays into semantic knowledge (Phase 2 analog).
+
+> *Phase 2 in BPSA ("merge_compressed when they accumulate") maps almost perfectly to REM sleep — a second pass that refines, consolidates, and removes raw episodes.*
+
+---
+
+### 3. Episodic vs. Semantic Memory
+**BPSA:** `CompressedHistoryStep` = what happened (events, actions taken). `knowledge` store = what is currently true (facts, beliefs, current state).
+**Human mind:** **Episodic memory** = "I did X at time T." **Semantic memory** = "X is true." The brain explicitly separates these. Old episodic memories gradually convert to semantic ones — exactly what Phase 2 does.
+
+> *"Compressed history = events/changes over time; knowledge = current beliefs/facts" — this is straight from cognitive psychology textbooks.*
+
+---
+
+### 4. Reconstruction vs. Recording
+**BPSA:** Graceful fallback — if the LLM doesn't follow structured format, the whole output becomes the summary anyway. Knowledge is reconstructed, not byte-copied.
+**Human mind:** Bartlett (1932) showed memory is **reconstructive**, not reproductive. We don't record facts — we rebuild them each time from schemas. Compression is lossy by design, and that's *fine*.
+
+---
+
+### 5. Schemas / Semantic Networks → Tagged XML Knowledge
+**BPSA:** Knowledge stored as tagged XML sections (`<plan>`, `<key_findings>`, `<current_status>`). Sections can be added, updated, or deleted via diff operations.
+**Human mind:** Cognitive psychologists call these **schemas** — organised clusters of knowledge with labels and relationships, updated incrementally as new information arrives. The `merge_context()` add/update/delete operations mirror how schemas are revised.
+
+---
+
+### 6. Attention Weight → Knowledge Placement
+**BPSA:** Knowledge injected **near the end of context** — "for high attention weight in transformer models."
+**Human mind:** The **recency effect** — items presented last in a sequence are better recalled. Also, the brain gives elevated attention to currently-active goals and beliefs (**Global Workspace Theory**, Baars 1988). Placing knowledge last is the LLM equivalent.
+
+---
+
+### 7. The Forgetting Curve → `min_compression_chars`
+**BPSA:** Skip compression if content is below 4096 chars — don't waste a compression call on already-tiny content.
+**Human mind:** Ebbinghaus's **forgetting curve** — trivial, low-information experiences are never consolidated at all. The brain applies an implicit threshold: only emotionally or informationally significant events get replayed and stored.
+
+---
+
+### 8. Motivated Forgetting → `preserve_final_answer_steps`
+**BPSA:** Final answer steps are *never* compressed away — flagged as important and preserved unconditionally.
+**Human mind:** The brain prioritises **goal-relevant memories**. We remember outcomes and conclusions far longer than intermediate steps. A chess player remembers the winning move, not every prior calculation.
+
+---
+
+### 9. Metacognition → Agent-Driven Knowledge Updates
+**BPSA:** The `update_knowledge` tool lets the *agent itself* explicitly revise its knowledge store at any point during live execution.
+**Human mind:** **Metacognition** — the ability to consciously reflect on and revise one's own beliefs. This is the highest-level memory operation, reserved for deliberate reasoning — exactly what the live agent does when it calls `update_knowledge`.

From b7d16b306011723d7e33835b7fefecc4ff6e1aaa Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 15:18:22 -0300
Subject: [PATCH 29/38] Updates documentation about compression.

---
 docs/compression-plan.md | 157 +++++++++++++++++----------------------
 1 file changed, 70 insertions(+), 87 deletions(-)

diff --git a/docs/compression-plan.md b/docs/compression-plan.md
index d97bcf2a3..9d2c56cb2 100644
--- a/docs/compression-plan.md
+++ b/docs/compression-plan.md
@@ -5,6 +5,76 @@
 ## Overview
 A hybrid rolling summarization system for smolagents that compresses older memory steps via LLM summarization while keeping recent steps in full detail. Knowledge is extracted incrementally during compression and further refined when compressed summaries accumulate.
 
+## Inspirations from Biology
+
+The two-phase compression pipeline was designed from first principles, yet it converges
+remarkably closely on the **Standard Model of Memory Consolidation** — the dominant
+neuroscientific theory of how biological brains move experiences from short-term storage
+into long-term knowledge. The parallels are not superficial; they reflect deep structural
+constraints that any system managing finite working memory over unbounded experience must
+eventually solve.
+
+### The Deepest Parallel
+
+The entire two-phase design mirrors the **Standard Model of Memory Consolidation**:
+
+```
+Experience → Hippocampus (short-lived, detailed)
+                ↓  (sleep / Phase 1)
+           Compressed replay → early neocortex
+                ↓  (deeper sleep / Phase 2)
+           Abstract semantic knowledge → late neocortex
+                ↓
+           Hippocampus no longer needed for retrieval
+```
+
+Replace hippocampus with "action steps", early neocortex with "CompressedHistoryStep",
+late neocortex with "knowledge store" — and you have BPSA's compression pipeline almost
+exactly.
+
+---
+
+### 1. Working Memory vs. Long-Term Memory
+**BPSA:** Recent steps kept in **full detail** (`keep_recent_steps`). Older steps compressed into summaries.
+**Human mind:** The **prefrontal cortex** holds a small working memory buffer (~7±2 items, Miller 1956) in full resolution. Older experiences are consolidated and compressed by the hippocampus over time.
+
+> *"Keep 40 recent steps in full" is literally what your brain does right now — you remember today in detail, last Tuesday as a blur.*
+
+---
+
+### 2. Sleep Consolidation → Phase 1 + Phase 2
+**BPSA:** Two-phase pipeline — Phase 1 compresses live steps + extracts knowledge; Phase 2 merges accumulated compressed steps into deeper knowledge.
+**Human mind:** Sleep has **two consolidation phases** — slow-wave sleep (SWS) replays episodic memories from hippocampus to neocortex (Phase 1 analog), and REM sleep abstracts and integrates those replays into semantic knowledge (Phase 2 analog).
+
+> *Phase 2 in BPSA ("merge_compressed when they accumulate") maps almost perfectly to REM sleep — a second pass that refines, consolidates, and removes raw episodes.*
+
+---
+
+### 3. Episodic vs. Semantic Memory
+**BPSA:** `CompressedHistoryStep` = what happened (events, actions taken). `knowledge` store = what is currently true (facts, beliefs, current state).
+**Human mind:** **Episodic memory** = "I did X at time T." **Semantic memory** = "X is true." The brain explicitly separates these. Old episodic memories gradually convert to semantic ones — exactly what Phase 2 does.
+
+> *"Compressed history = events/changes over time; knowledge = current beliefs/facts" — this is straight from cognitive psychology textbooks.*
+
+---
+
+### 4. Reconstruction vs. Recording
+**BPSA:** Graceful fallback — if the LLM doesn't follow structured format, the whole output becomes the summary anyway. Knowledge is reconstructed, not byte-copied.
+**Human mind:** Bartlett (1932) showed memory is **reconstructive**, not reproductive. We don't record facts — we rebuild them each time from schemas. Compression is lossy by design, and that's *fine*.
+
+---
+
+### 5. Schemas / Semantic Networks → Tagged XML Knowledge
+**BPSA:** Knowledge stored as tagged XML sections (`<plan>`, `<key_findings>`, `<current_status>`). Sections can be added, updated, or deleted via diff operations.
+**Human mind:** Cognitive psychologists call these **schemas** — organised clusters of knowledge with labels and relationships, updated incrementally as new information arrives. The `merge_context()` add/update/delete operations mirror how schemas are revised.
+
+---
+
+### 6. Metacognition → Agent-Driven Knowledge Updates
+**BPSA:** The `update_knowledge` tool lets the *agent itself* explicitly revise its knowledge store at any point during live execution.
+**Human mind:** **Metacognition** — the ability to consciously reflect on and revise one's own beliefs. This is the highest-level memory operation, reserved for deliberate reasoning — exactly what the live agent does when it calls `update_knowledge`.
+
+
 ## Architecture
 
 ### Two-Phase Compression Pipeline
@@ -225,91 +295,4 @@ bpsa
 2. Run compression tests: `pytest tests/test_compression.py`
 3. Manual test: Create agent with compression enabled, run multi-step task, verify memory gets compressed and knowledge accumulates from Phase 1
 
----
-
-## Inspirations from Biology
-
-The two-phase compression pipeline was designed from first principles, yet it converges
-remarkably closely on the **Standard Model of Memory Consolidation** — the dominant
-neuroscientific theory of how biological brains move experiences from short-term storage
-into long-term knowledge. The parallels are not superficial; they reflect deep structural
-constraints that any system managing finite working memory over unbounded experience must
-eventually solve.
-
-### The Deepest Parallel
-
-The entire two-phase design mirrors the **Standard Model of Memory Consolidation**:
 
-```
-Experience → Hippocampus (short-lived, detailed)
-                ↓  (sleep / Phase 1)
-           Compressed replay → early neocortex
-                ↓  (deeper sleep / Phase 2)
-           Abstract semantic knowledge → late neocortex
-                ↓
-           Hippocampus no longer needed for retrieval
-```
-
-Replace hippocampus with "action steps", early neocortex with "CompressedHistoryStep",
-late neocortex with "knowledge store" — and you have BPSA's compression pipeline almost
-exactly.
-
----
-
-### 1. Working Memory vs. Long-Term Memory
-**BPSA:** Recent steps kept in **full detail** (`keep_recent_steps`). Older steps compressed into summaries.
-**Human mind:** The **prefrontal cortex** holds a small working memory buffer (~7±2 items, Miller 1956) in full resolution. Older experiences are consolidated and compressed by the hippocampus over time.
-
-> *"Keep 40 recent steps in full" is literally what your brain does right now — you remember today in detail, last Tuesday as a blur.*
-
----
-
-### 2. Sleep Consolidation → Phase 1 + Phase 2
-**BPSA:** Two-phase pipeline — Phase 1 compresses live steps + extracts knowledge; Phase 2 merges accumulated compressed steps into deeper knowledge.
-**Human mind:** Sleep has **two consolidation phases** — slow-wave sleep (SWS) replays episodic memories from hippocampus to neocortex (Phase 1 analog), and REM sleep abstracts and integrates those replays into semantic knowledge (Phase 2 analog).
-
-> *Phase 2 in BPSA ("merge_compressed when they accumulate") maps almost perfectly to REM sleep — a second pass that refines, consolidates, and removes raw episodes.*
-
----
-
-### 3. Episodic vs. Semantic Memory
-**BPSA:** `CompressedHistoryStep` = what happened (events, actions taken). `knowledge` store = what is currently true (facts, beliefs, current state).
-**Human mind:** **Episodic memory** = "I did X at time T." **Semantic memory** = "X is true." The brain explicitly separates these. Old episodic memories gradually convert to semantic ones — exactly what Phase 2 does.
-
-> *"Compressed history = events/changes over time; knowledge = current beliefs/facts" — this is straight from cognitive psychology textbooks.*
-
----
-
-### 4. Reconstruction vs. Recording
-**BPSA:** Graceful fallback — if the LLM doesn't follow structured format, the whole output becomes the summary anyway. Knowledge is reconstructed, not byte-copied.
-**Human mind:** Bartlett (1932) showed memory is **reconstructive**, not reproductive. We don't record facts — we rebuild them each time from schemas. Compression is lossy by design, and that's *fine*.
-
----
-
-### 5. Schemas / Semantic Networks → Tagged XML Knowledge
-**BPSA:** Knowledge stored as tagged XML sections (`<plan>`, `<key_findings>`, `<current_status>`). Sections can be added, updated, or deleted via diff operations.
-**Human mind:** Cognitive psychologists call these **schemas** — organised clusters of knowledge with labels and relationships, updated incrementally as new information arrives. The `merge_context()` add/update/delete operations mirror how schemas are revised.
-
----
-
-### 6. Attention Weight → Knowledge Placement
-**BPSA:** Knowledge injected **near the end of context** — "for high attention weight in transformer models."
-**Human mind:** The **recency effect** — items presented last in a sequence are better recalled. Also, the brain gives elevated attention to currently-active goals and beliefs (**Global Workspace Theory**, Baars 1988). Placing knowledge last is the LLM equivalent.
-
----
-
-### 7. The Forgetting Curve → `min_compression_chars`
-**BPSA:** Skip compression if content is below 4096 chars — don't waste a compression call on already-tiny content.
-**Human mind:** Ebbinghaus's **forgetting curve** — trivial, low-information experiences are never consolidated at all. The brain applies an implicit threshold: only emotionally or informationally significant events get replayed and stored.
-
----
-
-### 8. Motivated Forgetting → `preserve_final_answer_steps`
-**BPSA:** Final answer steps are *never* compressed away — flagged as important and preserved unconditionally.
-**Human mind:** The brain prioritises **goal-relevant memories**. We remember outcomes and conclusions far longer than intermediate steps. A chess player remembers the winning move, not every prior calculation.
-
----
-
-### 9. Metacognition → Agent-Driven Knowledge Updates
-**BPSA:** The `update_knowledge` tool lets the *agent itself* explicitly revise its knowledge store at any point during live execution.
-**Human mind:** **Metacognition** — the ability to consciously reflect on and revise one's own beliefs. This is the highest-level memory operation, reserved for deliberate reasoning — exactly what the live agent does when it calls `update_knowledge`.

From eb9cb5d3e6affaee4ddb0b493c070f86a5c5f827 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 15:19:27 -0300
Subject: [PATCH 30/38] Renaming compression doc file.

---
 docs/{compression-plan.md => compression.md} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename docs/{compression-plan.md => compression.md} (100%)

diff --git a/docs/compression-plan.md b/docs/compression.md
similarity index 100%
rename from docs/compression-plan.md
rename to docs/compression.md

From 57474e682e57ffc6bc0301e504ec4bf41aae8810 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 15:25:22 -0300
Subject: [PATCH 31/38] Minor fix on the compression documentation.

---
 docs/compression-plan.md | 292 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 292 insertions(+)
 create mode 100644 docs/compression-plan.md

diff --git a/docs/compression-plan.md b/docs/compression-plan.md
new file mode 100644
index 000000000..01429f82f
--- /dev/null
+++ b/docs/compression-plan.md
@@ -0,0 +1,292 @@
+
+
+# Context Compression & Knowledge Extraction
+
+## Overview
+A hybrid rolling summarization system for smolagents that compresses older memory steps via LLM summarization while keeping recent steps in full detail. Knowledge is extracted incrementally during compression and further refined when compressed summaries accumulate.
+
+## Inspirations from Biology
+
+The two-phase compression pipeline was designed from first principles, yet it converges
+remarkably closely on the **Standard Model of Memory Consolidation** — the dominant
+neuroscientific theory of how biological brains move experiences from short-term storage
+into long-term knowledge. The parallels are not superficial; they reflect deep structural
+constraints that any system managing finite working memory over unbounded experience must
+eventually solve.
+
+### The Deepest Parallel
+
+The entire two-phase design mirrors the **Standard Model of Memory Consolidation**:
+
+```
+Experience → Hippocampus (short-lived, detailed)
+                ↓  (sleep / Phase 1)
+           Compressed replay → early neocortex
+                ↓  (deeper sleep / Phase 2)
+           Abstract semantic knowledge → late neocortex
+                ↓
+           Hippocampus no longer needed for retrieval
+```
+
+Replace hippocampus with "action steps", early neocortex with "CompressedHistoryStep",
+late neocortex with "knowledge store" — and you have BPSA's compression pipeline almost
+exactly.
+
+---
+
+### 1. Working Memory vs. Long-Term Memory
+**BPSA:** Recent steps kept in **full detail** (`keep_recent_steps`). Older steps compressed into summaries.
+**Human mind:** The **prefrontal cortex** holds a small working memory buffer (~7±2 items, Miller 1956) in full resolution. Older experiences are consolidated and compressed by the hippocampus over time.
+
+> *"Keep the most recent steps in full" is literally what your brain does right now — you remember today in detail, last Tuesday as a blur.*
+
+---
+
+### 2. Sleep Consolidation → Phase 1 + Phase 2
+**BPSA:** Two-phase pipeline — Phase 1 compresses live steps + extracts knowledge; Phase 2 merges accumulated compressed steps into deeper knowledge.
+**Human mind:** Sleep has **two consolidation phases** — slow-wave sleep (SWS) replays episodic memories from hippocampus to neocortex (Phase 1 analog), and REM sleep abstracts and integrates those replays into semantic knowledge (Phase 2 analog).
+
+> *Phase 2 in BPSA ("merge_compressed when they accumulate") maps almost perfectly to REM sleep — a second pass that refines, consolidates, and removes raw episodes.*
+
+---
+
+### 3. Episodic vs. Semantic Memory
+**BPSA:** `CompressedHistoryStep` = what happened (events, actions taken). `knowledge` store = what is currently true (facts, beliefs, current state).
+**Human mind:** **Episodic memory** = "I did X at time T." **Semantic memory** = "X is true." The brain explicitly separates these. Old episodic memories gradually convert to semantic ones — exactly what Phase 2 does.
+
+> *"Compressed history = events/changes over time; knowledge = current beliefs/facts" — this is straight from cognitive psychology textbooks.*
+
+---
+
+### 4. Schemas / Semantic Networks → Tagged XML Knowledge
+**BPSA:** Knowledge stored as tagged XML sections (`<plan>`, `<key_findings>`, `<current_status>`). Sections can be added, updated, or deleted via diff operations.
+**Human mind:** Cognitive psychologists call these **schemas** — organised clusters of knowledge with labels and relationships, updated incrementally as new information arrives. The `merge_context()` add/update/delete operations mirror how schemas are revised.
+
+---
+
+### 5. Metacognition → Agent-Driven Knowledge Updates
+**BPSA:** The `update_knowledge` tool lets the *agent itself* explicitly revise its knowledge store at any point during live execution.
+**Human mind:** **Metacognition** — the ability to consciously reflect on and revise one's own beliefs. This is the highest-level memory operation, reserved for deliberate reasoning — exactly what the live agent does when it calls `update_knowledge`.
+
+
+## Architecture
+
+### Two-Phase Compression Pipeline
+
+**Phase 1 — Step Compression + Knowledge Extraction:** Older action steps are summarized by the LLM into `CompressedHistoryStep` instances. The same LLM call also extracts knowledge updates, which are applied to the persistent knowledge store immediately. The LLM receives both the full compressed history (past events) and the full knowledge store (current facts) so it can avoid all duplication and propose corrections. Recent steps are kept in full detail.
+
+**Phase 2 — Knowledge Refinement:** When compressed steps accumulate beyond a threshold, older ones are merged into the knowledge store via a separate LLM call. The merged compressed steps are then removed entirely. This phase refines and consolidates knowledge that may have been partially captured in Phase 1.
+
+```
+Steps accumulate → Phase 1: compress older steps
+                     ↓
+                   LLM produces <summary> + optional <knowledge_updates>
+                     ↓                              ↓
+                   CompressedHistoryStep    merge_context() → memory.knowledge
+                     ↓
+                   (when too many compressed steps accumulate)
+                     ↓
+                   Phase 2: extract knowledge from old compressed steps
+                     ↓
+                   merge_context() → memory.knowledge
+                     ↓
+                   Old compressed steps removed
+                     ↓
+                   Knowledge injected into LLM context
+                   as <knowledge>...</knowledge> message
+```
+
+## Files
+
+### `src/smolagents/bp_compression.py`
+All compression and knowledge logic:
+
+```python
+@dataclass
+class CompressionConfig:
+    enabled: bool = True
+    keep_recent_steps: int = 5            # Recent steps to keep in full
+    max_uncompressed_steps: int = 10      # Compress when exceeds this
+    estimated_token_threshold: int = 0    # Token-based trigger (0=disabled)
+    compression_model: Model | None = None  # Separate model for compression (None=use main)
+    max_summary_tokens: int = 50000       # Max tokens for generated summary
+    preserve_error_steps: bool = False    # Keep error steps uncompressed
+    preserve_final_answer_steps: bool = True  # Keep final_answer steps uncompressed
+    max_compressed_steps: int = 32        # Merge compressed steps when exceeds this
+    keep_compressed_steps: int = 22       # Recent compressed steps to keep during merge
+    min_compression_chars: int = 4096     # Skip compression if content below this
+
+@dataclass
+class CompressedHistoryStep(MemoryStep):
+    summary: str
+    compressed_step_numbers: list[int]
+    original_step_count: int
+    timing: Timing | None
+    compression_token_usage: TokenUsage | None
+    # to_messages() renders as [COMPRESSED HISTORY - N steps summarized]
+
+class ContextCompressor:
+    def should_compress(steps) -> bool
+    def compress(steps, knowledge) -> tuple[list[MemoryStep], str]
+    def should_merge_compressed(steps) -> bool
+    def merge_compressed(steps, knowledge) -> tuple[list[MemoryStep], str]
+```
+
+Key functions:
+- `estimate_tokens(text)` — Character-based heuristic (~4 chars/token)
+- `estimate_step_tokens(step)` — Token estimate for a memory step
+- `should_preserve_step(step, config)` — Check if step must be kept
+- `create_compression_prompt(steps, knowledge, existing_summaries)` — Build LLM prompt for step summarization with full context: existing compressed history (to avoid duplicating events) and knowledge store (current facts, updatable). Requests structured `<summary>` + optional `<knowledge_updates>` output
+- `parse_compression_output(raw_output)` — Parse structured LLM output into `(summary, knowledge_updates)` with graceful fallback for unstructured output
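+- `create_knowledge_extraction_prompt(compressed_steps, existing_tag_names, post_steps)` — Build LLM prompt for Phase 2 knowledge extraction; the optional `post_steps` are the steps occurring after the compressed batch, passed as read-only context so the LLM avoids writing knowledge that has already been superseded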
+- `create_merge_prompt(steps)` — Build prompt for merging compressed steps
+- `list_xml_tag_names(text)` — Extract XML tag names from a string
+- `merge_context(existing, updates)` — Apply tagged XML diff (add/update/delete)
+- `create_compression_callback(compressor)` — Callback for automatic triggering
+
+### `src/smolagents/agents.py`
+Integration in `MultiStepAgent`:
+- `__init__` accepts `compression_config: CompressionConfig | None = None`
+- `_setup_compression()` registers the compression callback
+- `write_memory_to_messages()` injects `memory.knowledge` as a `<knowledge>` message just before the last message in context
+- System prompt log line shows Context and Knowledge char counts
+
+### `src/smolagents/memory.py`
+- `AgentMemory.knowledge: str = ""` — Persistent knowledge store (tagged XML)
+- Reset on `memory.reset()`
+
+### `src/smolagents/bp_tools.py`
+- `UpdateKnowledge` tool — Allows the agent to explicitly update its knowledge store via `update_knowledge(updates='<tag>content</tag>')`
+
+### `src/smolagents/bp_cli.py`
+- `print_turn_summary()` shows Context and Knowledge char counts
+- `/compress` command handles tuple return from `compress()`
+- Environment variable configuration (see below)
+
+### `tests/test_compression.py`
+Tests for:
+- `CompressedHistoryStep.to_messages()` and `dict()` serialization
+- Token estimation functions
+- `should_preserve_step()` logic
+- `ContextCompressor.should_compress()` threshold behavior
+- `ContextCompressor.compress()` — tuple return, knowledge extraction, fallback for unstructured output
+- `parse_compression_output()` — structured output, summary-only, fallback, empty/None input
+- `merge_context()` add/update/delete operations
+- `list_xml_tag_names()` extraction
+- Integration test with mock model
+
+## Knowledge Store
+
+The knowledge store (`memory.knowledge`) is a plain string of tagged XML:
+
+```xml
+<plan>1. Setup done
+2. Now implementing API</plan>
+<key_findings>The database uses PostgreSQL 14 with pgvector extension</key_findings>
+<current_status>API endpoints implemented, testing in progress</current_status>
+```
+
+**Three sources of updates:**
+1. **Phase 1 (automatic):** `compress()` extracts `<knowledge_updates>` from the same LLM call that produces the summary — knowledge starts accumulating from the very first compression cycle
+2. **Phase 2 (automatic):** `merge_compressed()` extracts knowledge from old compressed summaries when they accumulate beyond the threshold — refines and consolidates
+3. **Manual:** The `update_knowledge` tool lets the agent explicitly add/update/delete sections at any time
+
+**`merge_context(existing, updates)` applies three operations:**
+- `<tag>content</tag>` where tag exists → **UPDATE** (replace content)
+- `<tag>content</tag>` where tag is new → **APPEND**
+- `<tag/>` or `<tag></tag>` (self-closing/empty) → **DELETE**
+
+**Injection:** Knowledge is inserted as a `<knowledge>...</knowledge>` USER message just before the last message in the LLM context, giving it high attention weight.
+
+### Phase 1 Knowledge Extraction
+
+During Phase 1 compression, the LLM receives:
+- The full current knowledge store as `<current_knowledge>` context
+- Instructions to output structured format:
+
+```
+<summary>
+Concise summary of compressed steps...
+</summary>
+<knowledge_updates>
+<tag>new or updated content</tag>
+<obsolete_tag/>
+</knowledge_updates>
+```
+
+The `parse_compression_output()` function handles parsing with graceful fallback:
+- If `<summary>` tags present → extract summary and knowledge_updates separately
+- If no `<summary>` tags → entire output becomes the summary (backwards compatible)
+- If no `<knowledge_updates>` → no knowledge changes applied
+
+This design means:
+- **Zero extra LLM calls** — knowledge extraction piggybacks on the existing compression call
+- **Higher fidelity** — Phase 1 has access to full original steps (not lossy summaries)
+- **Immediate availability** — knowledge accumulates from the first compression, not after 32+ steps
+
+## BPSA CLI Configuration
+
+Environment variables (with defaults used by the CLI):
+
+| Variable | Default | Description |
+|---|---|---|
+| `BPSA_COMPRESSION_ENABLED` | `1` | Enable compression |
+| `BPSA_COMPRESSION_KEEP_RECENT_STEPS` | `40` | Recent steps to keep uncompressed |
+| `BPSA_COMPRESSION_MAX_UNCOMPRESSED_STEPS` | `50` | Trigger threshold for compression |
+| `BPSA_COMPRESSION_KEEP_COMPRESSED_STEPS` | `80` | Compressed steps to keep on merge |
+| `BPSA_COMPRESSION_MAX_COMPRESSED_STEPS` | `120` | Trigger threshold for merge |
+| `BPSA_COMPRESSION_TOKEN_THRESHOLD` | `0` | Token-based trigger (0=disabled) |
+| `BPSA_COMPRESSION_MODEL` | same as main | Model ID for compression |
+| `BPSA_COMPRESSION_MAX_SUMMARY_TOKENS` | `50000` | Max tokens in summary |
+| `BPSA_COMPRESSION_PRESERVE_ERROR_STEPS` | `0` | Keep error steps uncompressed |
+| `BPSA_COMPRESSION_PRESERVE_FINAL_ANSWER_STEPS` | `1` | Keep final_answer steps |
+| `BPSA_COMPRESSION_MIN_CHARS` | `4096` | Min chars before compressing |
+
+Note: The CLI defaults differ from `CompressionConfig` defaults to suit interactive use (more steps kept).
+
+## Usage Example
+
+### Programmatic
+```python
+from smolagents import CodeAgent, CompressionConfig, LiteLLMModel
+
+config = CompressionConfig(
+    keep_recent_steps=5,
+    max_uncompressed_steps=10,
+    compression_model=LiteLLMModel(model_id="gpt-4o-mini"),  # Cheaper model
+    max_compressed_steps=32,
+    keep_compressed_steps=22,
+)
+
+agent = CodeAgent(
+    tools=[...],
+    model=main_model,
+    compression_config=config,
+)
+```
+
+### BPSA CLI
+```bash
+export BPSA_COMPRESSION_ENABLED=1
+export BPSA_COMPRESSION_KEEP_RECENT_STEPS=40
+export BPSA_COMPRESSION_MAX_UNCOMPRESSED_STEPS=50
+bpsa
+```
+
+## Design Decisions
+- **New file vs existing:** `bp_compression.py` keeps all compression/knowledge logic together, follows pattern of `monitoring.py`
+- **Callback-based:** Uses existing callback system for clean integration without modifying the agent loop
+- **Token estimation:** Character heuristic (4 chars/token) since no proactive token counting exists
+- **Graceful fallback:** If compression LLM call fails, keep original steps and log warning. If LLM doesn't follow structured format, entire output becomes the summary with no knowledge changes.
+- **Combined summary + knowledge in Phase 1:** Single LLM call produces both summary and knowledge updates. The LLM sees the full compressed history AND knowledge store so it can avoid all duplication. The prompt explains the distinction: compressed history = events/changes over time, knowledge = current beliefs/facts. Zero extra cost.
+- **Two-phase design:** Phase 1 extracts knowledge from full original steps (high fidelity). Phase 2 refines/consolidates from compressed summaries when they accumulate. Both phases use `merge_context()` for consistent tagged XML operations.
+- **Tagged XML for knowledge:** Simple, parseable format that supports incremental updates via diff operations
+- **Knowledge placement:** Injected near end of context for high attention weight in transformer models
+- **Min chars threshold:** Avoids wasting LLM calls on already-concise content
+
+## Verification
+1. Run existing tests: `pytest tests/test_memory.py tests/test_agents.py`
+2. Run compression tests: `pytest tests/test_compression.py`
+3. Manual test: Create agent with compression enabled, run multi-step task, verify memory gets compressed and knowledge accumulates from Phase 1
+
+

From ab25e836a6518a2a9243fb4d2cf57a92752a6cad Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Mon, 2 Mar 2026 15:28:18 -0300
Subject: [PATCH 32/38] Minor fix to the compression documentation.

---
 docs/compression-plan.md | 292 ---------------------------------------
 docs/compression.md      |  10 +-
 2 files changed, 2 insertions(+), 300 deletions(-)
 delete mode 100644 docs/compression-plan.md

diff --git a/docs/compression-plan.md b/docs/compression-plan.md
deleted file mode 100644
index 01429f82f..000000000
--- a/docs/compression-plan.md
+++ /dev/null
@@ -1,292 +0,0 @@
-
-
-# Context Compression & Knowledge Extraction
-
-## Overview
-A hybrid rolling summarization system for smolagents that compresses older memory steps via LLM summarization while keeping recent steps in full detail. Knowledge is extracted incrementally during compression and further refined when compressed summaries accumulate.
-
-## Inspirations from Biology
-
-The two-phase compression pipeline was designed from first principles, yet it converges
-remarkably closely on the **Standard Model of Memory Consolidation** — the dominant
-neuroscientific theory of how biological brains move experiences from short-term storage
-into long-term knowledge. The parallels are not superficial; they reflect deep structural
-constraints that any system managing finite working memory over unbounded experience must
-eventually solve.
-
-### The Deepest Parallel
-
-The entire two-phase design mirrors the **Standard Model of Memory Consolidation**:
-
-```
-Experience → Hippocampus (short-lived, detailed)
-                ↓  (sleep / Phase 1)
-           Compressed replay → early neocortex
-                ↓  (deeper sleep / Phase 2)
-           Abstract semantic knowledge → late neocortex
-                ↓
-           Hippocampus no longer needed for retrieval
-```
-
-Replace hippocampus with "action steps", early neocortex with "CompressedHistoryStep",
-late neocortex with "knowledge store" — and you have BPSA's compression pipeline almost
-exactly.
-
----
-
-### 1. Working Memory vs. Long-Term Memory
-**BPSA:** Recent steps kept in **full detail** (`keep_recent_steps`). Older steps compressed into summaries.
-**Human mind:** The **prefrontal cortex** holds a small working memory buffer (~7±2 items, Miller 1956) in full resolution. Older experiences are consolidated and compressed by the hippocampus over time.
-
-> *"Keep 40 recent steps in full" is literally what your brain does right now — you remember today in detail, last Tuesday as a blur.*
-
----
-
-### 2. Sleep Consolidation → Phase 1 + Phase 2
-**BPSA:** Two-phase pipeline — Phase 1 compresses live steps + extracts knowledge; Phase 2 merges accumulated compressed steps into deeper knowledge.
-**Human mind:** Sleep has **two consolidation phases** — slow-wave sleep (SWS) replays episodic memories from hippocampus to neocortex (Phase 1 analog), and REM sleep abstracts and integrates those replays into semantic knowledge (Phase 2 analog).
-
-> *Phase 2 in BPSA ("merge_compressed when they accumulate") maps almost perfectly to REM sleep — a second pass that refines, consolidates, and removes raw episodes.*
-
----
-
-### 3. Episodic vs. Semantic Memory
-**BPSA:** `CompressedHistoryStep` = what happened (events, actions taken). `knowledge` store = what is currently true (facts, beliefs, current state).
-**Human mind:** **Episodic memory** = "I did X at time T." **Semantic memory** = "X is true." The brain explicitly separates these. Old episodic memories gradually convert to semantic ones — exactly what Phase 2 does.
-
-> *"Compressed history = events/changes over time; knowledge = current beliefs/facts" — this is straight from cognitive psychology textbooks.*
-
----
-
-### 4. Schemas / Semantic Networks → Tagged XML Knowledge
-**BPSA:** Knowledge stored as tagged XML sections (`<plan>`, `<key_findings>`, `<current_status>`). Sections can be added, updated, or deleted via diff operations.
-**Human mind:** Cognitive psychologists call these **schemas** — organised clusters of knowledge with labels and relationships, updated incrementally as new information arrives. The `merge_context()` add/update/delete operations mirror how schemas are revised.
-
----
-
-### 5. Metacognition → Agent-Driven Knowledge Updates
-**BPSA:** The `update_knowledge` tool lets the *agent itself* explicitly revise its knowledge store at any point during live execution.
-**Human mind:** **Metacognition** — the ability to consciously reflect on and revise one's own beliefs. This is the highest-level memory operation, reserved for deliberate reasoning — exactly what the live agent does when it calls `update_knowledge`.
-
-
-## Architecture
-
-### Two-Phase Compression Pipeline
-
-**Phase 1 — Step Compression + Knowledge Extraction:** Older action steps are summarized by the LLM into `CompressedHistoryStep` instances. The same LLM call also extracts knowledge updates, which are applied to the persistent knowledge store immediately. The LLM receives both the full compressed history (past events) and the full knowledge store (current facts) so it can avoid all duplication and propose corrections. Recent steps are kept in full detail.
-
-**Phase 2 — Knowledge Refinement:** When compressed steps accumulate beyond a threshold, older ones are merged into the knowledge store via a separate LLM call. The merged compressed steps are then removed entirely. This phase refines and consolidates knowledge that may have been partially captured in Phase 1.
-
-```
-Steps accumulate → Phase 1: compress older steps
-                     ↓
-                   LLM produces <summary> + optional <knowledge_updates>
-                     ↓                              ↓
-                   CompressedHistoryStep    merge_context() → memory.knowledge
-                     ↓
-                   (when too many compressed steps accumulate)
-                     ↓
-                   Phase 2: extract knowledge from old compressed steps
-                     ↓
-                   merge_context() → memory.knowledge
-                     ↓
-                   Old compressed steps removed
-                     ↓
-                   Knowledge injected into LLM context
-                   as <knowledge>...</knowledge> message
-```
-
-## Files
-
-### `src/smolagents/bp_compression.py`
-All compression and knowledge logic:
-
-```python
-@dataclass
-class CompressionConfig:
-    enabled: bool = True
-    keep_recent_steps: int = 5            # Recent steps to keep in full
-    max_uncompressed_steps: int = 10      # Compress when exceeds this
-    estimated_token_threshold: int = 0    # Token-based trigger (0=disabled)
-    compression_model: Model | None = None  # Separate model for compression (None=use main)
-    max_summary_tokens: int = 50000       # Max tokens for generated summary
-    preserve_error_steps: bool = False    # Keep error steps uncompressed
-    preserve_final_answer_steps: bool = True  # Keep final_answer steps uncompressed
-    max_compressed_steps: int = 32        # Merge compressed steps when exceeds this
-    keep_compressed_steps: int = 22       # Recent compressed steps to keep during merge
-    min_compression_chars: int = 4096     # Skip compression if content below this
-
-@dataclass
-class CompressedHistoryStep(MemoryStep):
-    summary: str
-    compressed_step_numbers: list[int]
-    original_step_count: int
-    timing: Timing | None
-    compression_token_usage: TokenUsage | None
-    # to_messages() renders as [COMPRESSED HISTORY - N steps summarized]
-
-class ContextCompressor:
-    def should_compress(steps) -> bool
-    def compress(steps, knowledge) -> tuple[list[MemoryStep], str]
-    def should_merge_compressed(steps) -> bool
-    def merge_compressed(steps, knowledge) -> tuple[list[MemoryStep], str]
-```
-
-Key functions:
-- `estimate_tokens(text)` — Character-based heuristic (~4 chars/token)
-- `estimate_step_tokens(step)` — Token estimate for a memory step
-- `should_preserve_step(step, config)` — Check if step must be kept
-- `create_compression_prompt(steps, knowledge, existing_summaries)` — Build LLM prompt for step summarization with full context: existing compressed history (to avoid duplicating events) and knowledge store (current facts, updatable). Requests structured `<summary>` + optional `<knowledge_updates>` output
-- `parse_compression_output(raw_output)` — Parse structured LLM output into `(summary, knowledge_updates)` with graceful fallback for unstructured output
-- `create_knowledge_extraction_prompt(steps, tag_names)` — Build LLM prompt for Phase 2 knowledge extraction
-- `create_merge_prompt(steps)` — Build prompt for merging compressed steps
-- `list_xml_tag_names(text)` — Extract XML tag names from a string
-- `merge_context(existing, updates)` — Apply tagged XML diff (add/update/delete)
-- `create_compression_callback(compressor)` — Callback for automatic triggering
-
-### `src/smolagents/agents.py`
-Integration in `MultiStepAgent`:
-- `__init__` accepts `compression_config: CompressionConfig | None = None`
-- `_setup_compression()` registers the compression callback
-- `write_memory_to_messages()` injects `memory.knowledge` as a `<knowledge>` message just before the last message in context
-- System prompt log line shows Context and Knowledge char counts
-
-### `src/smolagents/memory.py`
-- `AgentMemory.knowledge: str = ""` — Persistent knowledge store (tagged XML)
-- Reset on `memory.reset()`
-
-### `src/smolagents/bp_tools.py`
-- `UpdateKnowledge` tool — Allows the agent to explicitly update its knowledge store via `update_knowledge(updates='<tag>content</tag>')`
-
-### `src/smolagents/bp_cli.py`
-- `print_turn_summary()` shows Context and Knowledge char counts
-- `/compress` command handles tuple return from `compress()`
-- Environment variable configuration (see below)
-
-### `tests/test_compression.py`
-Tests for:
-- `CompressedHistoryStep.to_messages()` and `dict()` serialization
-- Token estimation functions
-- `should_preserve_step()` logic
-- `ContextCompressor.should_compress()` threshold behavior
-- `ContextCompressor.compress()` — tuple return, knowledge extraction, fallback for unstructured output
-- `parse_compression_output()` — structured output, summary-only, fallback, empty/None input
-- `merge_context()` add/update/delete operations
-- `list_xml_tag_names()` extraction
-- Integration test with mock model
-
-## Knowledge Store
-
-The knowledge store (`memory.knowledge`) is a plain string of tagged XML:
-
-```xml
-<plan>1. Setup done
-2. Now implementing API</plan>
-<key_findings>The database uses PostgreSQL 14 with pgvector extension</key_findings>
-<current_status>API endpoints implemented, testing in progress</current_status>
-```
-
-**Three sources of updates:**
-1. **Phase 1 (automatic):** `compress()` extracts `<knowledge_updates>` from the same LLM call that produces the summary — knowledge starts accumulating from the very first compression cycle
-2. **Phase 2 (automatic):** `merge_compressed()` extracts knowledge from old compressed summaries when they accumulate beyond the threshold — refines and consolidates
-3. **Manual:** The `update_knowledge` tool lets the agent explicitly add/update/delete sections at any time
-
-**`merge_context(existing, updates)` applies three operations:**
-- `<tag>content</tag>` where tag exists → **UPDATE** (replace content)
-- `<tag>content</tag>` where tag is new → **APPEND**
-- `<tag/>` or `<tag></tag>` (self-closing/empty) → **DELETE**
-
-**Injection:** Knowledge is inserted as a `<knowledge>...</knowledge>` USER message just before the last message in the LLM context, giving it high attention weight.
-
-### Phase 1 Knowledge Extraction
-
-During Phase 1 compression, the LLM receives:
-- The full current knowledge store as `<current_knowledge>` context
-- Instructions to output structured format:
-
-```
-<summary>
-Concise summary of compressed steps...
-</summary>
-<knowledge_updates>
-<tag>new or updated content</tag>
-<obsolete_tag/>
-</knowledge_updates>
-```
-
-The `parse_compression_output()` function handles parsing with graceful fallback:
-- If `<summary>` tags present → extract summary and knowledge_updates separately
-- If no `<summary>` tags → entire output becomes the summary (backwards compatible)
-- If no `<knowledge_updates>` → no knowledge changes applied
-
-This design means:
-- **Zero extra LLM calls** — knowledge extraction piggybacks on the existing compression call
-- **Higher fidelity** — Phase 1 has access to full original steps (not lossy summaries)
-- **Immediate availability** — knowledge accumulates from the first compression, not after 32+ steps
-
-## BPSA CLI Configuration
-
-Environment variables (with defaults used by the CLI):
-
-| Variable | Default | Description |
-|---|---|---|
-| `BPSA_COMPRESSION_ENABLED` | `1` | Enable compression |
-| `BPSA_COMPRESSION_KEEP_RECENT_STEPS` | `40` | Recent steps to keep uncompressed |
-| `BPSA_COMPRESSION_MAX_UNCOMPRESSED_STEPS` | `50` | Trigger threshold for compression |
-| `BPSA_COMPRESSION_KEEP_COMPRESSED_STEPS` | `80` | Compressed steps to keep on merge |
-| `BPSA_COMPRESSION_MAX_COMPRESSED_STEPS` | `120` | Trigger threshold for merge |
-| `BPSA_COMPRESSION_TOKEN_THRESHOLD` | `0` | Token-based trigger (0=disabled) |
-| `BPSA_COMPRESSION_MODEL` | same as main | Model ID for compression |
-| `BPSA_COMPRESSION_MAX_SUMMARY_TOKENS` | `50000` | Max tokens in summary |
-| `BPSA_COMPRESSION_PRESERVE_ERROR_STEPS` | `0` | Keep error steps uncompressed |
-| `BPSA_COMPRESSION_PRESERVE_FINAL_ANSWER_STEPS` | `1` | Keep final_answer steps |
-| `BPSA_COMPRESSION_MIN_CHARS` | `4096` | Min chars before compressing |
-
-Note: The CLI defaults differ from `CompressionConfig` defaults to suit interactive use (more steps kept).
-
-## Usage Example
-
-### Programmatic
-```python
-from smolagents import CodeAgent, CompressionConfig, LiteLLMModel
-
-config = CompressionConfig(
-    keep_recent_steps=5,
-    max_uncompressed_steps=10,
-    compression_model=LiteLLMModel(model_id="gpt-4o-mini"),  # Cheaper model
-    max_compressed_steps=32,
-    keep_compressed_steps=22,
-)
-
-agent = CodeAgent(
-    tools=[...],
-    model=main_model,
-    compression_config=config,
-)
-```
-
-### BPSA CLI
-```bash
-export BPSA_COMPRESSION_ENABLED=1
-export BPSA_COMPRESSION_KEEP_RECENT_STEPS=40
-export BPSA_COMPRESSION_MAX_UNCOMPRESSED_STEPS=50
-bpsa
-```
-
-## Design Decisions
-- **New file vs existing:** `bp_compression.py` keeps all compression/knowledge logic together, follows pattern of `monitoring.py`
-- **Callback-based:** Uses existing callback system for clean integration without modifying the agent loop
-- **Token estimation:** Character heuristic (4 chars/token) since no proactive token counting exists
-- **Graceful fallback:** If compression LLM call fails, keep original steps and log warning. If LLM doesn't follow structured format, entire output becomes the summary with no knowledge changes.
-- **Combined summary + knowledge in Phase 1:** Single LLM call produces both summary and knowledge updates. The LLM sees the full compressed history AND knowledge store so it can avoid all duplication. The prompt explains the distinction: compressed history = events/changes over time, knowledge = current beliefs/facts. Zero extra cost.
-- **Two-phase design:** Phase 1 extracts knowledge from full original steps (high fidelity). Phase 2 refines/consolidates from compressed summaries when they accumulate. Both phases use `merge_context()` for consistent tagged XML operations.
-- **Tagged XML for knowledge:** Simple, parseable format that supports incremental updates via diff operations
-- **Knowledge placement:** Injected near end of context for high attention weight in transformer models
-- **Min chars threshold:** Avoids wasting LLM calls on already-concise content
-
-## Verification
-1. Run existing tests: `pytest tests/test_memory.py tests/test_agents.py`
-2. Run compression tests: `pytest tests/test_compression.py`
-3. Manual test: Create agent with compression enabled, run multi-step task, verify memory gets compressed and knowledge accumulates from Phase 1
-
-
diff --git a/docs/compression.md b/docs/compression.md
index 9d2c56cb2..01429f82f 100644
--- a/docs/compression.md
+++ b/docs/compression.md
@@ -58,19 +58,13 @@ exactly.
 
 ---
 
-### 4. Reconstruction vs. Recording
-**BPSA:** Graceful fallback — if the LLM doesn't follow structured format, the whole output becomes the summary anyway. Knowledge is reconstructed, not byte-copied.
-**Human mind:** Bartlett (1932) showed memory is **reconstructive**, not reproductive. We don't record facts — we rebuild them each time from schemas. Compression is lossy by design, and that's *fine*.
-
----
-
-### 5. Schemas / Semantic Networks → Tagged XML Knowledge
+### 4. Schemas / Semantic Networks → Tagged XML Knowledge
 **BPSA:** Knowledge stored as tagged XML sections (`<plan>`, `<key_findings>`, `<current_status>`). Sections can be added, updated, or deleted via diff operations.
 **Human mind:** Cognitive psychologists call these **schemas** — organised clusters of knowledge with labels and relationships, updated incrementally as new information arrives. The `merge_context()` add/update/delete operations mirror how schemas are revised.
 
 ---
 
-### 6. Metacognition → Agent-Driven Knowledge Updates
+### 5. Metacognition → Agent-Driven Knowledge Updates
 **BPSA:** The `update_knowledge` tool lets the *agent itself* explicitly revise its knowledge store at any point during live execution.
 **Human mind:** **Metacognition** — the ability to consciously reflect on and revise one's own beliefs. This is the highest-level memory operation, reserved for deliberate reasoning — exactly what the live agent does when it calls `update_knowledge`.
 

From 7aec6a9da99eb6c4d41a1e2765734c8f832e51a9 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Tue, 3 Mar 2026 00:11:40 -0300
Subject: [PATCH 33/38] Compression code review.

---
 docs/compression.md                    | 21 ++++++++++++---------
 src/smolagents/bp_cli.py               | 16 ++++++++--------
 src/smolagents/bp_compression.py       | 14 ++++++++++++--
 src/smolagents/prompts/code_agent.yaml | 12 +++++++++---
 4 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/docs/compression.md b/docs/compression.md
index 01429f82f..a6d6cc882 100644
--- a/docs/compression.md
+++ b/docs/compression.md
@@ -3,16 +3,19 @@
 # Context Compression & Knowledge Extraction
 
 ## Overview
-A hybrid rolling summarization system for smolagents that compresses older memory steps via LLM summarization while keeping recent steps in full detail. Knowledge is extracted incrementally during compression and further refined when compressed summaries accumulate.
+A hybrid rolling summarization system for BPSA that compresses older memory steps via LLM summarization while keeping recent steps in full detail. Knowledge is extracted incrementally during compression and further refined when compressed summaries accumulate.
 
 ## Inspirations from Biology
 
 The two-phase compression pipeline was designed from first principles, yet it converges
-remarkably closely on the **Standard Model of Memory Consolidation** — the dominant
+strikingly closely on the **Standard Model of Memory Consolidation** — the dominant
 neuroscientific theory of how biological brains move experiences from short-term storage
-into long-term knowledge. The parallels are not superficial; they reflect deep structural
-constraints that any system managing finite working memory over unbounded experience must
-eventually solve.
+into long-term knowledge. The parallels are not superficial; they appear to reflect deep
+structural constraints that any system managing finite working memory over unbounded
+experience must eventually solve. This convergence is a hypothesis, not a proven fact —
+but the hypothesis is a strong one: finite capacity + unbounded experience + the need for
+generalisation are universal pressures, and similar pressures tend to produce similar
+solutions regardless of substrate.
 
 ### The Deepest Parallel
 
@@ -44,9 +47,9 @@ exactly.
 
 ### 2. Sleep Consolidation → Phase 1 + Phase 2
 **BPSA:** Two-phase pipeline — Phase 1 compresses live steps + extracts knowledge; Phase 2 merges accumulated compressed steps into deeper knowledge.
-**Human mind:** Sleep has **two consolidation phases** — slow-wave sleep (SWS) replays episodic memories from hippocampus to neocortex (Phase 1 analog), and REM sleep abstracts and integrates those replays into semantic knowledge (Phase 2 analog).
+**Human mind:** Sleep has **two consolidation phases** — slow-wave sleep (SWS) replays episodic memories from hippocampus to neocortex (Phase 1 analog), and REM sleep is associated with abstracting and integrating those replays into semantic knowledge (Phase 2 analog). The analogy is functional: SWS and REM differ in their underlying neural mechanisms (sharp-wave ripples vs. theta oscillations) rather than being a clean "first pass / second pass" distinction, but the broad directionality — from detailed episodic replay toward abstract semantic integration — maps well onto BPSA's two phases.
 
-> *Phase 2 in BPSA ("merge_compressed when they accumulate") maps almost perfectly to REM sleep — a second pass that refines, consolidates, and removes raw episodes.*
+> *Phase 2 in BPSA ("merge_compressed when they accumulate") is functionally analogous to the later sleep stages that refine, consolidate, and eventually render raw episodic traces unnecessary for retrieval.*
 
 ---
 
@@ -64,9 +67,9 @@ exactly.
 
 ---
 
-### 5. Metacognition → Agent-Driven Knowledge Updates
+### 5. Deliberate Belief Revision → Agent-Driven Knowledge Updates
 **BPSA:** The `update_knowledge` tool lets the *agent itself* explicitly revise its knowledge store at any point during live execution.
-**Human mind:** **Metacognition** — the ability to consciously reflect on and revise one's own beliefs. This is the highest-level memory operation, reserved for deliberate reasoning — exactly what the live agent does when it calls `update_knowledge`.
+**Human mind:** **Deliberate belief revision** — the conscious, intentional process of updating one's own knowledge when new evidence or reasoning warrants it. This is distinct from *metacognition* in the strict cognitive science sense (which additionally involves monitoring uncertainty and regulating reasoning strategies); what the agent does here is closer to deliberate note-taking and self-correction — updating a belief because a step's outcome has changed what is known to be true.
 
 
 ## Architecture
diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index 71a98e950..3982d2f96 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -1285,18 +1285,18 @@ def cmd_compression_set_high(agent):
     config = _get_compression_config(agent)
     if config is None:
         return
-    config.keep_recent_steps = 10
-    config.max_uncompressed_steps = 13
+    config.keep_recent_steps = 20
+    config.max_uncompressed_steps = 25
     config.keep_compressed_steps = 10
-    config.max_compressed_steps = 13
+    config.max_compressed_steps = 15
     table = Table(show_header=False, box=None)
     table.add_column(style="cyan", no_wrap=True)
     table.add_column(style="green")
     table.add_row("Compression preset", "HIGH")
-    table.add_row("keep_recent_steps", "10")
-    table.add_row("max_uncompressed_steps", "13")
-    table.add_row("keep_compressed_steps", "10")
-    table.add_row("max_compressed_steps", "13")
+    table.add_row("keep_recent_steps", "20")
+    table.add_row("max_uncompressed_steps", "25")
+    table.add_row("keep_compressed_steps", "10")
+    table.add_row("max_compressed_steps", "15")
     console.print(table)
 
 
@@ -1307,7 +1307,7 @@ def cmd_compression_set_normal(agent):
         return
     config.keep_recent_steps = 40
     config.max_uncompressed_steps = 50
-    config.keep_compressed_steps = 10
+    config.keep_compressed_steps = 15
     config.max_compressed_steps = 20
     table = Table(show_header=False, box=None)
     table.add_column(style="cyan", no_wrap=True)
diff --git a/src/smolagents/bp_compression.py b/src/smolagents/bp_compression.py
index 297489856..983a2fffd 100644
--- a/src/smolagents/bp_compression.py
+++ b/src/smolagents/bp_compression.py
@@ -386,10 +386,18 @@ def create_compression_prompt(
 - **Compressed history** captures events, changes, and what happened over time.
 - **Knowledge** captures current beliefs, facts, and the latest state of things.
 
-Your summary will be added to the compressed history. It should describe what happened
+Episodic Memory vs. Semantic Memory
+- **Compressed History** = Episodic Memory = what happened (events, actions taken). 
+- **Knowledge** = Semantic Memory = what is currently true (facts, beliefs, current state).
+
+In the Human mind: 
+- **Episodic memory** = "I did X at time T." 
+- **Semantic memory** = "X is true."
+
+Your summary will be added to the compressed history (Episodic Memory). It should describe what happened
 (events, actions, outcomes, changes) without repeating prior history entries.
 
-If the execution history reveals important new facts or corrections to existing knowledge,
+If the execution history reveals important new facts or corrections to your existing knowledge (Semantic Memory),
 include a <knowledge_updates> section. Use XML tags to add, update, or delete sections:
 - To ADD or UPDATE: <tag_name>new content</tag_name>
 - To DELETE an obsolete section: <tag_name/>
@@ -409,6 +417,7 @@ def create_compression_prompt(
 """
 
     return f"""Hello super-intelligence!
+This task is involved in your context compression.
 To your own benefit, please summarize the following agent execution history into a concise summary.
 {COMMON_COMPRESSION_INSTRUCTIONS}
 {history_section}{knowledge_section}{post_steps_section}{output_instruction}
@@ -611,6 +620,7 @@ def create_knowledge_extraction_prompt(
     post_steps_section = _build_post_steps_section(post_steps)
 
     return f"""Hello super-intelligence!
+This task is involved in your context compression.
 Please extract key knowledge from the following {len(compressed_steps)} summaries
 covering {total_steps} total steps of agent execution.
 These summaries are about to be removed from the context. Therefore, updating the knowledge
diff --git a/src/smolagents/prompts/code_agent.yaml b/src/smolagents/prompts/code_agent.yaml
index b93c0dade..b64c6f13f 100644
--- a/src/smolagents/prompts/code_agent.yaml
+++ b/src/smolagents/prompts/code_agent.yaml
@@ -178,7 +178,11 @@ system_prompt: |-
   23. Before you start coding, please search in the existing code for similar functions to those that you intend to implement. Avoid creating
    replicated code.
   24. If you have a tool to update knowledge, you must keep the knowledge updated.
+   In the case that you need a note pad, you can use the knowledge tool to store your notes inside <notepad></notepad>.
+   You can also use the knowledge tool to keep your task list status inside <tasklist></tasklist>.
+   These are ideas only. You can use the knowledge tool at your discretion. If you do use it, you must keep the knowledge updated.
 
+  The `final_answer` tool ends the chat.
   Any final output that you would like to give such as "my name is Assistant" should be done via a python code block with final_answer("my name is Assistant").
 
   This is an example of python calling code with "this is the final answer" as final answer:
@@ -219,8 +223,8 @@ system_prompt: |-
   {{custom_instructions}}
   {%- endif %}
   
-  When the user asks me to run something with <runcode></runcode>, it means that
-  the user wants me to respond with the <runcode>...</runcode> string so the commands
+  When the user asks to run something with <runcode></runcode>, it means that
+  the user wants a response with the <runcode>...</runcode> string so the commands
   will run in his device.
   If you try to run <runcode> or <savetofile> at your end, you will fail. But, when you respond 
   with the <runcode> and <savetofile> tags (text), these tags will be run/saved in the user's device.
@@ -235,7 +239,7 @@ system_prompt: |-
   at each step, consider if you should summarize or move the previous step to memory, keeping only the relevant
   information. It is common to have long execution outputs bloated with warnings and irrelevant information. Each step's
   response is tagged with step="N" (e.g. <response step="3">) — use that number as the actionstep_id when calling these tools.
-  If you prefer, you can just move the previous step (or any previous step) to memory and write the relevant information
+  If you prefer, you can just move the previous step (or any other previous step) to memory and write the relevant information
   in your current <thoughts> section.
 
   When coding, try to make the smallest possible code to achieve the goal keeping good code quality.
@@ -279,6 +283,8 @@ planning:
     Try to make the smallest possible plan to achieve the goal keeping good outcome quality.
     If summarization/memory or similar tools are available, recommend to use these tools (or even add as tasks) before starting new major steps if applicable.
     If starting a completely new task unrelated to the previous, using summarization/memory or similar tools is a must.
+    If the agent has access to a knowledge tool, consider recommending that the agent store the task list via the knowledge tool and keep it updated.
+    
 
   update_plan_pre_messages: |-
     Create a simple and doable plan towards solving a task.

From 009c21cc30ef982ef15b7365e96ab6783507e601 Mon Sep 17 00:00:00 2001
From: joaopauloschuler <43456488+joaopauloschuler@users.noreply.github.com>
Date: Tue, 3 Mar 2026 00:17:52 -0300
Subject: [PATCH 34/38] md compression - review.

---
 docs/compression.md | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/compression.md b/docs/compression.md
index a6d6cc882..3b7090dbe 100644
--- a/docs/compression.md
+++ b/docs/compression.md
@@ -38,38 +38,38 @@ exactly.
 ---
 
 ### 1. Working Memory vs. Long-Term Memory
-**BPSA:** Recent steps kept in **full detail** (`keep_recent_steps`). Older steps compressed into summaries.
-**Human mind:** The **prefrontal cortex** holds a small working memory buffer (~7±2 items, Miller 1956) in full resolution. Older experiences are consolidated and compressed by the hippocampus over time.
+* **BPSA:** Recent steps are kept in **full detail** (`keep_recent_steps`). Older steps are compressed into summaries.
+* **Human mind:** The **prefrontal cortex** holds a small working memory buffer (~7±2 items, Miller 1956) in full resolution. Older experiences are consolidated and compressed by the hippocampus over time.
 
 > *"Keep 40 recent steps in full" is literally what your brain does right now — you remember today in detail, last Tuesday as a blur.*
 
 ---
 
 ### 2. Sleep Consolidation → Phase 1 + Phase 2
-**BPSA:** Two-phase pipeline — Phase 1 compresses live steps + extracts knowledge; Phase 2 merges accumulated compressed steps into deeper knowledge.
-**Human mind:** Sleep has **two consolidation phases** — slow-wave sleep (SWS) replays episodic memories from hippocampus to neocortex (Phase 1 analog), and REM sleep is associated with abstracting and integrating those replays into semantic knowledge (Phase 2 analog). The analogy is functional: SWS and REM differ in their underlying neural mechanisms (sharp-wave ripples vs. theta oscillations) rather than being a clean "first pass / second pass" distinction, but the broad directionality — from detailed episodic replay toward abstract semantic integration — maps well onto BPSA's two phases.
+* **BPSA:** Two-phase pipeline — Phase 1 compresses live steps + extracts knowledge; Phase 2 merges accumulated compressed steps into deeper knowledge.
+* **Human mind:** Sleep has **two consolidation phases** — slow-wave sleep (SWS) replays episodic memories from hippocampus to neocortex (Phase 1 analog), and REM sleep is associated with abstracting and integrating those replays into semantic knowledge (Phase 2 analog). The analogy is functional: SWS and REM differ in their underlying neural mechanisms (sharp-wave ripples vs. theta oscillations) rather than being a clean "first pass / second pass" distinction, but the broad directionality — from detailed episodic replay toward abstract semantic integration — maps well onto BPSA's two phases.
 
 > *Phase 2 in BPSA ("merge_compressed when they accumulate") is functionally analogous to the later sleep stages that refine, consolidate, and eventually render raw episodic traces unnecessary for retrieval.*
 
 ---
 
 ### 3. Episodic vs. Semantic Memory
-**BPSA:** `CompressedHistoryStep` = what happened (events, actions taken). `knowledge` store = what is currently true (facts, beliefs, current state).
-**Human mind:** **Episodic memory** = "I did X at time T." **Semantic memory** = "X is true." The brain explicitly separates these. Old episodic memories gradually convert to semantic ones — exactly what Phase 2 does.
+* **BPSA:** `CompressedHistoryStep` = what happened (events, actions taken). `knowledge` store = what is currently true (facts, beliefs, current state).
+* **Human mind:** **Episodic memory** = "I did X at time T." **Semantic memory** = "X is true." The brain explicitly separates these. Old episodic memories gradually convert to semantic ones — exactly what Phase 2 does.
 
 > *"Compressed history = events/changes over time; knowledge = current beliefs/facts" — this is straight from cognitive psychology textbooks.*
 
 ---
 
 ### 4. Schemas / Semantic Networks → Tagged XML Knowledge
-**BPSA:** Knowledge stored as tagged XML sections (`<plan>`, `<key_findings>`, `<current_status>`). Sections can be added, updated, or deleted via diff operations.
-**Human mind:** Cognitive psychologists call these **schemas** — organised clusters of knowledge with labels and relationships, updated incrementally as new information arrives. The `merge_context()` add/update/delete operations mirror how schemas are revised.
+* **BPSA:** Knowledge stored as tagged XML sections (`<plan>`, `<key_findings>`, `<current_status>`). Sections (tags) can be added, updated, or deleted via diff operations.
+* **Human mind:** Cognitive psychologists call these **schemas** — organised clusters of knowledge with labels and relationships, updated incrementally as new information arrives. The `merge_context()` add/update/delete operations mirror how schemas are revised.
 
 ---
 
 ### 5. Deliberate Belief Revision → Agent-Driven Knowledge Updates
-**BPSA:** The `update_knowledge` tool lets the *agent itself* explicitly revise its knowledge store at any point during live execution.
-**Human mind:** **Deliberate belief revision** — the conscious, intentional process of updating one's own knowledge when new evidence or reasoning warrants it. This is distinct from *metacognition* in the strict cognitive science sense (which additionally involves monitoring uncertainty and regulating reasoning strategies); what the agent does here is closer to deliberate note-taking and self-correction — updating a belief because a step's outcome has changed what is known to be true.
+* **BPSA:** The `update_knowledge` tool lets the *agent itself* explicitly revise its knowledge store at any point during live execution.
+* **Human mind:** **Deliberate belief revision** — the conscious, intentional process of updating one's own knowledge when new evidence or reasoning warrants it. This is distinct from *metacognition* in the strict cognitive science sense (which additionally involves monitoring uncertainty and regulating reasoning strategies); what the agent does here is closer to deliberate note-taking and self-correction — updating a belief because a step's outcome has changed what is known to be true.
 
 
 ## Architecture

From 2ace61b505806a47a9b3392c59d2535ce45b3e6a Mon Sep 17 00:00:00 2001
From: joaopauloschuler <43456488+joaopauloschuler@users.noreply.github.com>
Date: Tue, 3 Mar 2026 00:20:54 -0300
Subject: [PATCH 35/38] Enhance context compression description in README

Updated context compression description to include biologically inspired summarization.
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index dba6fc890..e528b53f5 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ limitations under the License.
 
 * 💻 **Interactive CLI ([`bpsa`](#cli-bpsa)):** Multi-turn REPL with slash commands, command history, tab completion, session stats, and auto-approve mode.
 * 🔄 **Infinite runtime CLI ([`ad-infinitum`](#cli-ad-infinitum)):** Allows agents to **run ad infinitum** via autonomous looping.
-* 🗜️ **Context compression**: Automatic LLM-based summarization of older memory steps to manage context window size during long-running tasks.
+* 🗜️ **Context compression**: Biologically inspired [Automatic LLM-based summarization](docs/compression.md) of older memory steps to manage context window size during long-running tasks.
 * 🌐 **Browser integration:** Control a headed Chromium browser from agent code blocks via Playwright (`--browser` flag).
 * 🖥️ **GUI interaction:** Launch, screenshot, click, type, and send keys to native GUI applications on X11 via xdotool/ImageMagick (`--gui-x11` flag).
 * 👁️ **Image loading:** Agents can load and visually inspect image files (plots, screenshots, diagrams) via the built-in `load_image` tool — always available, no flags needed.

From b30b6d3079fac127d913fa31c51961188fb6cddb Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Tue, 3 Mar 2026 00:34:22 -0300
Subject: [PATCH 36/38] docs: improve biological inspirations section in
 compression.md

- Soften SWS/REM sleep analogy: clarify it is functional, not mechanistic
- Add Multiple Trace Theory caveat to the Standard Consolidation Model diagram
- Fix hippocampus/neocortex roles in Section 1 (neocortex holds compressed form)
- Remove anthropomorphising language in Section 3
- Rename Section 5 from 'Metacognition' to 'Deliberate Belief Revision'
- Add hypothesis caveat to convergence claim in intro paragraph
- Update diagram label from 'deeper sleep' to 'later consolidation'

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 docs/compression.md | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/docs/compression.md b/docs/compression.md
index 3b7090dbe..bd87c5c4b 100644
--- a/docs/compression.md
+++ b/docs/compression.md
@@ -25,7 +25,7 @@ The entire two-phase design mirrors the **Standard Model of Memory Consolidation
 Experience → Hippocampus (short-lived, detailed)
                 ↓  (sleep / Phase 1)
            Compressed replay → early neocortex
-                ↓  (deeper sleep / Phase 2)
+                ↓  (later consolidation / Phase 2)
            Abstract semantic knowledge → late neocortex
                 ↓
            Hippocampus no longer needed for retrieval
@@ -35,11 +35,17 @@ Replace hippocampus with "action steps", early neocortex with "CompressedHistory
 late neocortex with "knowledge store" — and you have BPSA's compression pipeline almost
 exactly.
 
+> *Note: the Standard Model's claim that the hippocampus becomes unnecessary for retrieval
+> is contested by Multiple Trace Theory (Nadel & Moscovitch, 1997), which argues the
+> hippocampus remains involved in detailed episodic retrieval indefinitely. BPSA's
+> architecture — which does eventually discard original steps — maps onto the Standard
+> Model regardless of which biological theory proves correct.*
+
 ---
 
 ### 1. Working Memory vs. Long-Term Memory
 * **BPSA:** Recent steps are kept in **full detail** (`keep_recent_steps`). Older steps are compressed into summaries.
-* **Human mind:** The **prefrontal cortex** holds a small working memory buffer (~7±2 items, Miller 1956) in full resolution. Older experiences are consolidated and compressed by the hippocampus over time.
+* **Human mind:** The **prefrontal cortex** holds a small working memory buffer (~7±2 items, Miller 1956) in full resolution. Older experiences are consolidated over time: the hippocampus holds the initial detailed trace and orchestrates its gradual transfer to the neocortex, where a compressed, generalised form eventually lives independently.
 
 > *"Keep 40 recent steps in full" is literally what your brain does right now — you remember today in detail, last Tuesday as a blur.*
 
@@ -55,7 +61,7 @@ exactly.
 
 ### 3. Episodic vs. Semantic Memory
 * **BPSA:** `CompressedHistoryStep` = what happened (events, actions taken). `knowledge` store = what is currently true (facts, beliefs, current state).
-* **Human mind:** **Episodic memory** = "I did X at time T." **Semantic memory** = "X is true." The brain explicitly separates these. Old episodic memories gradually convert to semantic ones — exactly what Phase 2 does.
+* **Human mind:** **Episodic memory** = "I did X at time T." **Semantic memory** = "X is true." Neuroscience has identified these as distinct systems with different neural substrates. Old episodic memories gradually convert to semantic ones — exactly what Phase 2 does.
 
 > *"Compressed history = events/changes over time; knowledge = current beliefs/facts" — this is straight from cognitive psychology textbooks.*
 

From a905821d565ad62d71a665c102c84ff61a26b353 Mon Sep 17 00:00:00 2001
From: joaopauloschuler <43456488+joaopauloschuler@users.noreply.github.com>
Date: Tue, 3 Mar 2026 00:38:47 -0300
Subject: [PATCH 37/38] Fix capitalization in context compression description

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e528b53f5..7f0f7a30b 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ limitations under the License.
 
 * 💻 **Interactive CLI ([`bpsa`](#cli-bpsa)):** Multi-turn REPL with slash commands, command history, tab completion, session stats, and auto-approve mode.
 * 🔄 **Infinite runtime CLI ([`ad-infinitum`](#cli-ad-infinitum)):** Allows agents to **run ad infinitum** via autonomous looping.
-* 🗜️ **Context compression**: Biologically inspired [Automatic LLM-based summarization](docs/compression.md) of older memory steps to manage context window size during long-running tasks.
+* 🗜️ **Context compression**: Biologically inspired [automatic LLM-based summarization](docs/compression.md) of older memory steps to manage context window size during long-running tasks.
 * 🌐 **Browser integration:** Control a headed Chromium browser from agent code blocks via Playwright (`--browser` flag).
 * 🖥️ **GUI interaction:** Launch, screenshot, click, type, and send keys to native GUI applications on X11 via xdotool/ImageMagick (`--gui-x11` flag).
 * 👁️ **Image loading:** Agents can load and visually inspect image files (plots, screenshots, diagrams) via the built-in `load_image` tool — always available, no flags needed.

From 076a6b664218ca18b895ec2c58d636dbaa819d28 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Tue, 3 Mar 2026 22:57:14 -0300
Subject: [PATCH 38/38] Bump version from 1.23.10 to 1.23.11

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 pyproject.toml             | 2 +-
 src/smolagents/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0a166246e..8dbd31d34 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "bpsa"
-version = "1.23.10"
+version = "1.23.11"
 description = "Beyond Python SmolAgents (BPSA) — a multi-language, multi-agent framework forked from HuggingFace smolagents."
 authors = [
   { name="Joao Paulo Schwarz Schuler" },
diff --git a/src/smolagents/__init__.py b/src/smolagents/__init__.py
index f9d71ea65..4e0d07dcc 100644
--- a/src/smolagents/__init__.py
+++ b/src/smolagents/__init__.py
@@ -14,7 +14,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.23.10"
+__version__ = "1.23.11"
 
 from .agent_types import *  # noqa: I001
 from .agents import *  # Above noqa avoids a circular dependency due to cli.py