diff --git a/apps/docs/integrations/openai.mdx b/apps/docs/integrations/openai.mdx
index 66b797c19..14aa4503a 100644
--- a/apps/docs/integrations/openai.mdx
+++ b/apps/docs/integrations/openai.mdx
@@ -44,7 +44,9 @@ import { withSupermemory } from "@supermemory/tools/openai"
 const openai = new OpenAI()
 
 // Wrap client with memory - memories auto-injected into system prompts
-const client = withSupermemory(openai, "user-123", {
+const client = withSupermemory(openai, {
+  containerTag: "user-123",
+  customId: "conversation-456",
   mode: "full",        // "profile" | "query" | "full"
   addMemory: "always", // "always" | "never"
 })
@@ -62,21 +64,19 @@ const response = await client.chat.completions.create({
 ### Configuration Options
 
 ```typescript
-const client = withSupermemory(openai, "user-123", {
-  // Memory search mode
-  mode: "full",  // "profile" (user profile only), "query" (search only), "full" (both)
-
-  // Auto-save conversations as memories
-  addMemory: "always",  // "always" | "never"
-
-  // Group messages into conversations
-  conversationId: "conv-456",
-
-  // Enable debug logging
-  verbose: true,
-
-  // Custom API endpoint
-  baseUrl: "https://custom.api.com"
+const client = withSupermemory(openai, {
+  // Required options
+  containerTag: "user-123",       // Scopes memories to this user
+  customId: "conversation-456", // Groups messages into conversations
+
+  // Optional options
+  mode: "full",         // "profile" (user profile only), "query" (search only), "full" (both)
+  addMemory: "always",  // "always" | "never" - auto-save conversations as memories
+  searchMode: "hybrid", // "memories" (default), "hybrid" (memories + chunks), "documents" (chunks only)
+  searchLimit: 15,      // Max search results for hybrid/documents mode (default: 10)
+  verbose: true,        // Enable debug logging
+  apiKey: "sm_...",     // Supermemory API key (or use SUPERMEMORY_API_KEY env var)
+  baseUrl: "https://custom.api.com" // Custom API endpoint
 })
 ```
 
@@ -88,10 +88,33 @@ const client = withSupermemory(openai, "user-123", {
 | `query` | Searches memories based on user message | Question answering |
 | `full` | Both profile and query-based search | Best for chatbots |
 
+### Search Modes (RAG Support)
+
+| Search Mode | Description | Use Case |
+|-------------|-------------|----------|
+| `memories` | Search only memory entries (default) | Personal memory recall |
+| `hybrid` | Search both memories AND document chunks | RAG with personalization |
+| `documents` | Search only document chunks | Pure RAG applications |
+
+```typescript
+// RAG example with hybrid search
+const ragClient = withSupermemory(openai, {
+  containerTag: "user-123",
+  customId: "conv-789",
+  mode: "full",
+  searchMode: "hybrid",  // Search both memories and document chunks
+  searchLimit: 15,       // Return up to 15 results
+})
+```
+
 ### Works with Responses API Too
 
 ```typescript
-const client = withSupermemory(openai, "user-123", { mode: "full" })
+const client = withSupermemory(openai, {
+  containerTag: "user-123",
+  customId: "conversation-456",
+  mode: "full",
+})
 
 // Memories injected into instructions
 const response = await client.responses.create({
diff --git a/packages/openai-sdk-python/README.md b/packages/openai-sdk-python/README.md
index cd771a977..b0e9c6b2f 100644
--- a/packages/openai-sdk-python/README.md
+++ b/packages/openai-sdk-python/README.md
@@ -35,7 +35,7 @@ The easiest way to add memory capabilities to your OpenAI client is using the `w
 ```python
 import asyncio
 from openai import AsyncOpenAI
-from supermemory_openai import with_supermemory, OpenAIMiddlewareOptions
+from supermemory_openai import with_supermemory, SupermemoryOpenAIOptions
 
 async def main():
     # Create OpenAI client
@@ -44,8 +44,9 @@ async def main():
     # Wrap with Supermemory middleware
     openai_with_memory = with_supermemory(
         openai,
-        container_tag="user-123",  # Unique identifier for user's memories
-        options=OpenAIMiddlewareOptions(
+        SupermemoryOpenAIOptions(
+            container_tag="user-123",       # Unique identifier for user's memories
+            custom_id="chat-session-1", # Required: groups messages into conversations
             mode="full",        # "profile", "query", or "full"
             verbose=True,       # Enable logging
             add_memory="always" # Automatically save conversations
@@ -118,11 +119,17 @@ The middleware also works with synchronous OpenAI clients:
 
 ```python
 from openai import OpenAI
-from supermemory_openai import with_supermemory
+from supermemory_openai import with_supermemory, SupermemoryOpenAIOptions
 
 # Sync client
 openai = OpenAI(api_key="your-openai-api-key")
-openai_with_memory = with_supermemory(openai, "user-123")
+openai_with_memory = with_supermemory(
+    openai,
+    SupermemoryOpenAIOptions(
+        container_tag="user-123",
+        custom_id="chat-session-1"
+    )
+)
 
 # Works the same way
 response = openai_with_memory.chat.completions.create(
@@ -137,12 +144,24 @@ response = openai_with_memory.chat.completions.create(
 
 ```python
 # Async context manager (recommended)
-async with with_supermemory(openai, "user-123") as client:
+async with with_supermemory(
+    openai,
+    SupermemoryOpenAIOptions(
+        container_tag="user-123",
+        custom_id="chat-session-1"
+    )
+) as client:
     response = await client.chat.completions.create(...)
 # Background tasks automatically waited for on exit
 
 # Manual cleanup
-client = with_supermemory(openai, "user-123")
+client = with_supermemory(
+    openai,
+    SupermemoryOpenAIOptions(
+        container_tag="user-123",
+        custom_id="chat-session-1"
+    )
+)
 response = await client.chat.completions.create(...)
 await client.wait_for_background_tasks()  # Ensure memory is saved
 ```
@@ -159,8 +178,11 @@ Injects all static and dynamic profile memories into every request. Best for mai
 ```python
 openai_with_memory = with_supermemory(
     openai,
-    "user-123",
-    OpenAIMiddlewareOptions(mode="profile")
+    SupermemoryOpenAIOptions(
+        container_tag="user-123",
+        custom_id="chat-session-1",
+        mode="profile"
+    )
 )
 ```
 
@@ -170,8 +192,11 @@ Only searches for memories relevant to the current user message. More efficient
 ```python
 openai_with_memory = with_supermemory(
     openai,
-    "user-123",
-    OpenAIMiddlewareOptions(mode="query")
+    SupermemoryOpenAIOptions(
+        container_tag="user-123",
+        custom_id="chat-session-1",
+        mode="query"
+    )
 )
 ```
 
@@ -181,8 +206,11 @@ Combines both profile and query modes - includes all profile memories plus relev
 ```python
 openai_with_memory = with_supermemory(
     openai,
-    "user-123",
-    OpenAIMiddlewareOptions(mode="full")
+    SupermemoryOpenAIOptions(
+        container_tag="user-123",
+        custom_id="chat-session-1",
+        mode="full"
+    )
 )
 ```
 
@@ -192,22 +220,30 @@ Control when conversations are automatically saved as memories:
 
 ```python
-# Always save conversations as memories
+# Always save conversations as memories (default)
-OpenAIMiddlewareOptions(add_memory="always")
+SupermemoryOpenAIOptions(
+    container_tag="user-123",
+    custom_id="chat-session-1",
+    add_memory="always"
+)
 
-# Never save conversations (default)
+# Never save conversations
-OpenAIMiddlewareOptions(add_memory="never")
+SupermemoryOpenAIOptions(
+    container_tag="user-123",
+    custom_id="chat-session-1",
+    add_memory="never"
+)
 ```
 
 ### Complete Configuration Example
 
 ```python
-from supermemory_openai import with_supermemory, OpenAIMiddlewareOptions
+from supermemory_openai import with_supermemory, SupermemoryOpenAIOptions
 
 openai_with_memory = with_supermemory(
     openai_client,
-    container_tag="user-123",
-    options=OpenAIMiddlewareOptions(
-        conversation_id="chat-session-456",  # Group messages into conversations
+    SupermemoryOpenAIOptions(
+        container_tag="user-123",
+        custom_id="chat-session-456",
         verbose=True,                        # Enable detailed logging
         mode="full",                         # Use both profile and query
         add_memory="always"                  # Auto-save conversations
@@ -291,27 +327,27 @@ Wraps an OpenAI client with automatic memory injection middleware.
 ```python
 def with_supermemory(
     openai_client: Union[OpenAI, AsyncOpenAI],
-    container_tag: str,
-    options: Optional[OpenAIMiddlewareOptions] = None
+    options: SupermemoryOpenAIOptions
 ) -> Union[OpenAI, AsyncOpenAI]
 ```
 
 **Parameters:**
 - `openai_client`: OpenAI or AsyncOpenAI client instance
-- `container_tag`: Unique identifier for memory storage (e.g., user ID)
-- `options`: Configuration options (see `OpenAIMiddlewareOptions`)
+- `options`: Configuration options (see `SupermemoryOpenAIOptions`)
 
-#### `OpenAIMiddlewareOptions`
+#### `SupermemoryOpenAIOptions`
 
 Configuration dataclass for middleware behavior.
 
 ```python
 @dataclass
-class OpenAIMiddlewareOptions:
-    conversation_id: Optional[str] = None      # Group messages into conversations
-    verbose: bool = False                      # Enable detailed logging
+class SupermemoryOpenAIOptions:
+    container_tag: str                      # Required: unique identifier for memory storage
+    custom_id: str                    # Required: groups messages into conversations
+    api_key: Optional[str] = None           # Supermemory API key (or use env var)
+    verbose: bool = False                   # Enable detailed logging
     mode: Literal["profile", "query", "full"] = "profile"  # Memory injection mode
-    add_memory: Literal["always", "never"] = "never"       # Auto-save behavior
+    add_memory: Literal["always", "never"] = "always"      # Auto-save behavior
 ```
 
 ### SupermemoryTools
@@ -349,7 +385,13 @@ from supermemory_openai import (
 
 try:
     # This will raise SupermemoryConfigurationError if API key is missing
-    client = with_supermemory(openai_client, "user-123")
+    client = with_supermemory(
+        openai_client,
+        SupermemoryOpenAIOptions(
+            container_tag="user-123",
+            custom_id="chat-session-1"
+        )
+    )
 
     response = await client.chat.completions.create(
         messages=[{"role": "user", "content": "Hello"}],
diff --git a/packages/openai-sdk-python/pyproject.toml b/packages/openai-sdk-python/pyproject.toml
index c808d79f8..9c852fecd 100644
--- a/packages/openai-sdk-python/pyproject.toml
+++ b/packages/openai-sdk-python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "supermemory-openai-sdk"
-version = "1.0.3"
+version = "2.0.0"
 description = "Memory tools for OpenAI function calling with supermemory"
 readme = "README.md"
 license = "MIT"
diff --git a/packages/openai-sdk-python/src/supermemory_openai/__init__.py b/packages/openai-sdk-python/src/supermemory_openai/__init__.py
index 15adf20c2..3b42d4f9e 100644
--- a/packages/openai-sdk-python/src/supermemory_openai/__init__.py
+++ b/packages/openai-sdk-python/src/supermemory_openai/__init__.py
@@ -18,7 +18,7 @@
 
 from .middleware import (
     with_supermemory,
-    OpenAIMiddlewareOptions,
+    SupermemoryOpenAIOptions,
     SupermemoryOpenAIWrapper,
 )
 
@@ -58,7 +58,7 @@
     "create_add_memory_tool",
     # Middleware
     "with_supermemory",
-    "OpenAIMiddlewareOptions",
+    "SupermemoryOpenAIOptions",
     "SupermemoryOpenAIWrapper",
     # Utils
     "Logger",
diff --git a/packages/openai-sdk-python/src/supermemory_openai/middleware.py b/packages/openai-sdk-python/src/supermemory_openai/middleware.py
index 12100065c..5d748da57 100644
--- a/packages/openai-sdk-python/src/supermemory_openai/middleware.py
+++ b/packages/openai-sdk-python/src/supermemory_openai/middleware.py
@@ -29,13 +29,29 @@
 
 
 @dataclass
-class OpenAIMiddlewareOptions:
+class SupermemoryOpenAIOptions:
     """Configuration options for OpenAI middleware."""
 
-    conversation_id: Optional[str] = None
+    container_tag: str
+    """Container tag/identifier for memory search (e.g., user ID)."""
+
+    custom_id: str
+    """Custom ID to group messages into a single document (e.g., conversation ID)."""
+
+    api_key: Optional[str] = None
+    """Supermemory API key (falls back to SUPERMEMORY_API_KEY env var)."""
+
+    base_url: Optional[str] = None
+    """Custom Supermemory API base URL (defaults to https://api.supermemory.ai)."""
+
     verbose: bool = False
+    """Enable detailed logging of memory search and injection."""
+
     mode: Literal["profile", "query", "full"] = "profile"
-    add_memory: Literal["always", "never"] = "never"
+    """Memory retrieval mode: 'profile', 'query', or 'full'."""
+
+    add_memory: Literal["always", "never"] = "always"
+    """Memory persistence mode: 'always' (default) or 'never'."""
 
 
 class SupermemoryProfileSearch:
@@ -46,12 +62,22 @@ def __init__(self, data: dict[str, Any]):
         self.search_results: dict[str, Any] = data.get("searchResults", {})
 
 
+def _normalize_base_url(url: Optional[str]) -> str:
+    """Return ``url`` with trailing slashes stripped, or the default API URL if unset."""
+    if url:
+        # Only trailing "/" characters are removed; the rest of the URL is untouched.
+        return url.rstrip("/")
+    return "https://api.supermemory.ai"
+
+
 async def supermemory_profile_search(
     container_tag: str,
     query_text: str,
     api_key: str,
+    base_url: Optional[str] = None,
 ) -> SupermemoryProfileSearch:
     """Search for memories using the SuperMemory profile API."""
+    normalized_base_url = _normalize_base_url(base_url)
     payload = {
         "containerTag": container_tag,
     }
@@ -63,7 +89,7 @@ async def supermemory_profile_search(
 
         async with aiohttp.ClientSession() as session:
             async with session.post(
-                "https://api.supermemory.ai/v4/profile",
+                f"{normalized_base_url}/v4/profile",
                 headers={
                     "Content-Type": "application/json",
                     "Authorization": f"Bearer {api_key}",
@@ -86,7 +112,7 @@ async def supermemory_profile_search(
         import requests
 
         response = requests.post(
-            "https://api.supermemory.ai/v4/profile",
+            f"{normalized_base_url}/v4/profile",
             headers={
                 "Content-Type": "application/json",
                 "Authorization": f"Bearer {api_key}",
@@ -110,6 +136,7 @@ async def add_system_prompt(
     logger: Logger,
     mode: Literal["profile", "query", "full"],
     api_key: str,
+    base_url: Optional[str] = None,
 ) -> list[ChatCompletionMessageParam]:
     """Add memory-enhanced system prompts to chat completion messages."""
     system_prompt_exists = any(msg.get("role") == "system" for msg in messages)
@@ -117,7 +144,7 @@ async def add_system_prompt(
     query_text = get_last_user_message(messages) if mode != "profile" else ""
 
     memories_response = await supermemory_profile_search(
-        container_tag, query_text, api_key
+        container_tag, query_text, api_key, base_url
     )
 
     profile = memories_response.profile or {}
@@ -208,15 +235,124 @@ async def add_system_prompt(
     return [system_message] + messages
 
 
+async def add_conversation(
+    conversation_id: str,
+    messages: list[dict[str, Any]],
+    container_tags: list[str],
+    api_key: str,
+    base_url: Optional[str] = None,
+) -> dict[str, Any]:
+    """
+    Store a structured conversation via the Supermemory /v4/conversations endpoint.
+
+    Args:
+        conversation_id: Sent as ``conversationId`` in the request body.
+        messages: Role-tagged message dicts (user, assistant, system, tool), sent as-is.
+        container_tags: Sent as ``containerTags`` in the request body.
+        api_key: Bearer token placed in the ``Authorization`` header.
+        base_url: API root; passed through ``_normalize_base_url`` before use.
+
+    Returns:
+        The decoded JSON body of a successful response.
+
+    Raises:
+        SupermemoryAPIError: If the response status is not OK.
+    """
+    endpoint = f"{_normalize_base_url(base_url)}/v4/conversations"
+    request_headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}",
+    }
+    body = {
+        "conversationId": conversation_id,
+        "messages": messages,
+        "containerTags": container_tags,
+    }
+
+    try:
+        import aiohttp
+
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                endpoint, headers=request_headers, json=body
+            ) as response:
+                if response.ok:
+                    return await response.json()
+                raise SupermemoryAPIError(
+                    f"Failed to add conversation: {response.status}",
+                    status_code=response.status,
+                    response_text=await response.text(),
+                )
+    except ImportError:
+        # ImportError (e.g. aiohttp unavailable): use a blocking ``requests`` call instead.
+        import requests
+
+        sync_response = requests.post(endpoint, headers=request_headers, json=body)
+        if sync_response.ok:
+            return sync_response.json()
+        raise SupermemoryAPIError(
+            f"Failed to add conversation: {sync_response.status_code}",
+            status_code=sync_response.status_code,
+            response_text=sync_response.text,
+        )
+
+
 async def add_memory_tool(
     client: supermemory.Supermemory,
     container_tag: str,
     content: str,
     custom_id: Optional[str],
     logger: Logger,
+    messages: Optional[list[dict[str, Any]]] = None,
+    api_key: Optional[str] = None,
+    base_url: Optional[str] = None,
 ) -> None:
-    """Add a new memory to the SuperMemory system."""
+    """
+    Add a new memory to the SuperMemory system.
+    
+    If custom_id starts with "conversation:" and messages are provided, uses the
+    /v4/conversations endpoint with structured messages instead of the memories endpoint.
+    """
     try:
+        # Use conversations endpoint if we have structured messages
+        if custom_id and custom_id.startswith("conversation:") and messages and api_key:
+            conversation_id = custom_id.replace("conversation:", "")
+            
+            # Convert messages to conversation format
+            conversation_messages = []
+            for msg in messages:
+                conv_msg: dict[str, Any] = {
+                    "role": msg.get("role", "user"),
+                    "content": msg.get("content", ""),
+                }
+                if "name" in msg:
+                    conv_msg["name"] = msg["name"]
+                if "tool_calls" in msg:
+                    conv_msg["tool_calls"] = msg["tool_calls"]
+                if "tool_call_id" in msg:
+                    conv_msg["tool_call_id"] = msg["tool_call_id"]
+                conversation_messages.append(conv_msg)
+            
+            response = await add_conversation(
+                conversation_id=conversation_id,
+                messages=conversation_messages,
+                container_tags=[container_tag],
+                api_key=api_key,
+                base_url=base_url,
+            )
+            
+            logger.info(
+                "Conversation saved successfully via /v4/conversations",
+                {
+                    "container_tag": container_tag,
+                    "conversation_id": conversation_id,
+                    "message_count": len(messages),
+                    "response_id": response.get("id"),
+                },
+            )
+            return
+        
+        # Fallback to basic memory storage
         add_params = {
             "content": content,
             "container_tags": [container_tag],
@@ -262,13 +398,14 @@ class SupermemoryOpenAIWrapper:
     def __init__(
         self,
         openai_client: Union[OpenAI, AsyncOpenAI],
-        container_tag: str,
-        options: Optional[OpenAIMiddlewareOptions] = None,
+        options: SupermemoryOpenAIOptions,
     ):
         self._client: Union[OpenAI, AsyncOpenAI] = openai_client
-        self._container_tag: str = container_tag
-        self._options: OpenAIMiddlewareOptions = options or OpenAIMiddlewareOptions()
-        self._logger: Logger = create_logger(self._options.verbose)
+        self._container_tag: str = options.container_tag
+        self._custom_id: str = options.custom_id
+        self._base_url: Optional[str] = options.base_url
+        self._options: SupermemoryOpenAIOptions = options
+        self._logger: Logger = create_logger(options.verbose)
 
         # Track background tasks to ensure they complete
         self._background_tasks: set[asyncio.Task] = set()
@@ -280,9 +417,14 @@ def __init__(
             )
 
         api_key = self._get_api_key()
+        normalized_base_url = _normalize_base_url(self._base_url)
         try:
+            # Pass base_url to Supermemory client for memory write operations
+            client_kwargs = {"api_key": api_key}
+            if normalized_base_url != "https://api.supermemory.ai":
+                client_kwargs["base_url"] = normalized_base_url
             self._supermemory_client: supermemory.Supermemory = supermemory.Supermemory(
-                api_key=api_key
+                **client_kwargs
             )
         except Exception as e:
             raise SupermemoryConfigurationError(
@@ -293,13 +435,13 @@ def __init__(
         self._wrap_chat_completions()
 
     def _get_api_key(self) -> str:
-        """Get Supermemory API key from environment."""
+        """Get Supermemory API key from options or environment."""
         import os
 
-        api_key = os.getenv("SUPERMEMORY_API_KEY")
+        api_key = self._options.api_key or os.getenv("SUPERMEMORY_API_KEY")
         if not api_key:
             raise SupermemoryConfigurationError(
-                "SUPERMEMORY_API_KEY environment variable is required but not set"
+                "Supermemory API key is required. Provide it via options.api_key or set SUPERMEMORY_API_KEY environment variable."
             )
         return api_key
 
@@ -336,12 +478,12 @@ async def _create_with_memory_async(
             if user_message and user_message.strip():
                 content = (
                     get_conversation_content(messages)
-                    if self._options.conversation_id
+                    if self._custom_id
                     else user_message
                 )
-                custom_id = (
-                    f"conversation:{self._options.conversation_id}"
-                    if self._options.conversation_id
+                memory_custom_id = (
+                    f"conversation:{self._custom_id}"
+                    if self._custom_id
                     else None
                 )
 
@@ -351,8 +493,11 @@ async def _create_with_memory_async(
                         self._supermemory_client,
                         self._container_tag,
                         content,
-                        custom_id,
+                        memory_custom_id,
                         self._logger,
+                        messages,
+                        self._get_api_key(),
+                        self._base_url,
                     )
                 )
 
@@ -399,7 +544,7 @@ def handle_task_exception(task_obj):
             "Starting memory search",
             {
                 "container_tag": self._container_tag,
-                "conversation_id": self._options.conversation_id,
+                "custom_id": self._custom_id,
                 "mode": self._options.mode,
             },
         )
@@ -410,6 +555,7 @@ def handle_task_exception(task_obj):
             self._logger,
             self._options.mode,
             self._get_api_key(),
+            self._base_url,
         )
 
         kwargs["messages"] = enhanced_messages
@@ -430,12 +576,12 @@ def _create_with_memory_sync(
             if user_message and user_message.strip():
                 content = (
                     get_conversation_content(messages)
-                    if self._options.conversation_id
+                    if self._custom_id
                     else user_message
                 )
-                custom_id = (
-                    f"conversation:{self._options.conversation_id}"
-                    if self._options.conversation_id
+                memory_custom_id = (
+                    f"conversation:{self._custom_id}"
+                    if self._custom_id
                     else None
                 )
 
@@ -446,8 +592,11 @@ def _create_with_memory_sync(
                             self._supermemory_client,
                             self._container_tag,
                             content,
-                            custom_id,
+                            memory_custom_id,
                             self._logger,
+                            messages,
+                            self._get_api_key(),
+                            self._base_url,
                         )
                     )
                 except RuntimeError as e:
@@ -483,7 +632,7 @@ def _create_with_memory_sync(
             "Starting memory search",
             {
                 "container_tag": self._container_tag,
-                "conversation_id": self._options.conversation_id,
+                "custom_id": self._custom_id,
                 "mode": self._options.mode,
             },
         )
@@ -497,6 +646,7 @@ def _create_with_memory_sync(
                     self._logger,
                     self._options.mode,
                     self._get_api_key(),
+                    self._base_url,
                 )
             )
         except RuntimeError as e:
@@ -513,6 +663,7 @@ def _create_with_memory_sync(
                             self._logger,
                             self._options.mode,
                             self._get_api_key(),
+                            self._base_url,
                         ),
                     )
                     enhanced_messages = future.result()
@@ -617,43 +768,34 @@ def __getattr__(self, name: str) -> Any:
 
 def with_supermemory(
     openai_client: Union[OpenAI, AsyncOpenAI],
-    container_tag: str,
-    options: Optional[OpenAIMiddlewareOptions] = None,
+    options: SupermemoryOpenAIOptions,
 ) -> Union[OpenAI, AsyncOpenAI]:
     """
     Wraps an OpenAI client with SuperMemory middleware to automatically inject relevant memories
     into the system prompt based on the user's message content.
 
-    This middleware searches the supermemory API for relevant memories using the container tag
-    and user message, then either appends memories to an existing system prompt or creates
-    a new system prompt with the memories.
-
     Args:
         openai_client: The OpenAI client to wrap with SuperMemory middleware
-        container_tag: The container tag/identifier for memory search (e.g., user ID, project ID)
-        options: Optional configuration options for the middleware
+        options: Configuration options for the middleware
 
     Returns:
         An OpenAI client with SuperMemory middleware injected
 
     Example:
         ```python
-        from supermemory_openai import with_supermemory, OpenAIMiddlewareOptions
+        from supermemory_openai import with_supermemory, SupermemoryOpenAIOptions
         from openai import OpenAI
 
-        # Create OpenAI client with supermemory middleware
         openai = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
         openai_with_supermemory = with_supermemory(
             openai,
-            "user-123",
-            OpenAIMiddlewareOptions(
-                conversation_id="conversation-456",
+            SupermemoryOpenAIOptions(
+                container_tag="user-123",
+                custom_id="conversation-456",
                 mode="full",
-                add_memory="always"
             )
         )
 
-        # Use normally - memories will be automatically injected
         response = await openai_with_supermemory.chat.completions.create(
             model="gpt-4",
             messages=[
@@ -663,9 +805,25 @@ def with_supermemory(
         ```
 
     Raises:
-        ValueError: When SUPERMEMORY_API_KEY environment variable is not set
+        ValueError: When container_tag is not provided or is empty
+        ValueError: When custom_id is not provided or is empty
+        SupermemoryConfigurationError: When API key is not set
         Exception: When supermemory API request fails
     """
-    wrapper = SupermemoryOpenAIWrapper(openai_client, container_tag, options)
+    if not options.container_tag or not options.container_tag.strip():
+        raise ValueError(
+            "[supermemory] container_tag is required and must be a non-empty string. "
+            "This identifies the user or container for memory scoping. "
+            "Example: SupermemoryOpenAIOptions(container_tag='user-123', ...)"
+        )
+
+    if not options.custom_id or not options.custom_id.strip():
+        raise ValueError(
+            "[supermemory] custom_id is required and must be a non-empty string. "
+            "This ensures messages are grouped into the same document for a conversation. "
+            "Example: SupermemoryOpenAIOptions(container_tag='user-123', custom_id='conv-456', ...)"
+        )
+
+    wrapper = SupermemoryOpenAIWrapper(openai_client, options)
     # Return the wrapper, which delegates all attributes to the original client
     return cast(Union[OpenAI, AsyncOpenAI], wrapper)
diff --git a/packages/openai-sdk-python/tests/test_middleware.py b/packages/openai-sdk-python/tests/test_middleware.py
index a9f73af1b..5c2d52287 100644
--- a/packages/openai-sdk-python/tests/test_middleware.py
+++ b/packages/openai-sdk-python/tests/test_middleware.py
@@ -14,7 +14,7 @@
 try:
     from supermemory_openai import (
         with_supermemory,
-        OpenAIMiddlewareOptions,
+        SupermemoryOpenAIOptions,
         SupermemoryOpenAIWrapper,
     )
 except ImportError:
@@ -22,7 +22,7 @@
     sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(__file__)), "src"))
     from supermemory_openai import (
         with_supermemory,
-        OpenAIMiddlewareOptions,
+        SupermemoryOpenAIOptions,
         SupermemoryOpenAIWrapper,
     )
 
@@ -103,7 +103,13 @@ class TestMiddlewareInitialization:
     def test_with_supermemory_basic(self, mock_openai_client):
         """Test basic middleware initialization."""
         with patch.dict(os.environ, {"SUPERMEMORY_API_KEY": "test-key"}):
-            wrapped_client = with_supermemory(mock_openai_client, "user-123")
+            wrapped_client = with_supermemory(
+                mock_openai_client,
+                SupermemoryOpenAIOptions(
+                    container_tag="user-123",
+                    custom_id="test-conv"
+                )
+            )
 
             assert isinstance(wrapped_client, SupermemoryOpenAIWrapper)
             assert wrapped_client._container_tag == "user-123"
@@ -112,17 +118,19 @@ def test_with_supermemory_basic(self, mock_openai_client):
 
     def test_with_supermemory_with_options(self, mock_openai_client):
         """Test middleware initialization with options."""
-        options = OpenAIMiddlewareOptions(
-            conversation_id="conv-456",
-            verbose=True,
-            mode="full",
-            add_memory="always"
-        )
-
         with patch.dict(os.environ, {"SUPERMEMORY_API_KEY": "test-key"}):
-            wrapped_client = with_supermemory(mock_openai_client, "user-123", options)
+            wrapped_client = with_supermemory(
+                mock_openai_client,
+                SupermemoryOpenAIOptions(
+                    container_tag="user-123",
+                    custom_id="conv-456",
+                    verbose=True,
+                    mode="full",
+                    add_memory="always"
+                )
+            )
 
-            assert wrapped_client._options.conversation_id == "conv-456"
+            assert wrapped_client._custom_id == "conv-456"
             assert wrapped_client._options.verbose is True
             assert wrapped_client._options.mode == "full"
             assert wrapped_client._options.add_memory == "always"
@@ -132,15 +140,51 @@ def test_missing_api_key_raises_error(self, mock_openai_client):
         from supermemory_openai.exceptions import SupermemoryConfigurationError
 
         with patch.dict(os.environ, {}, clear=True):
-            with pytest.raises(SupermemoryConfigurationError, match="SUPERMEMORY_API_KEY"):
-                with_supermemory(mock_openai_client, "user-123")
+            with pytest.raises(SupermemoryConfigurationError, match="API key is required"):
+                with_supermemory(
+                    mock_openai_client,
+                    SupermemoryOpenAIOptions(
+                        container_tag="user-123",
+                        custom_id="test-conv"
+                    )
+                )
+
+    def test_empty_custom_id_raises_error(self, mock_openai_client):
+        """Test that empty custom_id raises ValueError with helpful message."""
+        with patch.dict(os.environ, {"SUPERMEMORY_API_KEY": "test-key"}):
+            with pytest.raises(ValueError, match=r"\[supermemory\] custom_id is required"):
+                with_supermemory(
+                    mock_openai_client,
+                    SupermemoryOpenAIOptions(
+                        container_tag="user-123",
+                        custom_id=""
+                    )
+                )
+
+    def test_whitespace_custom_id_raises_error(self, mock_openai_client):
+        """Test that whitespace-only custom_id raises ValueError."""
+        with patch.dict(os.environ, {"SUPERMEMORY_API_KEY": "test-key"}):
+            with pytest.raises(ValueError, match=r"\[supermemory\] custom_id is required"):
+                with_supermemory(
+                    mock_openai_client,
+                    SupermemoryOpenAIOptions(
+                        container_tag="user-123",
+                        custom_id="   "
+                    )
+                )
 
     def test_wrapper_delegates_attributes(self, mock_openai_client):
         """Test that wrapper delegates attributes to wrapped client."""
         mock_openai_client.models = Mock()
 
         with patch.dict(os.environ, {"SUPERMEMORY_API_KEY": "test-key"}):
-            wrapped_client = with_supermemory(mock_openai_client, "user-123")
+            wrapped_client = with_supermemory(
+                mock_openai_client,
+                SupermemoryOpenAIOptions(
+                    container_tag="user-123",
+                    custom_id="test-conv"
+                )
+            )
 
             # Should delegate to the original client
             assert wrapped_client.models is mock_openai_client.models
@@ -165,8 +209,11 @@ async def test_memory_injection_profile_mode(
 
                 wrapped_client = with_supermemory(
                     mock_async_openai_client,
-                    "user-123",
-                    OpenAIMiddlewareOptions(mode="profile")
+                    SupermemoryOpenAIOptions(
+                        container_tag="user-123",
+                        custom_id="test-conv",
+                        mode="profile"
+                    )
                 )
 
                 messages = [
@@ -207,8 +254,11 @@ async def test_memory_injection_query_mode(
 
                 wrapped_client = with_supermemory(
                     mock_async_openai_client,
-                    "user-123",
-                    OpenAIMiddlewareOptions(mode="query")
+                    SupermemoryOpenAIOptions(
+                        container_tag="user-123",
+                        custom_id="test-conv",
+                        mode="query"
+                    )
                 )
 
                 messages = [
@@ -241,8 +291,11 @@ async def test_memory_injection_full_mode(
 
                 wrapped_client = with_supermemory(
                     mock_async_openai_client,
-                    "user-123",
-                    OpenAIMiddlewareOptions(mode="full")
+                    SupermemoryOpenAIOptions(
+                        container_tag="user-123",
+                        custom_id="test-conv",
+                        mode="full"
+                    )
                 )
 
                 messages = [
@@ -278,7 +331,13 @@ async def test_existing_system_prompt_enhancement(
                 mock_search.return_value.profile = mock_supermemory_response["profile"]
                 mock_search.return_value.search_results = mock_supermemory_response["searchResults"]
 
-                wrapped_client = with_supermemory(mock_async_openai_client, "user-123")
+                wrapped_client = with_supermemory(
+                    mock_async_openai_client,
+                    SupermemoryOpenAIOptions(
+                        container_tag="user-123",
+                        custom_id="test-conv"
+                    )
+                )
 
                 messages = [
                     {"role": "system", "content": "You are a helpful assistant."},
@@ -324,8 +383,11 @@ async def test_add_memory_always_mode(
 
                     wrapped_client = with_supermemory(
                         mock_async_openai_client,
-                        "user-123",
-                        OpenAIMiddlewareOptions(add_memory="always")
+                        SupermemoryOpenAIOptions(
+                            container_tag="user-123",
+                            custom_id="test-conv",
+                            add_memory="always"
+                        )
                     )
 
                     messages = [
@@ -359,8 +421,11 @@ async def test_add_memory_never_mode(
 
                     wrapped_client = with_supermemory(
                         mock_async_openai_client,
-                        "user-123",
-                        OpenAIMiddlewareOptions(add_memory="never")
+                        SupermemoryOpenAIOptions(
+                            container_tag="user-123",
+                            custom_id="test-conv",
+                            add_memory="never"
+                        )
                     )
 
                     await wrapped_client.chat.completions.create(
@@ -386,7 +451,13 @@ def test_sync_client_compatibility(self, mock_openai_client, mock_openai_respons
                 mock_search.return_value.profile = {"static": [], "dynamic": []}
                 mock_search.return_value.search_results = {"results": []}
 
-                wrapped_client = with_supermemory(mock_openai_client, "user-123")
+                wrapped_client = with_supermemory(
+                    mock_openai_client,
+                    SupermemoryOpenAIOptions(
+                        container_tag="user-123",
+                        custom_id="test-conv"
+                    )
+                )
 
                 # This should work for sync clients too
                 wrapped_client.chat.completions.create(
@@ -410,7 +481,13 @@ async def test_in_async():
                     mock_search.return_value.profile = {"static": [], "dynamic": []}
                     mock_search.return_value.search_results = {"results": []}
 
-                    wrapped_client = with_supermemory(mock_openai_client, "user-123")
+                    wrapped_client = with_supermemory(
+                        mock_openai_client,
+                        SupermemoryOpenAIOptions(
+                            container_tag="user-123",
+                            custom_id="test-conv"
+                        )
+                    )
 
                     # This should work even when called from async context
                     result = wrapped_client.chat.completions.create(
@@ -441,8 +518,11 @@ def test_sync_client_memory_addition_error_handling(self, mock_openai_client, mo
 
                     wrapped_client = with_supermemory(
                         mock_openai_client,
-                        "user-123",
-                        OpenAIMiddlewareOptions(add_memory="always")
+                        SupermemoryOpenAIOptions(
+                            container_tag="user-123",
+                            custom_id="test-conv",
+                            add_memory="always"
+                        )
                     )
 
                     # Should not raise exception, should continue with main request
@@ -470,7 +550,13 @@ async def test_supermemory_api_error_handling(
             with patch("supermemory_openai.middleware.supermemory_profile_search") as mock_search:
                 mock_search.side_effect = Exception("API Error")
 
-                wrapped_client = with_supermemory(mock_async_openai_client, "user-123")
+                wrapped_client = with_supermemory(
+                    mock_async_openai_client,
+                    SupermemoryOpenAIOptions(
+                        container_tag="user-123",
+                        custom_id="test-conv"
+                    )
+                )
 
                 # Should not raise exception, should fall back gracefully
                 with pytest.raises(Exception):
@@ -490,8 +576,11 @@ async def test_no_user_message_handling(
         with patch.dict(os.environ, {"SUPERMEMORY_API_KEY": "test-key"}):
             wrapped_client = with_supermemory(
                 mock_async_openai_client,
-                "user-123",
-                OpenAIMiddlewareOptions(mode="query")
+                SupermemoryOpenAIOptions(
+                    container_tag="user-123",
+                    custom_id="test-conv",
+                    mode="query"
+                )
             )
 
             messages = [
@@ -529,8 +618,11 @@ async def test_verbose_logging(
 
                     wrapped_client = with_supermemory(
                         mock_async_openai_client,
-                        "user-123",
-                        OpenAIMiddlewareOptions(verbose=True)
+                        SupermemoryOpenAIOptions(
+                            container_tag="user-123",
+                            custom_id="test-conv",
+                            verbose=True
+                        )
                     )
 
                     await wrapped_client.chat.completions.create(
@@ -558,8 +650,11 @@ async def test_silent_logging(
 
                     wrapped_client = with_supermemory(
                         mock_async_openai_client,
-                        "user-123",
-                        OpenAIMiddlewareOptions(verbose=False)
+                        SupermemoryOpenAIOptions(
+                            container_tag="user-123",
+                            custom_id="test-conv",
+                            verbose=False
+                        )
                     )
 
                     await wrapped_client.chat.completions.create(
@@ -597,8 +692,11 @@ async def slow_add_memory(*args, **kwargs):
 
                     wrapped_client = with_supermemory(
                         mock_async_openai_client,
-                        "user-123",
-                        OpenAIMiddlewareOptions(add_memory="always")
+                        SupermemoryOpenAIOptions(
+                            container_tag="user-123",
+                            custom_id="test-conv",
+                            add_memory="always"
+                        )
                     )
 
                     # Make a request that should create a background task
@@ -644,8 +742,11 @@ async def slow_add_memory(*args, **kwargs):
                     # Use async context manager
                     async with with_supermemory(
                         mock_async_openai_client,
-                        "user-123",
-                        OpenAIMiddlewareOptions(add_memory="always")
+                        SupermemoryOpenAIOptions(
+                            container_tag="user-123",
+                            custom_id="test-conv",
+                            add_memory="always"
+                        )
                     ) as wrapped_client:
                         await wrapped_client.chat.completions.create(
                             model="gpt-4",
@@ -680,8 +781,11 @@ async def hanging_add_memory(*args, **kwargs):
 
                     wrapped_client = with_supermemory(
                         mock_async_openai_client,
-                        "user-123",
-                        OpenAIMiddlewareOptions(add_memory="always")
+                        SupermemoryOpenAIOptions(
+                            container_tag="user-123",
+                            custom_id="test-conv",
+                            add_memory="always"
+                        )
                     )
 
                     await wrapped_client.chat.completions.create(
@@ -714,8 +818,11 @@ def test_sync_context_manager_cleanup(
                     # Use sync context manager
                     with with_supermemory(
                         mock_openai_client,
-                        "user-123",
-                        OpenAIMiddlewareOptions(add_memory="always")
+                        SupermemoryOpenAIOptions(
+                            container_tag="user-123",
+                            custom_id="test-conv",
+                            add_memory="always"
+                        )
                     ) as wrapped_client:
                         wrapped_client.chat.completions.create(
                             model="gpt-4",
diff --git a/packages/openai-sdk-python/uv.lock b/packages/openai-sdk-python/uv.lock
index 26688c59f..f27d810ad 100644
--- a/packages/openai-sdk-python/uv.lock
+++ b/packages/openai-sdk-python/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.8.1"
 resolution-markers = [
     "python_full_version >= '3.10'",
@@ -2427,7 +2427,7 @@ wheels = [
 
 [[package]]
 name = "supermemory-openai-sdk"
-version = "1.0.3"
+version = "2.0.0"
 source = { editable = "." }
 dependencies = [
     { name = "openai" },
diff --git a/packages/tools/README.md b/packages/tools/README.md
index 2d03411d3..8b5ddde49 100644
--- a/packages/tools/README.md
+++ b/packages/tools/README.md
@@ -262,11 +262,15 @@ The `MemoryPromptData` object provides:
 The `withSupermemory` function creates an OpenAI client with SuperMemory middleware automatically injected:
 
 ```typescript
+import OpenAI from "openai"
 import { withSupermemory } from "@supermemory/tools/openai"
 
+const openai = new OpenAI()
+
 // Create OpenAI client with supermemory middleware
-const openaiWithSupermemory = withSupermemory("user-123", {
-  conversationId: "conversation-456",
+const openaiWithSupermemory = withSupermemory(openai, {
+  containerTag: "user-123",
+  customId: "conversation-456",
   mode: "full",
   addMemory: "always",
   verbose: true,
@@ -285,36 +289,34 @@ console.log(completion.choices[0]?.message?.content)
 
 #### OpenAI Middleware Options
 
-The middleware supports the same configuration options as the AI SDK version:
+The middleware accepts a single options object with the following properties:
 
 ```typescript
-const openaiWithSupermemory = withSupermemory("user-123", {
-  conversationId: "conversation-456", // Group messages for contextual memory
-  mode: "full",                       // "profile" | "query" | "full"
-  addMemory: "always",                // "always" | "never"
-  verbose: true,                      // Enable detailed logging
-})
+interface SupermemoryOpenAIOptions {
+  containerTag: string           // Required - User/container identifier for scoping memories
+  customId: string               // Required - Groups messages into conversations
+  apiKey?: string                // Supermemory API key (or use SUPERMEMORY_API_KEY env var)
+  baseUrl?: string               // Custom API endpoint
+  mode?: "profile" | "query" | "full"  // Memory search mode (default: "profile")
+  searchMode?: "memories" | "hybrid" | "documents"  // Search mode for RAG (default: "memories")
+  searchLimit?: number           // Max search results for hybrid/documents mode (default: 10)
+  addMemory?: "always" | "never"       // Auto-save conversations (default: "always")
+  verbose?: boolean              // Enable debug logging (default: false)
+  promptTemplate?: PromptTemplate  // Custom function to format memory data
+}
 ```
 
-#### Advanced Usage with Custom OpenAI Options
-
-You can also pass custom OpenAI client options:
+#### Advanced Usage
 
 ```typescript
 import { withSupermemory } from "@supermemory/tools/openai"
 
-const openaiWithSupermemory = withSupermemory(
-  "user-123", 
-  {
-    mode: "profile",
-    addMemory: "always",
-  },
-  {
-    baseURL: "https://api.openai.com/v1",
-    organization: "org-123",
-  },
-  "custom-api-key" // Optional: custom API key
-)
+const openaiWithSupermemory = withSupermemory(openai, {
+  containerTag: "user-123",
+  customId: "conversation-456",
+  mode: "profile",
+  addMemory: "always",
+})
 
 const completion = await openaiWithSupermemory.chat.completions.create({
   model: "gpt-4o-mini",
@@ -322,6 +324,28 @@ const completion = await openaiWithSupermemory.chat.completions.create({
 })
 ```
 
+#### RAG with Hybrid Search
+
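+Set `searchMode: "hybrid"` to search both memories and document chunks in RAG applications (the default, `"memories"`, searches memory entries only; `"documents"` searches document chunks only):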
+
+```typescript
+import { withSupermemory } from "@supermemory/tools/openai"
+
+// Hybrid search: memories + document chunks
+const ragClient = withSupermemory(openai, {
+  containerTag: "user-123",
+  customId: "conversation-789",
+  mode: "full",
+  searchMode: "hybrid",  // Search both memories and document chunks
+  searchLimit: 15,       // Return up to 15 results
+})
+
+const completion = await ragClient.chat.completions.create({
+  model: "gpt-4o-mini",
+  messages: [{ role: "user", content: "What do my uploaded documents say about project X?" }],
+})
+```
+
 #### Next.js API Route Example
 
 Here's a complete example for a Next.js API route:
@@ -329,16 +353,20 @@ Here's a complete example for a Next.js API route:
 ```typescript
 // app/api/chat/route.ts
 import { withSupermemory } from "@supermemory/tools/openai"
+import OpenAI from "openai"
 import type { OpenAI as OpenAIType } from "openai"
 
 export async function POST(req: Request) {
-  const { messages, conversationId } = (await req.json()) as {
+  const { messages, customId } = (await req.json()) as {
     messages: OpenAIType.Chat.Completions.ChatCompletionMessageParam[]
-    conversationId: string
+    customId: string
   }
 
-  const openaiWithSupermemory = withSupermemory("user-123", {
-    conversationId,
+  const openai = new OpenAI()
+
+  const openaiWithSupermemory = withSupermemory(openai, {
+    containerTag: "user-123",
+    customId,
     mode: "full",
     addMemory: "always",
     verbose: true,
@@ -588,7 +616,7 @@ interface SupermemoryMastraOptions {
   apiKey?: string              // Supermemory API key (or use SUPERMEMORY_API_KEY env var)
   baseUrl?: string             // Custom API endpoint
   mode?: "profile" | "query" | "full"  // Memory search mode (default: "profile")
-  addMemory?: "always" | "never"       // Auto-save conversations (default: "never")
+  addMemory?: "always" | "never"       // Auto-save conversations (default: "always")
   threadId?: string            // Conversation ID for grouping messages
   verbose?: boolean            // Enable debug logging (default: false)
   promptTemplate?: (data: MemoryPromptData) => string  // Custom memory formatting
@@ -662,7 +690,7 @@ interface WithSupermemoryOptions {
 - **conversationId**: Optional conversation ID to group messages into a single document for contextual memory generation
 - **verbose**: Enable detailed logging of memory search and injection process (default: false)
 - **mode**: Memory search mode - "profile" (default), "query", or "full"
-- **addMemory**: Automatic memory storage mode - "always" or "never" (default: "never")
+- **addMemory**: Automatic memory storage mode - "always" (default) or "never"
 
 ## Available Tools
 
diff --git a/packages/tools/package.json b/packages/tools/package.json
index 8d192aba6..80004a6d1 100644
--- a/packages/tools/package.json
+++ b/packages/tools/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@supermemory/tools",
   "type": "module",
-  "version": "1.4.01",
+  "version": "2.0.0",
   "description": "Memory tools for AI SDK and OpenAI function calling with supermemory",
   "scripts": {
     "build": "tsdown",
diff --git a/packages/tools/src/index.ts b/packages/tools/src/index.ts
index 404e0943f..236e2e4b2 100644
--- a/packages/tools/src/index.ts
+++ b/packages/tools/src/index.ts
@@ -1,3 +1,3 @@
 export type { SupermemoryToolsConfig } from "./types"
 
-export type { OpenAIMiddlewareOptions } from "./openai"
+export type { SupermemoryOpenAIOptions } from "./openai"
diff --git a/packages/tools/src/openai/index.ts b/packages/tools/src/openai/index.ts
index 17a37a9cb..c1b09eb71 100644
--- a/packages/tools/src/openai/index.ts
+++ b/packages/tools/src/openai/index.ts
@@ -1,7 +1,7 @@
 import type OpenAI from "openai"
 import {
 	createOpenAIMiddleware,
-	type OpenAIMiddlewareOptions,
+	type SupermemoryOpenAIOptions,
 } from "./middleware"
 
 /**
@@ -15,12 +15,17 @@ import {
  * the instructions parameter (appends to existing or creates new instructions).
  *
  * @param openaiClient - The OpenAI client to wrap with SuperMemory middleware
- * @param containerTag - The container tag/identifier for memory search (e.g., user ID, project ID)
- * @param options - Optional configuration options for the middleware
- * @param options.conversationId - Optional conversation ID to group messages into a single document for contextual memory generation
- * @param options.verbose - Optional flag to enable detailed logging of memory search and injection process (default: false)
- * @param options.mode - Optional mode for memory search: "profile" (default), "query", or "full"
- * @param options.addMemory - Optional mode for memory addition: "always", "never" (default)
+ * @param options - Configuration options for the middleware
+ * @param options.containerTag - The container tag/identifier for memory search (e.g., user ID)
+ * @param options.customId - Custom ID to group messages into a single document (e.g., conversation ID)
+ * @param options.mode - Memory search mode: "profile" (default), "query", or "full"
+ * @param options.searchMode - Search mode: "memories" (default), "hybrid" (memories + chunks), or "documents" (chunks only)
+ * @param options.searchLimit - Maximum number of search results when using hybrid/documents mode (default: 10)
+ * @param options.addMemory - Memory persistence mode: "always" (default) or "never"
+ * @param options.verbose - Enable detailed logging (default: false)
+ * @param options.apiKey - Supermemory API key (falls back to SUPERMEMORY_API_KEY env var)
+ * @param options.baseUrl - Custom Supermemory API base URL
+ * @param options.promptTemplate - Custom function to format memory data into the system prompt
  *
  * @returns An OpenAI client with SuperMemory middleware injected for both Chat Completions and Responses APIs
  *
@@ -29,14 +34,22 @@ import {
  * import { withSupermemory } from "@supermemory/tools/openai"
  * import OpenAI from "openai"
  *
- * // Create OpenAI client with supermemory middleware
- * const openai = new OpenAI({
- *   apiKey: process.env.OPENAI_API_KEY,
+ * const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
+ *
+ * // Basic usage
+ * const openaiWithSupermemory = withSupermemory(openai, {
+ *   containerTag: "user-123",
+ *   customId: "conv-456",
+ *   mode: "full",
  * })
- * const openaiWithSupermemory = withSupermemory(openai, "user-123", {
- *   conversationId: "conversation-456",
+ *
+ * // RAG usage with hybrid search (memories + document chunks)
+ * const ragClient = withSupermemory(openai, {
+ *   containerTag: "user-123",
+ *   customId: "conv-789",
  *   mode: "full",
- *   addMemory: "always"
+ *   searchMode: "hybrid",
+ *   searchLimit: 15,
  * })
  *
  * // Use with Chat Completions API - memories injected into system prompt
@@ -55,40 +68,50 @@ import {
  * })
  * ```
  *
- * @throws {Error} When SUPERMEMORY_API_KEY environment variable is not set
+ * @throws {Error} When neither apiKey option nor SUPERMEMORY_API_KEY environment variable is set
+ * @throws {Error} When containerTag is missing, not a string, or blank (empty or whitespace-only)
+ * @throws {Error} When customId is missing, not a string, or blank (empty or whitespace-only)
  * @throws {Error} When supermemory API request fails
  */
 export function withSupermemory(
 	openaiClient: OpenAI,
-	containerTag: string,
-	options?: OpenAIMiddlewareOptions,
+	options: SupermemoryOpenAIOptions,
 ) {
-	if (!process.env.SUPERMEMORY_API_KEY) {
-		throw new Error("SUPERMEMORY_API_KEY is not set")
+	const apiKey = options.apiKey ?? process.env.SUPERMEMORY_API_KEY
+	if (!apiKey) {
+		throw new Error(
+			"[supermemory] API key is required. Provide it via options.apiKey or set SUPERMEMORY_API_KEY environment variable.",
+		)
 	}
 
-	const conversationId = options?.conversationId
-	const verbose = options?.verbose ?? false
-	const mode = options?.mode ?? "profile"
-	const addMemory = options?.addMemory ?? "never"
-	const baseUrl = options?.baseUrl
+	if (
+		!options.containerTag ||
+		typeof options.containerTag !== "string" ||
+		!options.containerTag.trim()
+	) {
+		throw new Error(
+			"[supermemory] containerTag is required and must be a non-empty string. " +
+				"This identifies the user or container for memory scoping. " +
+				"Example: { containerTag: 'user-123', ... }",
+		)
+	}
 
-	const openaiWithSupermemory = createOpenAIMiddleware(
-		openaiClient,
-		containerTag,
-		{
-			conversationId,
-			verbose,
-			mode,
-			addMemory,
-			baseUrl,
-		},
-	)
+	if (
+		!options.customId ||
+		typeof options.customId !== "string" ||
+		!options.customId.trim()
+	) {
+		throw new Error(
+			"[supermemory] customId is required and must be a non-empty string. " +
+				"This ensures messages are grouped into the same document for a conversation. " +
+				"Example: { containerTag: 'user-123', customId: 'conv-456', ... }",
+		)
+	}
 
-	return openaiWithSupermemory
+	return createOpenAIMiddleware(openaiClient, { ...options, apiKey })
 }
 
-export type { OpenAIMiddlewareOptions }
+export type { SupermemoryOpenAIOptions }
 export type {
 	MemorySearchResult,
 	MemoryAddResult,
diff --git a/packages/tools/src/openai/middleware.ts b/packages/tools/src/openai/middleware.ts
index bce986d0f..15bcaeba3 100644
--- a/packages/tools/src/openai/middleware.ts
+++ b/packages/tools/src/openai/middleware.ts
@@ -1,59 +1,66 @@
 import type OpenAI from "openai"
 import Supermemory from "supermemory"
 import { addConversation } from "../conversations-client"
-import { deduplicateMemories } from "../tools-shared"
-import { createLogger, type Logger } from "../vercel/logger"
-import { convertProfileToMarkdown } from "../vercel/util"
-
-const normalizeBaseUrl = (url?: string): string => {
-	const defaultUrl = "https://api.supermemory.ai"
-	if (!url) return defaultUrl
-	return url.endsWith("/") ? url.slice(0, -1) : url
-}
+import {
+	createLogger,
+	normalizeBaseUrl,
+	buildMemoriesText,
+	type Logger,
+	type MemoryMode,
+	type SearchMode,
+	type AddMemoryMode,
+	type PromptTemplate,
+} from "../shared"
 
-export interface OpenAIMiddlewareOptions {
-	conversationId?: string
-	verbose?: boolean
-	mode?: "profile" | "query" | "full"
-	addMemory?: "always" | "never"
+/**
+ * Configuration options for the Supermemory OpenAI middleware.
+ */
+export interface SupermemoryOpenAIOptions {
+	/** Container tag/identifier for memory search (e.g., user ID) */
+	containerTag: string
+	/** Custom ID to group messages into a single document (e.g., conversation ID) */
+	customId: string
+	/** Supermemory API key (falls back to SUPERMEMORY_API_KEY env var) */
+	apiKey?: string
+	/** Custom Supermemory API base URL */
 	baseUrl?: string
-}
-
-interface SupermemoryProfileSearch {
-	profile: {
-		static?: Array<{ memory: string; metadata?: Record<string, unknown> }>
-		dynamic?: Array<{ memory: string; metadata?: Record<string, unknown> }>
-	}
-	searchResults: {
-		results: Array<{ memory: string; metadata?: Record<string, unknown> }>
-	}
+	/** Enable detailed logging of memory search and injection (default: false) */
+	verbose?: boolean
+	/**
+	 * Memory retrieval mode:
+	 * - "profile": Retrieves user profile memories (static + dynamic) without query filtering (default)
+	 * - "query": Searches memories based on semantic similarity to the user's message
+	 * - "full": Combines both profile and query-based results
+	 */
+	mode?: MemoryMode
+	/**
+	 * Search mode for memory retrieval:
+	 * - "memories": Search only memory entries (default)
+	 * - "hybrid": Search both memories AND document chunks (recommended for RAG)
+	 * - "documents": Search only document chunks
+	 */
+	searchMode?: SearchMode
+	/** Maximum number of search results to return when using hybrid/documents mode (default: 10) */
+	searchLimit?: number
+	/**
+	 * Memory persistence mode:
+	 * - "always": Automatically save conversations as memories (default)
+	 * - "never": Only retrieve memories, don't store new ones
+	 */
+	addMemory?: AddMemoryMode
+	/**
+	 * Custom function to format memory data into the system prompt.
+	 * If not provided, uses the default "User Supermemories:" format.
+	 */
+	promptTemplate?: PromptTemplate
 }
 
 /**
  * Extracts the last user message from an array of chat completion messages.
- *
- * Searches through the messages array in reverse order to find the most recent
- * message with role "user" and returns its content as a string.
- *
- * @param messages - Array of chat completion message parameters
- * @returns The content of the last user message, or empty string if none found
- *
- * @example
- * ```typescript
- * const messages = [
- *   { role: "system", content: "You are a helpful assistant." },
- *   { role: "user", content: "Hello there!" },
- *   { role: "assistant", content: "Hi! How can I help you?" },
- *   { role: "user", content: "What's the weather like?" }
- * ]
- *
- * const lastMessage = getLastUserMessage(messages)
- * // Returns: "What's the weather like?"
- * ```
  */
 const getLastUserMessage = (
 	messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[],
-) => {
+): string => {
 	const lastUserMessage = messages
 		.slice()
 		.reverse()
@@ -65,169 +72,31 @@ const getLastUserMessage = (
 }
 
 /**
- * Searches for memories using the SuperMemory profile API.
- *
- * Makes a POST request to the SuperMemory API to retrieve user profile memories
- * and search results based on the provided container tag and optional query text.
- *
- * @param containerTag - The container tag/identifier for memory search (e.g., user ID, project ID)
- * @param queryText - Optional query text to search for specific memories. If empty, returns all profile memories
- * @returns Promise that resolves to the SuperMemory profile search response
- * @throws {Error} When the API request fails or returns an error status
- *
- * @example
- * ```typescript
- * // Search with query
- * const results = await supermemoryProfileSearch("user-123", "favorite programming language")
- *
- * // Get all profile memories
- * const profile = await supermemoryProfileSearch("user-123", "")
- * ```
+ * Formats messages as "User: ..." / "Assistant: ..." entries joined by blank lines; every non-user role is labeled "Assistant" and non-string content becomes empty.
  */
-const supermemoryProfileSearch = async (
-	containerTag: string,
-	queryText: string,
-	baseUrl: string,
-): Promise<SupermemoryProfileSearch> => {
-	const payload = queryText
-		? JSON.stringify({
-				q: queryText,
-				containerTag: containerTag,
-			})
-		: JSON.stringify({
-				containerTag: containerTag,
-			})
-
-	try {
-		const response = await fetch(`${baseUrl}/v4/profile`, {
-			method: "POST",
-			headers: {
-				"Content-Type": "application/json",
-				Authorization: `Bearer ${process.env.SUPERMEMORY_API_KEY}`,
-			},
-			body: payload,
+const getConversationContent = (
+	messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[],
+): string => {
+	return messages
+		.map((msg) => {
+			const role = msg.role === "user" ? "User" : "Assistant"
+			const content = typeof msg.content === "string" ? msg.content : ""
+			return `${role}: ${content}`
 		})
-
-		if (!response.ok) {
-			const errorText = await response.text().catch(() => "Unknown error")
-			throw new Error(
-				`Supermemory profile search failed: ${response.status} ${response.statusText}. ${errorText}`,
-			)
-		}
-
-		return await response.json()
-	} catch (error) {
-		if (error instanceof Error) {
-			throw error
-		}
-		throw new Error(`Supermemory API request failed: ${error}`)
-	}
+		.join("\n\n")
 }
 
 /**
- * Adds memory-enhanced system prompts to chat completion messages.
- *
- * Searches for relevant memories based on the specified mode and injects them
- * into the conversation. If a system prompt already exists, memories are appended
- * to it. Otherwise, a new system prompt is created with the memories.
- *
- * @param messages - Array of chat completion message parameters
- * @param containerTag - The container tag/identifier for memory search
- * @param logger - Logger instance for debugging and info output
- * @param mode - Memory search mode: "profile" (all memories), "query" (search-based), or "full" (both)
- * @returns Promise that resolves to enhanced messages with memory-injected system prompt
- *
- * @example
- * ```typescript
- * const messages = [
- *   { role: "user", content: "What's my favorite programming language?" }
- * ]
- *
- * const enhancedMessages = await addSystemPrompt(
- *   messages,
- *   "user-123",
- *   logger,
- *   "full"
- * )
- * // Returns messages with system prompt containing relevant memories
- * ```
+ * Injects memories into messages by appending to existing system prompt
+ * or creating a new one.
  */
-const addSystemPrompt = async (
+const injectMemoriesIntoMessages = (
 	messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[],
-	containerTag: string,
+	memories: string,
 	logger: Logger,
-	mode: "profile" | "query" | "full",
-	baseUrl: string,
-) => {
+): OpenAI.Chat.Completions.ChatCompletionMessageParam[] => {
 	const systemPromptExists = messages.some((msg) => msg.role === "system")
 
-	const queryText = mode !== "profile" ? getLastUserMessage(messages) : ""
-
-	const memoriesResponse = await supermemoryProfileSearch(
-		containerTag,
-		queryText,
-		baseUrl,
-	)
-
-	const memoryCountStatic = memoriesResponse.profile.static?.length || 0
-	const memoryCountDynamic = memoriesResponse.profile.dynamic?.length || 0
-
-	logger.info("Memory search completed for chat API", {
-		containerTag,
-		memoryCountStatic,
-		memoryCountDynamic,
-		queryText:
-			queryText.substring(0, 100) + (queryText.length > 100 ? "..." : ""),
-		mode,
-	})
-
-	const deduplicated = deduplicateMemories({
-		static: memoriesResponse.profile.static,
-		dynamic: memoriesResponse.profile.dynamic,
-		searchResults: memoriesResponse.searchResults?.results,
-	})
-
-	logger.debug("Memory deduplication completed for chat API", {
-		static: {
-			original: memoryCountStatic,
-			deduplicated: deduplicated.static.length,
-		},
-		dynamic: {
-			original: memoryCountDynamic,
-			deduplicated: deduplicated.dynamic.length,
-		},
-		searchResults: {
-			original: memoriesResponse.searchResults?.results?.length,
-			deduplicated: deduplicated.searchResults.length,
-		},
-	})
-
-	const profileData =
-		mode !== "query"
-			? convertProfileToMarkdown({
-					profile: {
-						static: deduplicated.static,
-						dynamic: deduplicated.dynamic,
-					},
-					searchResults: { results: [] },
-				})
-			: ""
-	const searchResultsMemories =
-		mode !== "profile"
-			? `Search results for user's recent message: \n${deduplicated.searchResults
-					.map((memory) => `- ${memory}`)
-					.join("\n")}`
-			: ""
-
-	const memories = `${profileData}\n${searchResultsMemories}`.trim()
-
-	if (memories) {
-		logger.debug("Memory content preview for chat API", {
-			content: memories,
-			fullLength: memories.length,
-		})
-	}
-
 	if (systemPromptExists) {
 		logger.debug("Added memories to existing system prompt")
 		return messages.map((msg) =>
@@ -244,86 +113,20 @@ const addSystemPrompt = async (
 }
 
 /**
- * Converts an array of chat completion messages into a formatted conversation string.
- *
- * Transforms the messages array into a readable conversation format where each
- * message is prefixed with its role (User/Assistant) and messages are separated
- * by double newlines.
- *
- * @param messages - Array of chat completion message parameters
- * @returns Formatted conversation string with role prefixes
- *
- * @example
- * ```typescript
- * const messages = [
- *   { role: "user", content: "Hello!" },
- *   { role: "assistant", content: "Hi there!" },
- *   { role: "user", content: "How are you?" }
- * ]
- *
- * const conversation = getConversationContent(messages)
- * // Returns: "User: Hello!\n\nAssistant: Hi there!\n\nUser: How are you?"
- * ```
- */
-const getConversationContent = (
-	messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[],
-) => {
-	return messages
-		.map((msg) => {
-			const role = msg.role === "user" ? "User" : "Assistant"
-			const content = typeof msg.content === "string" ? msg.content : ""
-			return `${role}: ${content}`
-		})
-		.join("\n\n")
-}
-
-/**
- * Adds a new memory to the SuperMemory system.
- *
- * Saves the provided content as a memory with the specified container tag and
- * optional custom ID. Logs success or failure information for debugging.
- *
- * If customId starts with "conversation:" and messages are provided, uses the
- * /v4/conversations endpoint with structured messages instead of the memories endpoint.
- *
- * @param client - SuperMemory client instance
- * @param containerTag - The container tag/identifier for the memory
- * @param content - The content to save as a memory (used for fallback)
- * @param customId - Optional custom ID for the memory (e.g., conversation:456)
- * @param logger - Logger instance for debugging and info output
- * @param messages - Optional OpenAI messages array (for conversation endpoint)
- * @param apiKey - API key for direct conversation endpoint calls
- * @param baseUrl - Base URL for API calls
- * @returns Promise that resolves when memory is saved (or fails silently)
- *
- * @example
- * ```typescript
- * await addMemoryTool(
- *   supermemoryClient,
- *   "user-123",
- *   "User: Hello\n\nAssistant: Hi!",
- *   "conversation:456",
- *   logger,
- *   messages, // OpenAI messages array
- *   apiKey,
- *   baseUrl
- * )
- * ```
+ * Saves a conversation to Supermemory.
  */
-const addMemoryTool = async (
+const saveConversation = async (
 	client: Supermemory,
 	containerTag: string,
+	customId: string,
 	content: string,
-	customId: string | undefined,
 	logger: Logger,
 	messages?: OpenAI.Chat.Completions.ChatCompletionMessageParam[],
 	apiKey?: string,
 	baseUrl?: string,
 ): Promise<void> => {
 	try {
-		if (customId && messages && apiKey) {
-			const conversationId = customId.replace("conversation:", "")
-
+		if (messages && apiKey) {
 			// Convert OpenAI messages to conversation format
 			const conversationMessages = messages.map((msg) => ({
 				role: msg.role as "user" | "assistant" | "system" | "tool",
@@ -338,15 +141,19 @@ const addMemoryTool = async (
 										text: (c as { type: "text"; text: string }).text,
 									}))
 							: "",
+				// biome-ignore lint/suspicious/noExplicitAny: OpenAI message types
 				...((msg as any).name && { name: (msg as any).name }),
+				// biome-ignore lint/suspicious/noExplicitAny: OpenAI message types
 				...((msg as any).tool_calls && { tool_calls: (msg as any).tool_calls }),
+				// biome-ignore lint/suspicious/noExplicitAny: OpenAI message types
 				...((msg as any).tool_call_id && {
+					// biome-ignore lint/suspicious/noExplicitAny: OpenAI message types
 					tool_call_id: (msg as any).tool_call_id,
 				}),
 			}))
 
 			const response = await addConversation({
-				conversationId,
+				conversationId: customId,
 				messages: conversationMessages,
 				containerTags: [containerTag],
 				apiKey,
@@ -355,18 +162,18 @@ const addMemoryTool = async (
 
 			logger.info("Conversation saved successfully via /v4/conversations", {
 				containerTag,
-				conversationId,
+				customId,
 				messageCount: messages.length,
 				responseId: response.id,
 			})
 			return
 		}
 
-		// Fallback to old behavior for non-conversation memories
+		// Fallback when messages or apiKey is missing: store the plain-text content via client.add
 		const response = await client.add({
 			content,
 			containerTags: [containerTag],
-			customId,
+			customId: `conversation:${customId}`,
 		})
 
 		logger.info("Memory saved successfully", {
@@ -384,140 +191,111 @@ const addMemoryTool = async (
 
 /**
  * Creates SuperMemory middleware for OpenAI clients.
- *
- * This function creates middleware that automatically injects relevant memories
- * into OpenAI chat completions and optionally saves new memories. The middleware
- * can wrap existing OpenAI clients or create new ones with SuperMemory capabilities.
- *
- * @param containerTag - The container tag/identifier for memory search (e.g., user ID, project ID)
- * @param options - Optional configuration options for the middleware
- * @param options.conversationId - Optional conversation ID to group messages for contextual memory generation
- * @param options.verbose - Enable detailed logging of memory operations (default: false)
- * @param options.mode - Memory search mode: "profile" (all memories), "query" (search-based), or "full" (both) (default: "profile")
- * @param options.addMemory - Automatic memory storage mode: "always" or "never" (default: "never")
- * @returns Object with `wrapClient` and `createClient` methods
- * @throws {Error} When SUPERMEMORY_API_KEY environment variable is not set
- *
- * @example
- * ```typescript
- * const openaiWithSupermemory = createOpenAIMiddleware(openai, "user-123", {
- *   conversationId: "conversation-456",
- *   mode: "full",
- *   addMemory: "always",
- *   verbose: true
- * })
- *
- * ```
  */
 export function createOpenAIMiddleware(
 	openaiClient: OpenAI,
-	containerTag: string,
-	options?: OpenAIMiddlewareOptions,
+	options: SupermemoryOpenAIOptions & { apiKey: string },
 ) {
-	const logger = createLogger(options?.verbose ?? false)
-	const baseUrl = normalizeBaseUrl(options?.baseUrl)
+	const {
+		containerTag,
+		customId,
+		apiKey,
+		baseUrl,
+		verbose = false,
+		mode = "profile",
+		searchMode = "memories",
+		searchLimit = 10,
+		addMemory = "always",
+		promptTemplate,
+	} = options
+
+	const logger = createLogger(verbose)
+	const normalizedBaseUrl = normalizeBaseUrl(baseUrl)
 	const client = new Supermemory({
-		apiKey: process.env.SUPERMEMORY_API_KEY,
-		...(baseUrl !== "https://api.supermemory.ai" ? { baseURL: baseUrl } : {}),
+		apiKey,
+		...(normalizedBaseUrl !== "https://api.supermemory.ai"
+			? { baseURL: normalizedBaseUrl }
+			: {}),
 	})
 
-	const conversationId = options?.conversationId
-	const mode = options?.mode ?? "profile"
-	const addMemory = options?.addMemory ?? "never"
-
 	const originalCreate = openaiClient.chat.completions.create
 	const originalResponsesCreate = openaiClient.responses?.create
 
 	/**
-	 * Searches for memories and formats them for injection into API calls.
-	 *
-	 * This shared function handles memory search and formatting for both Chat Completions
-	 * and Responses APIs, reducing code duplication.
-	 *
-	 * @param queryText - The text to search for (empty string for profile-only mode)
-	 * @param containerTag - The container tag for memory search
-	 * @param logger - Logger instance
-	 * @param mode - Memory search mode
-	 * @param context - API context for logging differentiation
-	 * @returns Formatted memories string
+	 * Wraps chat.completions.create with memory injection
 	 */
-	const searchAndFormatMemories = async (
-		queryText: string,
-		containerTag: string,
-		logger: Logger,
-		mode: "profile" | "query" | "full",
-		context: "chat" | "responses",
+	const createWithMemory = async (
+		params: OpenAI.Chat.Completions.ChatCompletionCreateParams,
 	) => {
-		const memoriesResponse = await supermemoryProfileSearch(
-			containerTag,
-			queryText,
-			baseUrl,
-		)
+		const messages = Array.isArray(params.messages) ? params.messages : []
 
-		const memoryCountStatic = memoriesResponse.profile.static?.length || 0
-		const memoryCountDynamic = memoriesResponse.profile.dynamic?.length || 0
+		const userMessage = getLastUserMessage(messages)
+		if (mode !== "profile" && !userMessage) {
+			logger.debug("No user message found, skipping memory search")
+			return originalCreate.call(openaiClient.chat.completions, params)
+		}
 
-		logger.info(`Memory search completed for ${context} API`, {
+		logger.info("Starting memory search", {
 			containerTag,
-			memoryCountStatic,
-			memoryCountDynamic,
-			queryText:
-				queryText.substring(0, 100) + (queryText.length > 100 ? "..." : ""),
+			customId,
 			mode,
+			searchMode,
 		})
 
-		const deduplicated = deduplicateMemories({
-			static: memoriesResponse.profile.static,
-			dynamic: memoriesResponse.profile.dynamic,
-			searchResults: memoriesResponse.searchResults?.results,
-		})
-
-		logger.debug(`Memory deduplication completed for ${context} API`, {
-			static: {
-				original: memoryCountStatic,
-				deduplicated: deduplicated.static.length,
-			},
-			dynamic: {
-				original: memoryCountDynamic,
-				deduplicated: deduplicated.dynamic.length,
-			},
-			searchResults: {
-				original: memoriesResponse.searchResults?.results?.length,
-				deduplicated: deduplicated.searchResults.length,
-			},
-		})
+		const operations: Promise<unknown>[] = []
 
-		const profileData =
-			mode !== "query"
-				? convertProfileToMarkdown({
-						profile: {
-							static: deduplicated.static,
-							dynamic: deduplicated.dynamic,
-						},
-						searchResults: { results: [] },
-					})
-				: ""
-		const searchResultsMemories =
-			mode !== "profile"
-				? `Search results for user's ${context === "chat" ? "recent message" : "input"}: \n${deduplicated.searchResults
-						.map((memory) => `- ${memory}`)
-						.join("\n")}`
-				: ""
-
-		const memories = `${profileData}\n${searchResultsMemories}`.trim()
-
-		if (memories) {
-			logger.debug(`Memory content preview for ${context} API`, {
-				content: memories,
-				fullLength: memories.length,
-			})
+		// Save conversation if enabled
+		if (addMemory === "always" && userMessage?.trim()) {
+			const content = getConversationContent(messages)
+			operations.push(
+				saveConversation(
+					client,
+					containerTag,
+					customId,
+					content,
+					logger,
+					messages,
+					apiKey,
+					normalizedBaseUrl,
+				),
+			)
 		}
 
-		return memories
+		// Fetch and inject memories
+		const queryText = mode !== "profile" ? userMessage : ""
+		operations.push(
+			buildMemoriesText({
+				containerTag,
+				queryText,
+				mode,
+				baseUrl: normalizedBaseUrl,
+				apiKey,
+				logger,
+				promptTemplate,
+				searchMode,
+				searchLimit,
+			}),
+		)
+
+		const results = await Promise.all(operations)
+		const memories = results[results.length - 1] as string
+
+		// Only inject memories if we actually have some
+		const enhancedMessages = memories
+			? injectMemoriesIntoMessages(messages, memories, logger)
+			: messages
+
+		return originalCreate.call(openaiClient.chat.completions, {
+			...params,
+			messages: enhancedMessages,
+		})
 	}
 
+	/**
+	 * Wraps responses.create with memory injection
+	 */
 	const createResponsesWithMemory = async (
-		params: Parameters<typeof originalResponsesCreate>[0],
+		params: Parameters<NonNullable<typeof originalResponsesCreate>>[0],
 	) => {
 		if (!originalResponsesCreate) {
 			throw new Error(
@@ -534,36 +312,48 @@ export function createOpenAIMiddleware(
 
 		logger.info("Starting memory search for Responses API", {
 			containerTag,
-			conversationId,
+			customId,
 			mode,
+			searchMode,
 		})
 
-		const operations: Promise<any>[] = []
+		const operations: Promise<unknown>[] = []
 
+		// Save input if enabled (Responses API doesn't have messages array)
 		if (addMemory === "always" && input?.trim()) {
-			const content = conversationId ? `Input: ${input}` : input
-			const customId = conversationId
-				? `conversation:${conversationId}`
-				: undefined
-
+			const content = `Input: ${input}`
 			operations.push(
-				addMemoryTool(client, containerTag, content, customId, logger),
+				saveConversation(
+					client,
+					containerTag,
+					customId,
+					content,
+					logger,
+					undefined,
+					apiKey,
+					normalizedBaseUrl,
+				),
 			)
 		}
 
+		// Fetch memories
 		const queryText = mode !== "profile" ? input : ""
 		operations.push(
-			searchAndFormatMemories(
-				queryText,
+			buildMemoriesText({
 				containerTag,
-				logger,
+				queryText,
 				mode,
-				"responses",
-			),
+				baseUrl: normalizedBaseUrl,
+				apiKey,
+				logger,
+				promptTemplate,
+				searchMode,
+				searchLimit,
+			}),
 		)
 
 		const results = await Promise.all(operations)
-		const memories = results[results.length - 1] // Memory search result is always last
+		const memories = results[results.length - 1] as string
 
 		const enhancedInstructions = memories
 			? `${params.instructions || ""}\n\n${memories}`.trim()
@@ -575,69 +365,10 @@ export function createOpenAIMiddleware(
 		})
 	}
 
-	const createWithMemory = async (
-		params: OpenAI.Chat.Completions.ChatCompletionCreateParams,
-	) => {
-		const messages = Array.isArray(params.messages) ? params.messages : []
-
-		if (mode !== "profile") {
-			const userMessage = getLastUserMessage(messages)
-			if (!userMessage) {
-				logger.debug("No user message found, skipping memory search")
-				return originalCreate.call(openaiClient.chat.completions, params)
-			}
-		}
-
-		logger.info("Starting memory search", {
-			containerTag,
-			conversationId,
-			mode,
-		})
-
-		const operations: Promise<any>[] = []
-
-		if (addMemory === "always") {
-			const userMessage = getLastUserMessage(messages)
-			if (userMessage?.trim()) {
-				const content = conversationId
-					? getConversationContent(messages)
-					: userMessage
-				const customId = conversationId
-					? `conversation:${conversationId}`
-					: undefined
-
-				operations.push(
-					addMemoryTool(
-						client,
-						containerTag,
-						content,
-						customId,
-						logger,
-						messages,
-						process.env.SUPERMEMORY_API_KEY,
-						baseUrl,
-					),
-				)
-			}
-		}
-
-		operations.push(
-			addSystemPrompt(messages, containerTag, logger, mode, baseUrl),
-		)
-
-		const results = await Promise.all(operations)
-		const enhancedMessages = results[results.length - 1] // Enhanced messages result is always last
-
-		return originalCreate.call(openaiClient.chat.completions, {
-			...params,
-			messages: enhancedMessages,
-		})
-	}
-
+	// Replace original methods with memory-enhanced versions
 	openaiClient.chat.completions.create =
 		createWithMemory as typeof originalCreate
 
-	// Wrap Responses API if available
 	if (originalResponsesCreate) {
 		openaiClient.responses.create =
 			createResponsesWithMemory as typeof originalResponsesCreate
diff --git a/packages/tools/src/shared/index.ts b/packages/tools/src/shared/index.ts
index 5a6e0f7ba..b2ed2ee90 100644
--- a/packages/tools/src/shared/index.ts
+++ b/packages/tools/src/shared/index.ts
@@ -3,6 +3,7 @@ export type {
 	MemoryPromptData,
 	PromptTemplate,
 	MemoryMode,
+	SearchMode,
 	AddMemoryMode,
 	Logger,
 	ProfileStructure,
diff --git a/packages/tools/src/shared/memory-client.ts b/packages/tools/src/shared/memory-client.ts
index 58754c895..97bbfa5e0 100644
--- a/packages/tools/src/shared/memory-client.ts
+++ b/packages/tools/src/shared/memory-client.ts
@@ -1,7 +1,9 @@
+import Supermemory from "supermemory"
 import { deduplicateMemories } from "../tools-shared"
 import type {
 	Logger,
 	MemoryMode,
+	SearchMode,
 	MemoryPromptData,
 	ProfileStructure,
 	PromptTemplate,
@@ -10,6 +12,7 @@ import {
 	convertProfileToMarkdown,
 	defaultPromptTemplate,
 } from "./prompt-builder"
+import { createSupermemoryClient } from "./context"
 
 /**
  * Fetches profile and search results from the Supermemory API.
@@ -72,12 +75,153 @@ export interface BuildMemoriesTextOptions {
 	apiKey: string
 	logger: Logger
 	promptTemplate?: PromptTemplate
+	/**
+	 * Search mode for memory retrieval:
+	 * - "memories": Search only memory entries (default)
+	 * - "hybrid": Search both memories AND document chunks (recommended for RAG)
+	 * - "documents": Search only document chunks
+	 */
+	searchMode?: SearchMode
+	/** Limit passed to each search request, for every searchMode (default: 10); "hybrid" issues two requests, so it can return more than this many results */
+	searchLimit?: number
+}
+
+/**
+ * Normalized search hit returned by both the memory search and the document-chunk search.
+ */
+interface SearchResult {
+	content: string // memory text or chunk text; "" when the API returned none
+	metadata?: Record<string, unknown> // memory metadata, or the parent document's metadata for chunks
+}
+
+/**
+ * Searches memory entries via client.search.memories(), with chunk inclusion turned off.
+ *
+ * @param client - Supermemory client instance
+ * @param containerTag - Container tag the search is scoped to
+ * @param query - Search query text
+ * @param limit - Limit forwarded to the search request
+ * @returns One SearchResult per returned entry; a missing memory text becomes ""
+ */
+async function searchMemoriesSDK(
+	client: Supermemory,
+	containerTag: string,
+	query: string,
+	limit: number,
+): Promise<SearchResult[]> {
+	const response = await client.search.memories({
+		q: query,
+		containerTag,
+		limit,
+		include: { chunks: false },
+	})
+	return (response.results || []).map((r) => ({
+		content: r.memory || "",
+		metadata: r.metadata ?? undefined, // normalize null metadata to undefined
+	}))
+}
+
+/**
+ * Searches for document chunks using the Supermemory SDK.
+ * Only includes chunks marked as relevant (truthy isRelevant).
+ *
+ * @param client - Supermemory client instance
+ * @param containerTag - Container tag the search is scoped to (sent as a one-element containerTags array)
+ * @param query - Search query text
+ * @param limit - Limit forwarded to the documents search; it is not applied to the flattened chunk list
+ * @returns Relevant chunks from all returned documents, each carrying its document's metadata
+ */
+async function searchDocumentsSDK(
+	client: Supermemory,
+	containerTag: string,
+	query: string,
+	limit: number,
+): Promise<SearchResult[]> {
+	const response = await client.search.documents({
+		q: query,
+		containerTags: [containerTag],
+		limit,
+	})
+	// Flatten: one SearchResult per relevant chunk, across all returned documents
+	const results: SearchResult[] = []
+	for (const doc of response.results || []) {
+		for (const chunk of doc.chunks || []) {
+			// Only include chunks marked as relevant
+			if (chunk.isRelevant) {
+				results.push({
+					content: chunk.content || "",
+					metadata: doc.metadata ?? undefined, // document-level metadata, null normalized to undefined
+				})
+			}
+		}
+	}
+	return results
+}
+
+/**
+ * Dispatches the query to the memory search, the document search, or both, according to searchMode.
+ *
+ * @param client - Supermemory client instance
+ * @param containerTag - Container tag for scoping
+ * @param query - Search query text
+ * @param searchMode - Search mode: "memories", "hybrid", or "documents"
+ * @param limit - Limit forwarded to each underlying search; hybrid results are concatenated without truncation
+ * @param logger - Logger instance
+ * @returns Search results; in hybrid mode, memory results followed by document chunks
+ */
+async function performSearch(
+	client: Supermemory,
+	containerTag: string,
+	query: string,
+	searchMode: SearchMode,
+	limit: number,
+	logger: Logger,
+): Promise<SearchResult[]> {
+	logger.debug("Performing search", { searchMode, containerTag, limit })
+
+	switch (searchMode) {
+		case "memories":
+			return searchMemoriesSDK(client, containerTag, query, limit)
+
+		case "documents":
+			return searchDocumentsSDK(client, containerTag, query, limit)
+
+		case "hybrid": {
+			// Run both searches in parallel; if either rejects, the whole hybrid search rejects
+			const [memoriesResults, documentsResults] = await Promise.all([
+				searchMemoriesSDK(client, containerTag, query, limit),
+				searchDocumentsSDK(client, containerTag, query, limit),
+			])
+
+			logger.debug("Hybrid search completed", {
+				memoriesCount: memoriesResults.length,
+				documentsCount: documentsResults.length,
+			})
+
+			// Combine results, memories first
+			return [...memoriesResults, ...documentsResults]
+		}
+
+		default: // not reachable via the SearchMode union; handles unexpected runtime values
+			logger.warn(`Unknown search mode: ${searchMode}, defaulting to memories`)
+			return searchMemoriesSDK(client, containerTag, query, limit)
+	}
 }
 
 /**
  * Fetches memories from the API, deduplicates them, and formats them into
  * the final string to be injected into the system prompt.
  *
+ * When a search runs (mode "query" or "full" with non-empty queryText), searchMode picks the SDK endpoint (default: "memories"):
+ * - "memories": Uses search.memories() for memory entries
+ * - "documents": Uses search.documents() for document chunks
+ * - "hybrid": Uses both endpoints in parallel
+ *
+ * The mode option controls whether profile data is included:
+ * - "profile": Only profile data (static/dynamic), no search
+ * - "query": Only search results, no profile data
+ * - "full": Both profile data and search results
+ *
  * @param options - Configuration for building memories text
  * @returns The final formatted memories string ready for injection
  */
@@ -92,48 +236,77 @@ export const buildMemoriesText = async (
 		apiKey,
 		logger,
 		promptTemplate = defaultPromptTemplate,
+		searchMode = "memories",
+		searchLimit = 10,
 	} = options
 
-	const memoriesResponse = await supermemoryProfileSearch(
-		containerTag,
-		queryText,
-		baseUrl,
-		apiKey,
-	)
+	// Fetch profile data when mode includes profile (profile or full)
+	let profileData: ProfileStructure | null = null
+	if (mode !== "query") {
+		profileData = await supermemoryProfileSearch(
+			containerTag,
+			mode === "profile" ? "" : queryText, // Only send query for full mode
+			baseUrl,
+			apiKey,
+		)
 
-	const memoryCountStatic = memoriesResponse.profile.static?.length || 0
-	const memoryCountDynamic = memoriesResponse.profile.dynamic?.length || 0
+		const memoryCountStatic = profileData.profile.static?.length || 0
+		const memoryCountDynamic = profileData.profile.dynamic?.length || 0
 
-	logger.info("Memory search completed", {
-		containerTag,
-		memoryCountStatic,
-		memoryCountDynamic,
-		queryText:
-			queryText.substring(0, 100) + (queryText.length > 100 ? "..." : ""),
-		mode,
-	})
+		logger.info("Profile search completed", {
+			containerTag,
+			memoryCountStatic,
+			memoryCountDynamic,
+			mode,
+		})
+	}
+
+	// Perform SDK-based search when mode includes query (query or full)
+	let searchResults: SearchResult[] = []
+	if (mode !== "profile" && queryText) {
+		const client = createSupermemoryClient({ apiKey, baseUrl })
+		searchResults = await performSearch(
+			client,
+			containerTag,
+			queryText,
+			searchMode,
+			searchLimit,
+			logger,
+		)
+
+		logger.info("Search completed", {
+			containerTag,
+			searchMode,
+			searchLimit,
+			resultCount: searchResults.length,
+			queryText:
+				queryText.substring(0, 100) + (queryText.length > 100 ? "..." : ""),
+		})
+	}
 
+	// Deduplicate profile memories together with the search results
 	const deduplicated = deduplicateMemories({
-		static: memoriesResponse.profile.static,
-		dynamic: memoriesResponse.profile.dynamic,
-		searchResults: memoriesResponse.searchResults?.results,
+		static: profileData?.profile.static,
+		dynamic: profileData?.profile.dynamic,
+		searchResults: searchResults.map((r) => ({ memory: r.content })),
 	})
 
 	logger.debug("Memory deduplication completed", {
 		static: {
-			original: memoryCountStatic,
+			original: profileData?.profile.static?.length || 0,
 			deduplicated: deduplicated.static.length,
 		},
 		dynamic: {
-			original: memoryCountDynamic,
+			original: profileData?.profile.dynamic?.length || 0,
 			deduplicated: deduplicated.dynamic.length,
 		},
 		searchResults: {
-			original: memoriesResponse.searchResults?.results?.length,
+			original: searchResults.length,
 			deduplicated: deduplicated.searchResults?.length,
 		},
 	})
 
+	// Build user memories from profile (static + dynamic)
 	const userMemories =
 		mode !== "query"
 			? convertProfileToMarkdown({
@@ -144,9 +317,11 @@ export const buildMemoriesText = async (
 					searchResults: { results: [] },
 				})
 			: ""
+
+	// Build search results text
 	const generalSearchMemories =
-		mode !== "profile"
-			? `Search results for user's recent message: \n${deduplicated.searchResults
+		mode !== "profile" && deduplicated.searchResults.length > 0
+			? `Search results for user's recent message:\n${deduplicated.searchResults
 					.map((memory) => `- ${memory}`)
 					.join("\n")}`
 			: ""
@@ -154,7 +329,10 @@ export const buildMemoriesText = async (
 	const promptData: MemoryPromptData = {
 		userMemories,
 		generalSearchMemories,
-		searchResults: memoriesResponse.searchResults?.results ?? [],
+		searchResults: searchResults.map((r) => ({
+			memory: r.content,
+			metadata: r.metadata,
+		})),
 	}
 
 	const memories = promptTemplate(promptData)
diff --git a/packages/tools/src/shared/types.ts b/packages/tools/src/shared/types.ts
index 421785f52..285047f02 100644
--- a/packages/tools/src/shared/types.ts
+++ b/packages/tools/src/shared/types.ts
@@ -47,6 +47,14 @@ export type PromptTemplate = (data: MemoryPromptData) => string
  */
 export type MemoryMode = "profile" | "query" | "full"
 
+/**
+ * Search mode for memory retrieval:
+ * - "memories": Search only memory entries (default)
+ * - "hybrid": Search both memories AND document chunks in parallel, memories listed first (recommended for RAG)
+ * - "documents": Search only document chunks
+ */
+export type SearchMode = "memories" | "hybrid" | "documents"
+
 /**
  * Memory persistence mode:
  * - "always": Automatically save conversations as memories
diff --git a/packages/tools/test-supermemory.ts b/packages/tools/test-supermemory.ts
index b3863c7ea..e59c13285 100644
--- a/packages/tools/test-supermemory.ts
+++ b/packages/tools/test-supermemory.ts
@@ -10,7 +10,9 @@ const openai = new OpenAI({
 })
 
 // Wrap OpenAI client with supermemory
-const openaiWithSupermemory = withSupermemory(openai, "test_user_123", {
+const openaiWithSupermemory = withSupermemory(openai, {
+	containerTag: "test_user_123",
+	customId: "test-conversation",
 	verbose: true, // Enable logging to see what's happening
 	mode: "full", // Search both profile and query memories
 	addMemory: "always", // Auto-save conversations as memories
diff --git a/packages/tools/test/chatapp/app/api/openai-chat/route.ts b/packages/tools/test/chatapp/app/api/openai-chat/route.ts
index 4e926fb05..b5e3067e1 100644
--- a/packages/tools/test/chatapp/app/api/openai-chat/route.ts
+++ b/packages/tools/test/chatapp/app/api/openai-chat/route.ts
@@ -5,17 +5,18 @@ import { withSupermemory } from "../../../../../src/openai"
 export const runtime = "nodejs"
 
 export async function POST(req: Request) {
-	const { messages, conversationId } = (await req.json()) as {
+	const { messages, customId } = (await req.json()) as {
 		messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[]
-		conversationId: string
+		customId: string
 	}
 
 	const openai = new OpenAI({
 		apiKey: process.env.OPENAI_API_KEY,
 	})
 
-	const openaiWithSupermemory = withSupermemory(openai, "user-123", {
-		conversationId,
+	const openaiWithSupermemory = withSupermemory(openai, {
+		containerTag: "user-123",
+		customId,
 		mode: "full",
 		addMemory: "always",
 		verbose: true,
diff --git a/packages/tools/test/openai-responses-test.ts b/packages/tools/test/openai-responses-test.ts
index 776e75740..e55abefe1 100644
--- a/packages/tools/test/openai-responses-test.ts
+++ b/packages/tools/test/openai-responses-test.ts
@@ -5,7 +5,9 @@ const openai = new OpenAI({
 	apiKey: process.env.OPENAI_API_KEY,
 })
 
-const openaiWithSupermemory = withSupermemory(openai, "user_id_life", {
+const openaiWithSupermemory = withSupermemory(openai, {
+	containerTag: "user_id_life",
+	customId: "test-conversation",
 	verbose: true,
 	mode: "full",
 	addMemory: "always",
diff --git a/packages/tools/test/openai/unit.test.ts b/packages/tools/test/openai/unit.test.ts
new file mode 100644
index 000000000..a8735578f
--- /dev/null
+++ b/packages/tools/test/openai/unit.test.ts
@@ -0,0 +1,474 @@
+/**
+ * Unit tests for the OpenAI withSupermemory wrapper and middleware
+ */
+
+import { describe, it, expect, beforeEach, vi, afterEach } from "vitest"
+import { withSupermemory } from "../../src/openai"
+import { createMockProfileResponse } from "../utils/supermemory-mocks"
+
+// Builds an OpenAI client stand-in whose two create() endpoints are vi.fn() spies
+const createMockOpenAIClient = () => {
+	// Canned chat.completions.create() reply
+	const chatSpy = vi.fn().mockResolvedValue({
+		choices: [{ message: { content: "Hello!", role: "assistant" } }],
+	})
+	// Canned responses.create() reply
+	const responsesSpy = vi.fn().mockResolvedValue({
+		output: [{ type: "message", content: "Hello!" }],
+	})
+
+	const chat = { completions: { create: chatSpy } }
+	const responses = { create: responsesSpy }
+
+	// The underscore-prefixed fields expose the raw spies so tests can assert
+	// on the arguments that reach the underlying client, even after the client
+	// has been passed through withSupermemory().
+	return {
+		chat,
+		responses,
+		_mockCreate: chatSpy,
+		_mockResponsesCreate: responsesSpy,
+	} as any
+}
+
+describe("Unit: OpenAI withSupermemory", () => {
+	let originalEnv: string | undefined
+	let originalFetch: typeof globalThis.fetch
+	let fetchMock: ReturnType<typeof vi.fn>
+
+	beforeEach(() => {
+		originalFetch = globalThis.fetch // restored in afterEach
+		originalEnv = process.env.SUPERMEMORY_API_KEY // restored in afterEach
+		fetchMock = vi.fn()
+		globalThis.fetch = fetchMock as unknown as typeof fetch
+		process.env.SUPERMEMORY_API_KEY = "test-api-key"
+		vi.clearAllMocks()
+	})
+
+	afterEach(() => {
+		if (originalEnv !== undefined) { // "" is restored too, not deleted
+			process.env.SUPERMEMORY_API_KEY = originalEnv
+		} else {
+			delete process.env.SUPERMEMORY_API_KEY
+		}
+		globalThis.fetch = originalFetch // undo the fetchMock swap from beforeEach
+	})
+
+	describe("Initialization", () => {
+		it("should throw error if SUPERMEMORY_API_KEY is not set and no apiKey provided", () => {
+			delete process.env.SUPERMEMORY_API_KEY
+			const fakeClient = createMockOpenAIClient()
+
+			expect(() => {
+				withSupermemory(fakeClient, {
+					containerTag: "user-123",
+					customId: "conv-456",
+				})
+			}).toThrow("API key is required")
+		})
+
+		it("should return the wrapped client with valid config", () => {
+			const fakeClient = createMockOpenAIClient()
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+			})
+
+			expect(memoryClient).toBeDefined()
+			expect(memoryClient.chat.completions.create).toBeDefined()
+		})
+
+		it("should accept all options in the options object", () => {
+			const fakeClient = createMockOpenAIClient()
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+				verbose: true,
+				mode: "full",
+				addMemory: "always",
+			})
+			expect(memoryClient).toBeDefined()
+		})
+
+		it("should work with minimal required options", () => {
+			const fakeClient = createMockOpenAIClient()
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+			})
+			expect(memoryClient).toBeDefined()
+		})
+	})
+
+	describe("customId as required parameter", () => {
+		it("should require customId in options object", () => {
+			const mockClient = createMockOpenAIClient()
+
+			// Happy path: a non-empty customId is accepted without throwing
+			const wrapped = withSupermemory(mockClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+			})
+			expect(wrapped).toBeDefined()
+		})
+
+		it("should throw with helpful message for empty customId", () => {
+			const mockClient = createMockOpenAIClient()
+
+			expect(() => {
+				withSupermemory(mockClient, {
+					containerTag: "user-123",
+					customId: "",
+				})
+			}).toThrow("[supermemory] customId is required")
+		})
+
+		it("should throw with helpful message for whitespace-only customId", () => {
+			const mockClient = createMockOpenAIClient()
+
+			expect(() => {
+				withSupermemory(mockClient, {
+					containerTag: "user-123",
+					customId: "   ",
+				})
+			}).toThrow("[supermemory] customId is required")
+		})
+
+		it("should pass customId through to middleware", async () => {
+			fetchMock.mockResolvedValue({
+				ok: true,
+				json: () => Promise.resolve(createMockProfileResponse(["Test memory"])),
+			})
+
+			const mockClient = createMockOpenAIClient()
+			const wrapped = withSupermemory(mockClient, {
+				containerTag: "user-123",
+				customId: "my-conversation-id",
+				mode: "profile",
+				addMemory: "always",
+			})
+
+			await wrapped.chat.completions.create({
+				model: "gpt-4",
+				messages: [{ role: "user", content: "Hello" }],
+			})
+
+			// Smoke check only: confirms a Supermemory request was issued. Nothing here
+			// inspects a request body, so customId propagation is not asserted yet.
+			expect(fetchMock).toHaveBeenCalled()
+		})
+	})
+
+	describe("Chat Completions - Memory injection", () => {
+		it("should inject memories into system prompt in profile mode", async () => {
+			fetchMock.mockResolvedValue({
+				ok: true,
+				json: () =>
+					Promise.resolve(
+						createMockProfileResponse(
+							["User likes TypeScript"],
+							["Currently working on a monorepo"],
+						),
+					),
+			})
+
+			const fakeClient = createMockOpenAIClient()
+			const chatSpy = fakeClient._mockCreate
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+				mode: "profile",
+			})
+
+			await memoryClient.chat.completions.create({
+				model: "gpt-4",
+				messages: [{ role: "user", content: "What do you know about me?" }],
+			})
+
+			// The underlying client must be called exactly once, with enriched messages
+			expect(chatSpy).toHaveBeenCalledTimes(1)
+			const passedMessages = chatSpy.mock.calls[0][0].messages
+
+			// A system message carrying both profile memories must come first
+			expect(passedMessages[0].role).toBe("system")
+			expect(passedMessages[0].content).toContain("User likes TypeScript")
+			expect(passedMessages[0].content).toContain(
+				"Currently working on a monorepo",
+			)
+		})
+
+		it("should append memories to existing system prompt", async () => {
+			fetchMock.mockResolvedValue({
+				ok: true,
+				json: () =>
+					Promise.resolve(
+						createMockProfileResponse(["User prefers dark mode"]),
+					),
+			})
+
+			const fakeClient = createMockOpenAIClient()
+			const chatSpy = fakeClient._mockCreate
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+				mode: "profile",
+			})
+
+			await memoryClient.chat.completions.create({
+				model: "gpt-4",
+				messages: [
+					{ role: "system", content: "You are a helpful assistant." },
+					{ role: "user", content: "Hello!" },
+				],
+			})
+
+			const passedMessages = chatSpy.mock.calls[0][0].messages
+			const systemEntry = passedMessages.find((m: any) => m.role === "system")
+			expect(systemEntry.content).toContain("You are a helpful assistant.")
+			expect(systemEntry.content).toContain("User prefers dark mode")
+		})
+
+		it("should search memories based on user message in query mode", async () => {
+			fetchMock.mockResolvedValue({
+				ok: true,
+				json: () =>
+					Promise.resolve(
+						createMockProfileResponse([], [], ["TypeScript is their favorite"]),
+					),
+			})
+
+			const fakeClient = createMockOpenAIClient()
+			const chatSpy = fakeClient._mockCreate
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+				mode: "query",
+				addMemory: "never",
+			})
+
+			await memoryClient.chat.completions.create({
+				model: "gpt-4",
+				messages: [
+					{ role: "user", content: "What's my favorite programming language?" },
+				],
+			})
+
+			// The first Supermemory request must carry the user's text as the query
+			expect(fetchMock.mock.calls.length).toBeGreaterThan(0)
+			const firstRequest = fetchMock.mock.calls[0]
+			const requestBody = JSON.parse(firstRequest?.[1]?.body)
+			expect(requestBody.q).toBe("What's my favorite programming language?")
+			expect(requestBody.containerTag).toBe("user-123")
+
+			const passedMessages = chatSpy.mock.calls[0][0].messages
+			expect(passedMessages[0].content).toContain(
+				"TypeScript is their favorite",
+			)
+		})
+
+		it("should combine profile and search in full mode", async () => {
+			fetchMock.mockResolvedValue({
+				ok: true,
+				json: () =>
+					Promise.resolve(
+						createMockProfileResponse(
+							["Name: Alice"],
+							["Likes coffee"],
+							["Recently discussed Python"],
+						),
+					),
+			})
+
+			const fakeClient = createMockOpenAIClient()
+			const chatSpy = fakeClient._mockCreate
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+				mode: "full",
+			})
+
+			await memoryClient.chat.completions.create({
+				model: "gpt-4",
+				messages: [{ role: "user", content: "Tell me about myself" }],
+			})
+
+			const passedMessages = chatSpy.mock.calls[0][0].messages
+			const systemPrompt = passedMessages[0].content
+			expect(systemPrompt).toContain("Name: Alice")
+			expect(systemPrompt).toContain("Likes coffee")
+			expect(systemPrompt).toContain("Recently discussed Python")
+		})
+
+		it("should skip memory search when no user message in query mode", async () => {
+			const fakeClient = createMockOpenAIClient()
+			const chatSpy = fakeClient._mockCreate
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+				mode: "query",
+			})
+
+			await memoryClient.chat.completions.create({
+				model: "gpt-4",
+				messages: [{ role: "system", content: "You are a helpful assistant." }],
+			})
+
+			// With no user message present, Supermemory must not be contacted at all
+			expect(fetchMock).not.toHaveBeenCalled()
+			// The request is still forwarded to the underlying client exactly once
+			expect(chatSpy).toHaveBeenCalledTimes(1)
+		})
+	})
+
+	describe("Chat Completions - Memory storage", () => {
+		it("should not save memory when addMemory is never", async () => {
+			fetchMock.mockResolvedValue({
+				ok: true,
+				json: () => Promise.resolve(createMockProfileResponse()),
+			})
+
+			const fakeClient = createMockOpenAIClient()
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+				addMemory: "never",
+			})
+
+			await memoryClient.chat.completions.create({
+				model: "gpt-4",
+				messages: [{ role: "user", content: "Remember this!" }],
+			})
+
+			// Exactly one request (the profile lookup) and no follow-up save request
+			expect(fetchMock).toHaveBeenCalledTimes(1)
+			const requestUrl = fetchMock.mock.calls[0]?.[0]
+			expect(requestUrl).toContain("/v4/profile")
+		})
+	})
+
+	describe("Responses API", () => {
+		it("should inject memories into instructions for Responses API", async () => {
+			fetchMock.mockResolvedValue({
+				ok: true,
+				json: () =>
+					Promise.resolve(createMockProfileResponse(["User is a developer"])),
+			})
+
+			const fakeClient = createMockOpenAIClient()
+			const responsesSpy = fakeClient._mockResponsesCreate
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+				mode: "profile",
+			})
+
+			await memoryClient.responses.create({
+				model: "gpt-4o",
+				instructions: "You are a helpful assistant.",
+				input: "What do you know about me?",
+			})
+
+			expect(responsesSpy).toHaveBeenCalledTimes(1)
+			const passedParams = responsesSpy.mock.calls[0][0]
+			expect(passedParams.instructions).toContain(
+				"You are a helpful assistant.",
+			)
+			expect(passedParams.instructions).toContain("User is a developer")
+		})
+	})
+
+	describe("Error handling", () => {
+		it("should propagate API errors from Supermemory", async () => {
+			fetchMock.mockResolvedValue({
+				ok: false,
+				status: 500,
+				statusText: "Internal Server Error",
+				text: () => Promise.resolve("Server error"),
+			})
+
+			const fakeClient = createMockOpenAIClient()
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+				mode: "profile",
+			})
+
+			await expect(
+				memoryClient.chat.completions.create({
+					model: "gpt-4",
+					messages: [{ role: "user", content: "Hello" }],
+				}),
+			).rejects.toThrow("Supermemory profile search failed")
+		})
+
+		it("should handle empty memories gracefully", async () => {
+			fetchMock.mockResolvedValue({
+				ok: true,
+				json: () => Promise.resolve(createMockProfileResponse()),
+			})
+
+			const fakeClient = createMockOpenAIClient()
+			const chatSpy = fakeClient._mockCreate
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+			})
+
+			await memoryClient.chat.completions.create({
+				model: "gpt-4",
+				messages: [{ role: "user", content: "Hello" }],
+			})
+
+			// An empty profile must not stop the request from reaching the client
+			expect(chatSpy).toHaveBeenCalledTimes(1)
+		})
+	})
+
+	describe("Options defaults", () => {
+		it("should default to profile mode", async () => {
+			fetchMock.mockResolvedValue({
+				ok: true,
+				json: () => Promise.resolve(createMockProfileResponse(["A memory"])),
+			})
+
+			const fakeClient = createMockOpenAIClient()
+			// `mode` is deliberately omitted so the default is exercised
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+				addMemory: "never",
+			})
+
+			await memoryClient.chat.completions.create({
+				model: "gpt-4",
+				messages: [{ role: "user", content: "Hello" }],
+			})
+
+			// A profile-mode request carries no query text, so `q` must be absent
+			const requestBody = JSON.parse(fetchMock.mock.calls[0]?.[1]?.body)
+			expect(requestBody.q).toBeUndefined()
+			expect(requestBody.containerTag).toBe("user-123")
+		})
+
+		it("should default to always for addMemory", async () => {
+			fetchMock.mockResolvedValue({
+				ok: true,
+				json: () => Promise.resolve(createMockProfileResponse()),
+			})
+
+			const fakeClient = createMockOpenAIClient()
+			const memoryClient = withSupermemory(fakeClient, {
+				containerTag: "user-123",
+				customId: "conv-456",
+			})
+
+			await memoryClient.chat.completions.create({
+				model: "gpt-4",
+				messages: [{ role: "user", content: "Hello" }],
+			})
+
+			// Two requests expected: the profile lookup plus the default "always" save
+			expect(fetchMock).toHaveBeenCalledTimes(2)
+		})
+	})
+})
diff --git a/packages/tools/test/utils/supermemory-mocks.ts b/packages/tools/test/utils/supermemory-mocks.ts
new file mode 100644
index 000000000..f8eb31d94
--- /dev/null
+++ b/packages/tools/test/utils/supermemory-mocks.ts
@@ -0,0 +1,28 @@
+/**
+ * Shared mock helpers for Supermemory tests.
+ */
+
+/** Wraps one memory string in the `{ memory }` entry shape shared by every list below. */
+const toMemoryEntry = (memory: string) => ({ memory })
+
+/**
+ * Builds the canned response body that tests hand to a mocked `fetch`.
+ *
+ * @param staticMemories - Strings emitted under `profile.static`.
+ * @param dynamicMemories - Strings emitted under `profile.dynamic`.
+ * @param searchResults - Strings emitted under `searchResults.results`.
+ * @returns An object with `profile` and `searchResults` keys; every input string
+ *   appears as a `{ memory }` entry, and omitted arguments yield empty lists.
+ */
+export const createMockProfileResponse = (
+	staticMemories: string[] = [],
+	dynamicMemories: string[] = [],
+	searchResults: string[] = [],
+) => {
+	const profile = {
+		static: staticMemories.map(toMemoryEntry),
+		dynamic: dynamicMemories.map(toMemoryEntry),
+	}
+	const results = searchResults.map(toMemoryEntry)
+	return { profile, searchResults: { results } }
+}
diff --git a/packages/tools/test/with-supermemory/unit.test.ts b/packages/tools/test/with-supermemory/unit.test.ts
index b20eb6f2b..3ea1de8e8 100644
--- a/packages/tools/test/with-supermemory/unit.test.ts
+++ b/packages/tools/test/with-supermemory/unit.test.ts
@@ -32,20 +32,7 @@ const createMockLanguageModel = (): LanguageModelV2 => ({
 	doStream: vi.fn(),
 })
 
-// Mock profile API response
-const createMockProfileResponse = (
-	staticMemories: string[] = [],
-	dynamicMemories: string[] = [],
-	searchResults: string[] = [],
-) => ({
-	profile: {
-		static: staticMemories.map((memory) => ({ memory })),
-		dynamic: dynamicMemories.map((memory) => ({ memory })),
-	},
-	searchResults: {
-		results: searchResults.map((memory) => ({ memory })),
-	},
-})
+import { createMockProfileResponse } from "../utils/supermemory-mocks"
 
 describe("Unit: withSupermemory", () => {
 	let originalEnv: string | undefined