AmritaBot
diff --git a/docs/docs/.vitepress/config.mts b/docs/docs/.vitepress/config.mts
Lines changed: 4 additions & 4 deletions
diff --git a/docs/docs/guide/api-reference/classes/ChatManager.md b/docs/docs/guide/api-reference/classes/ChatManager.md
Lines changed: 0 additions & 8 deletions
diff --git a/docs/docs/guide/api-reference/classes/ClientManager.md b/docs/docs/guide/api-reference/classes/ClientManager.md
Lines changed: 0 additions & 2 deletions
diff --git a/docs/docs/guide/api-reference/classes/MemoryLimiter.md b/docs/docs/guide/api-reference/classes/MemoryLimiter.md
Lines changed: 0 additions & 4 deletions
diff --git a/docs/docs/guide/api-reference/classes/ModelAdapter.md b/docs/docs/guide/api-reference/classes/ModelAdapter.md
Lines changed: 1 addition & 1 deletion
diff --git a/docs/docs/guide/api-reference/classes/RequestMetadata.md b/docs/docs/guide/api-reference/classes/RequestMetadata.md
Lines changed: 34 additions & 0 deletions
diff --git a/docs/docs/guide/api-reference/classes/StrategyLikedObject.md b/docs/docs/guide/api-reference/classes/StrategyLikedObject.md
Lines changed: 0 additions & 14 deletions
diff --git a/docs/docs/guide/api-reference/classes/SuspendObjectStream.md b/docs/docs/guide/api-reference/classes/SuspendObjectStream.md
Lines changed: 0 additions & 33 deletions
diff --git a/docs/docs/guide/api-reference/classes/UniResponse.md b/docs/docs/guide/api-reference/classes/UniResponse.md
Lines changed: 1 addition & 0 deletions
diff --git a/docs/docs/guide/api-reference/classes/UniResponseUsage.md b/docs/docs/guide/api-reference/classes/UniResponseUsage.md
Lines changed: 2 additions & 0 deletions
@@ -309,8 +309,8 @@ export default withMermaid({
                 link: "/guide/api-reference/classes/SuspendEnum",
               },
               {
-                text: "SuspendObjectStream",
-                link: "/guide/api-reference/classes/SuspendObjectStream",
+                text: "RequestMetadata",
+                link: "/guide/api-reference/classes/RequestMetadata",
               },
               {
                 text: "EmbeddingChunk",
@@ -663,8 +663,8 @@ export default withMermaid({
                 link: "/zh/guide/api-reference/classes/SuspendEnum",
               },
               {
-                text: "SuspendObjectStream",
-                link: "/zh/guide/api-reference/classes/SuspendObjectStream",
+                text: "RequestMetadata",
+                link: "/zh/guide/api-reference/classes/RequestMetadata",
               },
               {
                 text: "EmbeddingChunk",
 
@@ -32,16 +32,12 @@ Clean up running chat objects under the specified key, keeping only up to `maxit
 
 **Returns:** `bool` — `True` if cleanup was performed, `False` otherwise
 
----
-
 ### `get_all_objs() -> list[ChatObjectMeta]`
 
 Get metadata for all running chat objects across all sessions.
 
 **Returns:** `list[ChatObjectMeta]` — List of all running chat object metadata snapshots
 
----
-
 ### `get_objs(session_id: str) -> list[ChatObject]`
 
 Get all active chat objects for a given session ID.
@@ -52,8 +48,6 @@ Get all active chat objects for a given session ID.
 
 **Returns:** `list[ChatObject]` — List of chat objects for the session
 
----
-
 ### `async clean_chat_objects(maxitems: int = 10) -> None`
 
 Asynchronously clean up all running chat objects across all sessions, limiting each session to `maxitems` objects.
@@ -62,8 +56,6 @@ Asynchronously clean up all running chat objects across all sessions, limiting e
 
 - `maxitems` (`int`, optional): Maximum number of objects per session. Defaults to `10`.
 
----
-
 ### `async add_chat_object(chat_object: ChatObject) -> None`
 
 Register a new `ChatObject` instance with the manager. Creates a metadata snapshot and inserts the object at the beginning of the session's list.
 
@@ -57,8 +57,6 @@ Initializes the ClientManager (runs only once due to singleton pattern).
 
 **Note:** Initialization logic executes only on the first instantiation.
 
----
-
 _All other methods are inherited from [`MultiClientManager`](MultiClientManager.md):_
 
 - `get_client_by_script(server_script)` - Get client by server script
 
@@ -66,16 +66,12 @@ Override the default abstract instruction used for context summarization.
 - `TypeError`: If instruction is not a string
 - `ValueError`: If instruction is empty
 
----
-
 ### `get_abstract_instruction() -> str`
 
 Get the current abstract instruction text.
 
 **Returns:** `str`
 
----
-
 ### `reset_abstract_instruction()`
 
 Reset the abstract instruction to the framework default.
 
@@ -8,7 +8,7 @@ The `ModelAdapter` class provides a unified interface for integrating different
 
 Adapters are automatically registered with the [`AdapterManager`](#adaptermanager) when defined, unless marked as abstract or explicitly disabled from registration.
 
-> **Note**: The `ModelAdapter` base class has been moved from `amrita_core.protocol` to `amrita_core.base.adapter`. The `amrita_core.protocol` module is now a deprecated re-export wrapper.
+> **Note**: The `ModelAdapter` base class has been moved from `amrita_core.protocol` to `amrita_core.base.adapter`. The deprecated `amrita_core.protocol` re-export wrapper was removed as of v0.10.x; import `ModelAdapter` from `amrita_core.base.adapter` instead.
 
 ## Class Definition
 
 
@@ -0,0 +1,34 @@
+# RequestMetadata
+
+`RequestMetadata` captures per-request diagnostic information returned by every adapter call through `UniResponse.metadata`.
+
+## Properties
+
+- `request_id` (str): Auto-generated unique request ID (UUID4). Defaults to a new UUID if not provided.
+- `original_request_id` (str | None): Original request ID returned by the LLM provider adapter (e.g., OpenAI's `_request_id`, Anthropic's `request_id`). `None` when unavailable.
+- `model` (str): The model used for the request. Defaults to `"__NOT_GIVEN__"` when not available (e.g., streaming before the first chunk).
+- `stop_sequence` (str | None): The stop sequence that terminated generation, if any.
+- `stop_reason` (STOP_REASON | None): Why the generation stopped. One of:
+
+  | Value             | Meaning                    |
+  | ----------------- | -------------------------- |
+  | `"end_turn"`      | Natural completion         |
+  | `"max_tokens"`    | Hit max token limit        |
+  | `"stop_sequence"` | Matched a stop sequence    |
+  | `"tool_use"`      | Model called a tool        |
+  | `"pause_turn"`    | Anthropic pause turn       |
+  | `"refusal"`       | Content filtered / refused |
+
+## Usage
+
+```python
+from amrita_core.types.response import RequestMetadata
+
+# Accessed via UniResponse.metadata
+response = ...  # a UniResponse returned by an adapter call
+metadata: RequestMetadata = response.metadata
+print(metadata.model)                # e.g. "gpt-4o"
+print(metadata.stop_reason)          # e.g. "end_turn"
+print(metadata.original_request_id)  # Provider's request ID
+```
+
+> **Note**: `extra="allow"` is configured, so provider-specific fields may appear in addition to the standard ones.
@@ -44,8 +44,6 @@ Called once by the framework when the execution context is ready. Subclasses may
 
 **Returns:** `Self`
 
----
-
 ### `async single_execute() -> bool`
 
 Execute a single agent step for `"agent"` and `"agent-mixed"` category strategies. Called by the framework to perform one iteration of tool calling.
@@ -54,8 +52,6 @@ Execute a single agent step for `"agent"` and `"agent-mixed"` category strategie
 
 **Note:** This method is used by `"agent"` and `"agent-mixed"` category strategies. `"rag"` and `"workflow"` category strategies should implement `run()` instead.
 
----
-
 ### `async run() -> None`
 
 Run the complete agent strategy for `"rag"` and `"workflow"` category strategies. Gives full control to the strategy implementation for managing tool calling iterations, context construction, error handling, and response generation.
@@ -67,8 +63,6 @@ Run the complete agent strategy for `"rag"` and `"workflow"` category strategies
 
 **Note:** This method is used by `"rag"` and `"workflow"` category strategies. `"agent"` and `"agent-mixed"` category strategies should implement `single_execute()` instead.
 
----
-
 ### `async call_tool(tool_call: ToolCall) -> str`
 
 Execute a single tool call without modifying the agent's context.
@@ -83,16 +77,12 @@ Execute a single tool call without modifying the agent's context.
 
 **Returns:** `str` — The string response from the tool execution, or a default message if the tool returns `None`
 
----
-
 ### `async on_limited() -> None`
 
 Handle the event when the agent reaches its tool calling limit. Called when the agent strategy has reached the maximum allowed number of tool calls.
 
 **Default behavior:** Sends a notification message to the user about the limit being reached.
 
----
-
 ### `async on_exception(exc: BaseException) -> None`
 
 Handle exceptions that occur during strategy execution.
@@ -101,14 +91,10 @@ Handle exceptions that occur during strategy execution.
 
 - `exc` (`BaseException`): The exception that occurred
 
----
-
 ### `async on_post_process() -> None`
 
 Performs post-processing after all steps have completed successfully.
 
----
-
 ### `classmethod get_category() -> Literal["agent", "workflow", "rag", "agent-mixed"]`
 
 Get the category of the agent strategy. This is an abstract method that must be implemented by subclasses.
 
@@ -10,6 +10,7 @@ The UniResponse class provides a unified response format.
 - `tool_calls` (T_TOOL): Tool call results, T_TOOL is a generic parameter
 - `reasoning_content` (str | None): Reasoning/thinking content from the model, if the model supports it (e.g., o1, Claude with extended thinking)
 - `reasoning_signature` (str | None): Anthropic thinking signature, required for round-tripping thinking content with Anthropic API
+- `metadata` ([RequestMetadata](RequestMetadata.md)): Request metadata containing request ID, model name, stop reason, and original provider request ID
 
 ## Description
 
 
@@ -7,6 +7,8 @@ UniResponseUsage class represents usage statistics for responses.
 - `prompt_tokens` (T_INT): Number of tokens used in the prompt
 - `completion_tokens` (T_INT): Number of tokens used in the completion (generation)
 - `total_tokens` (T_INT): Total number of tokens used
+- `cache_creation` (int | None): Number of tokens used to create the cache entry (Anthropic prompt caching)
+- `cache_hit` (int | None): Number of tokens read from the cache (Anthropic prompt caching)
 
 ## Description
Original file line number	Diff line number	Diff line change
`@@ -309,8 +309,8 @@ export default withMermaid({`
`309`	`309`	`link: "/guide/api-reference/classes/SuspendEnum",`
`310`	`310`	`},`
`311`	`311`	`{`
`312`		`- text: "SuspendObjectStream",`
`313`		`- link: "/guide/api-reference/classes/SuspendObjectStream",`
	`312`	`+ text: "RequestMetadata",`
	`313`	`+ link: "/guide/api-reference/classes/RequestMetadata",`
`314`	`314`	`},`
`315`	`315`	`{`
`316`	`316`	`text: "EmbeddingChunk",`
`@@ -663,8 +663,8 @@ export default withMermaid({`
`663`	`663`	`link: "/zh/guide/api-reference/classes/SuspendEnum",`
`664`	`664`	`},`
`665`	`665`	`{`
`666`		`- text: "SuspendObjectStream",`
`667`		`- link: "/zh/guide/api-reference/classes/SuspendObjectStream",`
	`666`	`+ text: "RequestMetadata",`
	`667`	`+ link: "/zh/guide/api-reference/classes/RequestMetadata",`
`668`	`668`	`},`
`669`	`669`	`{`
`670`	`670`	`text: "EmbeddingChunk",`