strands-agents
diff --git a/AGENTS.md b/AGENTS.md
Lines changed: 4 additions & 1 deletion
diff --git a/README.md b/README.md
Lines changed: 4 additions & 0 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 2 additions & 2 deletions
diff --git a/src/strands/__init__.py b/src/strands/__init__.py
Lines changed: 2 additions & 1 deletion
diff --git a/src/strands/models/anthropic.py b/src/strands/models/anthropic.py
Lines changed: 3 additions & 3 deletions
diff --git a/src/strands/models/bedrock.py b/src/strands/models/bedrock.py
Lines changed: 21 additions & 10 deletions
diff --git a/src/strands/models/gemini.py b/src/strands/models/gemini.py
Lines changed: 3 additions & 3 deletions
diff --git a/src/strands/models/llamacpp.py b/src/strands/models/llamacpp.py
Lines changed: 3 additions & 3 deletions
diff --git a/src/strands/models/ollama.py b/src/strands/models/ollama.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/strands/models/openai_responses.py b/src/strands/models/openai_responses.py
Lines changed: 3 additions & 3 deletions
@@ -130,8 +130,11 @@ strands-agents/
 │   │
 │   ├── plugins/                          # Plugin system
 │   │   ├── plugin.py                     # Plugin base class
+│   │   ├── multiagent_plugin.py          # MultiAgentPlugin base class
 │   │   ├── decorator.py                  # @hook decorator
-│   │   └── registry.py                   # PluginRegistry for tracking plugins
+│   │   ├── registry.py                   # PluginRegistry for tracking agent plugins
+│   │   ├── multiagent_registry.py        # Registry for tracking orchestrator plugins
+│   │   └── _discovery.py                 # Shared hook/tool discovery utilities
 │   │
 │   ├── handlers/                         # Event handlers
 │   │   └── callback_handler.py           # Callback handling
 
@@ -20,6 +20,7 @@
     <a href="https://github.com/strands-agents/sdk-python/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/github/license/strands-agents/sdk-python"/></a>
     <a href="https://pypi.org/project/strands-agents/"><img alt="PyPI version" src="https://img.shields.io/pypi/v/strands-agents"/></a>
     <a href="https://python.org"><img alt="Python versions" src="https://img.shields.io/pypi/pyversions/strands-agents"/></a>
+    <a href="https://discord.gg/strands"><img alt="Strands Discord" src="https://img.shields.io/badge/Discord-Strands-5865F2?logo=discord&logoColor=white"/></a>
   </div>
 
   <p>
@@ -316,6 +317,9 @@ We welcome contributions! See our [Contributing Guide](CONTRIBUTING.md) for deta
 - Code of Conduct
 - Reporting of security issues
 
+## Stay in touch with the team
+Come meet the Strands team and other users on [**Discord**](https://discord.com/invite/strands).
+
 ## License
 
 This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
 
@@ -68,8 +68,8 @@ a2a = [
     "a2a-sdk[sql]>=0.3.0,<0.4.0",
     "uvicorn>=0.34.2,<1.0.0",
     "httpx>=0.28.1,<1.0.0",
-    "fastapi>=0.115.12,<1.0.0",
-    "starlette>=0.46.2,<1.0.0",
+    "fastapi>=0.133.0,<1.0.0",
+    "starlette>=1.0.0,<2.0.0",
 ]
 
 bidi = [
 
@@ -4,7 +4,7 @@
 from .agent.agent import Agent
 from .agent.base import AgentBase
 from .event_loop._retry import ModelRetryStrategy
-from .plugins import Plugin
+from .plugins import MultiAgentPlugin, Plugin
 from .tools.decorator import tool
 from .types._snapshot import Snapshot
 from .types.tools import ToolContext
@@ -17,6 +17,7 @@
     "agent",
     "models",
     "ModelRetryStrategy",
+    "MultiAgentPlugin",
     "Plugin",
     "Skill",
     "Snapshot",
 
@@ -58,8 +58,8 @@ class AnthropicConfig(BaseModelConfig, total=False):
             params: Additional model parameters (e.g., temperature).
                 For a complete list of supported parameters, see https://docs.anthropic.com/en/api/messages.
             use_native_token_count: Whether to use the native Anthropic count_tokens API.
-                When True (default), count_tokens() calls the Anthropic API for accurate counts.
-                When False, skips the API call and uses the local estimator.
+                When True, count_tokens() calls the Anthropic API for accurate counts.
+                When False (default), skips the API call and uses the local estimator.
         """
 
         max_tokens: Required[int]
@@ -398,7 +398,7 @@ async def count_tokens(
         Returns:
             Total input token count.
         """
-        if self.config.get("use_native_token_count") is False:
+        if self.config.get("use_native_token_count") is not True:
             return await super().count_tokens(messages, tool_specs, system_prompt, system_prompt_content)
 
         try:
 
@@ -55,13 +55,13 @@
     "anthropic.claude",
 ]
 
-# Cache of model IDs that do not support the CountTokens API.
-_UNSUPPORTED_COUNT_TOKENS_MODELS: set[str] = set()
+# Cache of model IDs for which CountTokens API calls should be skipped.
+_SKIP_COUNT_TOKENS_MODELS: set[str] = set()
 
 
-def _clear_unsupported_count_tokens_cache() -> None:
-    """Clear the cache of model IDs that do not support the CountTokens API."""
-    _UNSUPPORTED_COUNT_TOKENS_MODELS.clear()
+def _clear_skip_count_tokens_cache() -> None:
+    """Clear the cache of model IDs for which CountTokens API calls should be skipped."""
+    _SKIP_COUNT_TOKENS_MODELS.clear()
 
 
 def _suppress_task_exception(task: "asyncio.Task[None]") -> None:
@@ -124,8 +124,8 @@ class BedrockConfig(BaseModelConfig, total=False):
             temperature: Controls randomness in generation (higher = more random)
             top_p: Controls diversity via nucleus sampling (alternative to temperature)
             use_native_token_count: Whether to use the native Bedrock CountTokens API.
-                When True (default), count_tokens() calls the Bedrock API for accurate counts.
-                When False, skips the API call and uses the local estimator.
+                When True, count_tokens() calls the Bedrock API for accurate counts.
+                When False (default), skips the API call and uses the local estimator.
         """
 
         additional_args: dict[str, Any] | None
@@ -804,12 +804,12 @@ async def count_tokens(
         Returns:
             Total input token count.
         """
-        if self.config.get("use_native_token_count") is False:
+        if self.config.get("use_native_token_count") is not True:
             return await super().count_tokens(messages, tool_specs, system_prompt, system_prompt_content)
 
         model_id: str = self.config["model_id"]
 
-        if model_id in _UNSUPPORTED_COUNT_TOKENS_MODELS:
+        if model_id in _SKIP_COUNT_TOKENS_MODELS:
             return await super().count_tokens(messages, tool_specs, system_prompt, system_prompt_content)
 
         try:
@@ -839,6 +839,17 @@ async def count_tokens(
             return total_tokens
         except Exception as e:
             if (
+                isinstance(e, ClientError)
+                and e.response.get("Error", {}).get("Code") == "AccessDeniedException"
+            ):
+                logger.warning(
+                    "model_id=<%s> | bedrock:CountTokens permission denied,"
+                    " falling back to heuristic estimation: %s",
+                    model_id,
+                    e,
+                )
+                _SKIP_COUNT_TOKENS_MODELS.add(model_id)
+            elif (
                 isinstance(e, ClientError)
                 and e.response.get("Error", {}).get("Code") == "ValidationException"
                 and "doesn't support counting tokens" in str(e)
@@ -848,7 +859,7 @@ async def count_tokens(
                     " falling back to estimation",
                     model_id,
                 )
-                _UNSUPPORTED_COUNT_TOKENS_MODELS.add(model_id)
+                _SKIP_COUNT_TOKENS_MODELS.add(model_id)
             else:
                 logger.debug(
                     "model_id=<%s>, error=<%s> | native token counting failed, falling back to estimation",
 
@@ -50,8 +50,8 @@ class GeminiConfig(BaseModelConfig, total=False):
                 For a complete list of supported tools, see
                 https://ai.google.dev/api/caching#Tool
             use_native_token_count: Whether to use the native Gemini count_tokens API.
-                When True (default), count_tokens() calls the Gemini API for accurate counts.
-                When False, skips the API call and uses the local estimator.
+                When True, count_tokens() calls the Gemini API for accurate counts.
+                When False (default), skips the API call and uses the local estimator.
         """
 
         model_id: Required[str]
@@ -461,7 +461,7 @@ async def count_tokens(
         Returns:
             Total input token count.
         """
-        if self.config.get("use_native_token_count") is False:
+        if self.config.get("use_native_token_count") is not True:
             return await super().count_tokens(messages, tool_specs, system_prompt, system_prompt_content)
 
         try:
 
@@ -126,8 +126,8 @@ class LlamaCppConfig(BaseModelConfig, total=False):
                 - slot_id: Slot ID for parallel inference
                 - samplers: Custom sampler order
             use_native_token_count: Whether to use the native llama.cpp /tokenize endpoint.
-                When True (default), count_tokens() calls the server's tokenize endpoint for accurate counts.
-                When False, skips the API call and uses the local estimator.
+                When True, count_tokens() calls the server's tokenize endpoint for accurate counts.
+                When False (default), skips the API call and uses the local estimator.
         """
 
         model_id: str
@@ -537,7 +537,7 @@ async def count_tokens(
         Returns:
             Total input token count.
         """
-        if self.config.get("use_native_token_count") is False:
+        if self.config.get("use_native_token_count") is not True:
             return await super().count_tokens(messages, tool_specs, system_prompt, system_prompt_content)
 
         try:
 
@@ -280,7 +280,7 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
                             "totalTokens": event["data"].eval_count + event["data"].prompt_eval_count,
                         },
                         "metrics": {
-                            "latencyMs": event["data"].total_duration / 1e6,
+                            "latencyMs": int(event["data"].total_duration / 1e6),
                         },
                     },
                 }
 
@@ -137,8 +137,8 @@ class OpenAIResponsesConfig(BaseModelConfig, total=False):
                 When True, the server stores conversation history and the client does not need to
                 send the full message history with each request. Defaults to False.
             use_native_token_count: Whether to use the native OpenAI input_tokens.count API.
-                When True (default), count_tokens() calls the OpenAI API for accurate counts.
-                When False, skips the API call and uses the local estimator.
+                When True, count_tokens() calls the OpenAI API for accurate counts.
+                When False (default), skips the API call and uses the local estimator.
         """
 
         model_id: str
@@ -242,7 +242,7 @@ async def count_tokens(
         Returns:
             Total input token count.
         """
-        if self.config.get("use_native_token_count") is False:
+        if self.config.get("use_native_token_count") is not True:
             return await super().count_tokens(messages, tool_specs, system_prompt, system_prompt_content)
 
         try:
Original file line number	Diff line number	Diff line change
`@@ -280,7 +280,7 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent:`
`280`	`280`	`"totalTokens": event["data"].eval_count + event["data"].prompt_eval_count,`
`281`	`281`	`},`
`282`	`282`	`"metrics": {`
`283`		`- "latencyMs": event["data"].total_duration / 1e6,`
	`283`	`+ "latencyMs": int(event["data"].total_duration / 1e6),`
`284`	`284`	`},`
`285`	`285`	`},`
`286`	`286`	`}`