Skip to content

Commit 74f871e

Browse files
authored
Merge pull request #443 from Mng-dev-ai/feat/bridge-inline-image-attachments
Inline image attachments for bridge-backed models
2 parents dea88de + 1852531 commit 74f871e

7 files changed

Lines changed: 165 additions & 34 deletions

File tree

backend/app/constants.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
from pathlib import Path
33
from typing import Final
44

5+
from app.models.schemas.settings import ProviderType
6+
57
CLAUDE_DIR: Final[Path] = (
68
Path(d) if (d := os.environ.get("CLAUDE_CONFIG_DIR")) else Path.home() / ".claude"
79
)
@@ -173,3 +175,9 @@
173175
DOCKER_STATUS_RUNNING: Final[str] = "running"
174176

175177
SANDBOX_BASHRC_PATH: Final[str] = "/home/user/.bashrc"
178+
179+
# Provider types whose traffic goes through the local Anthropic bridge instead
# of straight to Anthropic. This constant is shared by several modules purely
# for membership tests, so it is frozen: an accidental ``.add()``/``.discard()``
# in one importer must not silently change routing for all the others.
BRIDGE_PROVIDER_TYPES: Final[frozenset[str]] = frozenset(
    {
        ProviderType.OPENROUTER.value,
        ProviderType.OPENAI.value,
        ProviderType.COPILOT.value,
    }
)

backend/app/services/chat.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from sqlalchemy import exists, func, select, update
1111
from sqlalchemy.orm import aliased, selectinload
1212

13-
from app.constants import REDIS_KEY_CHAT_STREAM_LIVE
13+
from app.constants import BRIDGE_PROVIDER_TYPES, REDIS_KEY_CHAT_STREAM_LIVE
1414
from app.core.config import get_settings
1515
from app.models.db_models.chat import Chat, Message
1616
from app.models.db_models.enums import MessageRole, MessageStreamStatus, StreamEventKind
@@ -852,6 +852,14 @@ async def initiate_chat_completion(
852852
context_window = self._provider_service.get_model_context_window(
853853
user_settings, request.model_id
854854
)
855+
provider, _ = self._provider_service.get_provider_for_model(
856+
user_settings, request.model_id
857+
)
858+
uses_bridge = (
859+
provider.get("provider_type") in BRIDGE_PROVIDER_TYPES
860+
if provider
861+
else False
862+
)
855863

856864
try:
857865
await self._enqueue_chat_task(
@@ -865,6 +873,7 @@ async def initiate_chat_completion(
865873
assistant_message_id=str(assistant_message.id),
866874
thinking_mode=request.thinking_mode,
867875
worktree=request.worktree,
876+
uses_bridge=uses_bridge,
868877
attachments=attachments,
869878
context_window=context_window,
870879
selected_persona_name=request.selected_persona_name,
@@ -896,6 +905,7 @@ async def _enqueue_chat_task(
896905
assistant_message_id: str,
897906
thinking_mode: str | None,
898907
worktree: bool = False,
908+
uses_bridge: bool = False,
899909
attachments: list[MessageAttachmentDict] | None,
900910
context_window: int | None = None,
901911
selected_persona_name: str = DEFAULT_PERSONA_NAME,
@@ -925,6 +935,7 @@ async def _enqueue_chat_task(
925935
assistant_message_id=assistant_message_id,
926936
thinking_mode=thinking_mode,
927937
worktree=worktree,
938+
uses_bridge=uses_bridge,
928939
attachments=stream_attachments,
929940
selected_persona_name=selected_persona_name,
930941
)

backend/app/services/claude_agent.py

Lines changed: 86 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import base64
12
import logging
3+
import mimetypes
24
import sys
35
from collections.abc import AsyncIterator, Callable
46
from functools import partial
@@ -13,6 +15,7 @@
1315
)
1416

1517
from app.constants import (
18+
BRIDGE_PROVIDER_TYPES,
1619
SANDBOX_GIT_ASKPASS_PATH,
1720
SANDBOX_HOME_DIR,
1821
SANDBOX_WORKSPACE_DIR,
@@ -188,16 +191,21 @@ async def stream_response(
188191
session_callback: Callable[[str, str | None], None] | None = None,
189192
attachments: list[dict[str, Any]] | None = None,
190193
attachment_base_dir: str = SANDBOX_HOME_DIR,
194+
uses_bridge: bool = False,
191195
) -> AsyncIterator[StreamEvent]:
192196
# Send a prompt to the Claude SDK client and yield processed stream
193197
# events, handling plan mode transitions on tool success/failure.
194-
user_prompt = self.prepare_user_prompt(
195-
prompt, custom_instructions, attachments, attachment_base_dir
198+
user_content = self.build_user_message_content(
199+
prompt,
200+
custom_instructions,
201+
attachments,
202+
attachment_base_dir,
203+
uses_bridge=uses_bridge,
196204
)
197205

198206
prompt_message = {
199207
"type": "user",
200-
"message": {"role": MessageRole.USER.value, "content": user_prompt},
208+
"message": {"role": MessageRole.USER.value, "content": user_content},
201209
"parent_tool_use_id": None,
202210
"session_id": session_id,
203211
}
@@ -252,11 +260,7 @@ def _build_auth_env(
252260
# existing login; in Docker mode an explicit token is needed.
253261
if auth_token:
254262
env["CLAUDE_CODE_OAUTH_TOKEN"] = auth_token
255-
elif provider_type in (
256-
ProviderType.OPENROUTER.value,
257-
ProviderType.OPENAI.value,
258-
ProviderType.COPILOT.value,
259-
):
263+
elif provider_type in BRIDGE_PROVIDER_TYPES:
260264
# Non-Anthropic providers route through our local bridge
261265
# (https://github.com/Mng-dev-ai/anthropic-bridge) that translates
262266
# Anthropic API calls to the provider's format.
@@ -625,6 +629,80 @@ def prepare_user_prompt(
625629
parts.append(f"<user_prompt>{prompt}</user_prompt>")
626630
return "".join(parts)
627631

632+
@classmethod
def build_user_message_content(
    cls,
    prompt: str,
    custom_instructions: str | None,
    attachments: list[dict[str, Any]] | None = None,
    attachment_base_dir: str = SANDBOX_HOME_DIR,
    uses_bridge: bool = False,
) -> str | list[dict[str, Any]]:
    """Build the ``content`` value of a user message.

    Returns the string produced by ``prepare_user_prompt`` in every case
    except two, both of which only apply when ``uses_bridge`` is true:

    * the prompt starts with one of ``ALLOWED_SLASH_COMMANDS`` -> the bare
      prompt is returned untouched;
    * at least one image attachment could be converted -> a list of content
      blocks is returned: one text block followed by the image blocks.
    """
    # Slash commands must stay as bare strings for the SDK to handle them.
    if uses_bridge and prompt.startswith(tuple(ALLOWED_SLASH_COMMANDS)):
        return prompt

    # Anthropic's API handles image attachments natively, but bridge-backed
    # providers (OpenAI, OpenRouter, Copilot) need images inlined as base64
    # content blocks since the bridge translates to their format.
    inline_images: list[dict[str, Any]] = []
    if uses_bridge:
        converted = (
            cls._attachment_to_image_block(item)
            for item in attachments or []
            if item.get("file_type") == "image"
        )
        inline_images = [block for block in converted if block]

    # The text block still lists every attachment (images included) so the
    # model sees filenames and can map them to the inline image blocks.
    text = cls.prepare_user_prompt(
        prompt,
        custom_instructions,
        attachments,
        attachment_base_dir,
    )
    if inline_images:
        return [{"type": "text", "text": text}, *inline_images]
    return text
675+
676+
@staticmethod
def _attachment_to_image_block(attachment: dict[str, Any]) -> dict[str, Any] | None:
    """Convert a stored image attachment into an inline base64 image block.

    Args:
        attachment: Attachment record. Only its ``file_path`` entry is read;
            it is interpreted relative to ``settings.STORAGE_PATH``.

    Returns:
        A ``{"type": "image", "source": {...}}`` content block holding the
        base64-encoded file, or ``None`` when the path is missing, points
        outside the storage root, cannot be read, or the file is empty.
    """
    relative_path = attachment.get("file_path")
    if not isinstance(relative_path, str) or not relative_path:
        return None

    # The file contents are forwarded to an external provider, so never let
    # an absolute path or a ``..`` segment escape the storage root (joining
    # an absolute path would silently discard STORAGE_PATH altogether).
    candidate = Path(relative_path)
    if candidate.is_absolute() or ".." in candidate.parts:
        logger.warning(
            "Refusing image attachment outside storage root: %s",
            relative_path,
        )
        return None

    file_path = Path(settings.STORAGE_PATH) / candidate
    try:
        raw = file_path.read_bytes()
    except OSError as exc:
        # Not only "file not found": permission and I/O errors land here too,
        # so report the actual cause instead of guessing.
        logger.warning(
            "Could not read image attachment for inline bridge send: %s (%s)",
            file_path,
            exc,
        )
        return None

    if not raw:
        logger.warning(
            "Skipping empty image attachment for inline bridge send: %s",
            file_path,
        )
        return None

    # Prefer the file signature over the extension: uploads may be
    # extensionless or mislabeled, and a wrong media_type makes the image
    # undecodable downstream.
    if raw.startswith(b"\x89PNG\r\n\x1a\n"):
        media_type = "image/png"
    elif raw.startswith(b"\xff\xd8\xff"):
        media_type = "image/jpeg"
    elif raw.startswith((b"GIF87a", b"GIF89a")):
        media_type = "image/gif"
    elif raw[:4] == b"RIFF" and raw[8:12] == b"WEBP":
        media_type = "image/webp"
    else:
        # Unknown signature: fall back to the extension-based guess, and to
        # PNG as a last resort.
        guessed, _ = mimetypes.guess_type(str(file_path))
        media_type = (
            guessed if guessed and guessed.startswith("image/") else "image/png"
        )

    return {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": media_type,
            "data": base64.b64encode(raw).decode("ascii"),
        },
    }
705+
628706
@staticmethod
629707
async def _create_prompt_iterable(
630708
prompt_message: dict[str, Any],

backend/app/services/sandbox.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from app.constants import (
1515
ANTHROPIC_BRIDGE_HOST,
1616
ANTHROPIC_BRIDGE_PORT,
17+
BRIDGE_PROVIDER_TYPES,
1718
SANDBOX_CLAUDE_DIR,
1819
SANDBOX_CLAUDE_JSON_PATH,
1920
SANDBOX_GIT_ASKPASS_PATH,
@@ -626,13 +627,8 @@ def _has_bridge_provider(
626627
) -> bool:
627628
if not custom_providers:
628629
return False
629-
bridge_types = {
630-
ProviderType.OPENROUTER.value,
631-
ProviderType.OPENAI.value,
632-
ProviderType.COPILOT.value,
633-
}
634630
return any(
635-
provider.get("provider_type") in bridge_types
631+
provider.get("provider_type") in BRIDGE_PROVIDER_TYPES
636632
and provider.get("enabled", True)
637633
for provider in custom_providers
638634
)

backend/app/services/scheduler.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from sqlalchemy.exc import SQLAlchemyError
1717
from sqlalchemy.ext.asyncio import AsyncSession
1818

19+
from app.constants import BRIDGE_PROVIDER_TYPES
1920
from app.core.config import get_settings
2021
from app.models.db_models.chat import Chat, Message
2122
from app.models.db_models.workspace import Workspace
@@ -730,9 +731,18 @@ async def run_scheduled_task(
730731

731732
system_prompt = build_system_prompt_for_chat(user_settings)
732733

733-
context_window = ProviderService().get_model_context_window(
734+
provider_service = ProviderService()
735+
context_window = provider_service.get_model_context_window(
734736
user_settings, model_id
735737
)
738+
provider, _ = provider_service.get_provider_for_model(
739+
user_settings, model_id
740+
)
741+
uses_bridge = (
742+
provider.get("provider_type") in BRIDGE_PROVIDER_TYPES
743+
if provider
744+
else False
745+
)
736746
stream_request = ChatStreamRequest(
737747
prompt=prompt_message,
738748
system_prompt=system_prompt,
@@ -744,6 +754,7 @@ async def run_scheduled_task(
744754
session_id=None,
745755
assistant_message_id=str(assistant_message_id),
746756
thinking_mode="ultra",
757+
uses_bridge=uses_bridge,
747758
attachments=None,
748759
selected_persona_name=DEFAULT_PERSONA_NAME,
749760
)

backend/app/services/streaming/runtime.py

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from sqlalchemy.orm import selectinload
1717

1818
from app.constants import (
19+
BRIDGE_PROVIDER_TYPES,
1920
REDIS_KEY_CHAT_CONTEXT_USAGE,
2021
REDIS_KEY_CHAT_STREAM_LIVE,
2122
SANDBOX_HOME_DIR,
@@ -147,6 +148,7 @@ def __init__(
147148
self.session_container: dict[str, Any] = {"session_id": request.session_id}
148149
self.assistant_message_id = request.assistant_message_id
149150
self.model_id = request.model_id
151+
self.uses_bridge = request.uses_bridge
150152
self.context_window = request.context_window
151153
self.prompt = request.prompt
152154
self._is_new_chat = request.session_id is None
@@ -168,6 +170,20 @@ def __init__(
168170
self._cancelled: bool = False
169171
self._send_now_pending: bool = False
170172

173+
@property
def attachment_base_dir(self) -> str:
    """Base directory used when building attachment paths for the prompt.

    Returns ``SANDBOX_HOME_DIR`` unless the chat runs on the host sandbox
    provider and has a sandbox id; in that case returns the resolved
    ``{host_sandbox_base_dir}/{sandbox_id}`` path.
    """
    on_host = self.chat.sandbox_provider == SandboxProviderType.HOST
    if not on_host:
        return SANDBOX_HOME_DIR

    sandbox_id = self.chat.sandbox_id
    if not sandbox_id:
        return SANDBOX_HOME_DIR

    # Files are written to the sandbox home dir
    # (/home/user/ → {host_base}/{sandbox_id}/), so in host mode point Claude
    # at the real host path, not the workspace path.
    configured_base = Path(settings.get_host_sandbox_base_dir())
    resolved_base = configured_base.expanduser().resolve()
    return str(resolved_base / sandbox_id)
186+
171187
async def run(
172188
self,
173189
ai_service: ClaudeAgentService,
@@ -495,21 +511,34 @@ async def _write_send_now(self, ai_service: ClaudeAgentService) -> bool:
495511
self.model_id = resolved_model
496512
if self._session:
497513
self._session.current_model = resolved_model
514+
user_settings = await UserService(
515+
session_factory=self.session_factory
516+
).get_user_settings(self.chat.user_id, db=None)
517+
provider, _ = ProviderService().get_provider_for_model(
518+
user_settings, queued_model
519+
)
520+
self.uses_bridge = (
521+
provider.get("provider_type") in BRIDGE_PROVIDER_TYPES
522+
if provider
523+
else False
524+
)
498525
queued_permission = queued_msg.get("permission_mode")
499526
if queued_permission:
500527
sdk_permission = SDK_PERMISSION_MODE_MAP.get(
501528
queued_permission, "acceptEdits"
502529
)
503530
await self.client.set_permission_mode(sdk_permission)
504531

505-
prompt = ai_service.prepare_user_prompt(
532+
user_content = ai_service.build_user_message_content(
506533
queued_msg["content"],
507534
self.custom_instructions,
508535
queued_msg.get("attachments"),
536+
self.attachment_base_dir,
537+
uses_bridge=self.uses_bridge,
509538
)
510539
injection = {
511540
"type": "user",
512-
"message": {"role": "user", "content": prompt},
541+
"message": {"role": "user", "content": user_content},
513542
"parent_tool_use_id": None,
514543
"session_id": self.session_container.get("session_id"),
515544
}
@@ -848,9 +877,18 @@ def _build_queued_stream_request(
848877
user_settings,
849878
selected_persona_name=selected_persona_name,
850879
)
851-
context_window = ProviderService().get_model_context_window(
880+
provider_service = ProviderService()
881+
context_window = provider_service.get_model_context_window(
852882
user_settings, queued_msg["model_id"]
853883
)
884+
provider, _ = provider_service.get_provider_for_model(
885+
user_settings, queued_msg["model_id"]
886+
)
887+
uses_bridge = (
888+
provider.get("provider_type") in BRIDGE_PROVIDER_TYPES
889+
if provider
890+
else False
891+
)
854892
return ChatStreamRequest(
855893
prompt=queued_msg["content"],
856894
system_prompt=system_prompt,
@@ -872,6 +910,7 @@ def _build_queued_stream_request(
872910
assistant_message_id=assistant_message_id,
873911
thinking_mode=queued_msg.get("thinking_mode"),
874912
worktree=queued_msg.get("worktree", False),
913+
uses_bridge=uses_bridge,
875914
attachments=queued_msg.get("attachments"),
876915
selected_persona_name=selected_persona_name,
877916
)
@@ -1117,20 +1156,6 @@ async def execute_chat(
11171156
params.options.permission_mode
11181157
)
11191158
stream_result = StreamResult()
1120-
attachment_base_dir = SANDBOX_HOME_DIR
1121-
if runtime.chat.sandbox_provider == SandboxProviderType.HOST:
1122-
# Files are written to the sandbox home dir
1123-
# (/home/user/ → {host_base}/{sandbox_id}/), so
1124-
# point Claude at the real host path, not the
1125-
# workspace path.
1126-
host_base = (
1127-
Path(settings.get_host_sandbox_base_dir())
1128-
.expanduser()
1129-
.resolve()
1130-
)
1131-
sid = runtime.chat.sandbox_id
1132-
assert sid is not None
1133-
attachment_base_dir = str(host_base / sid)
11341159
stream = ai_service.stream_response(
11351160
client=session.client,
11361161
prompt=request.prompt,
@@ -1139,7 +1164,8 @@ async def execute_chat(
11391164
result=stream_result,
11401165
session_callback=session_callback,
11411166
attachments=request.attachments,
1142-
attachment_base_dir=attachment_base_dir,
1167+
attachment_base_dir=runtime.attachment_base_dir,
1168+
uses_bridge=request.uses_bridge,
11431169
)
11441170
return await runtime.run(ai_service, stream_result, stream)
11451171
except (

0 commit comments

Comments
 (0)