Skip to content

Commit af5e864

Browse files
feat(plugin): download inbound media to local path for vision pipeline
Download images sent by users to ~/.openclaw/media/inbound/ so that OpenClaw's vision pipeline can attach them via MediaPath instead of relying on remote URL fetching. Also add console.error debug logging for the message and media flow.
1 parent 4d4b38e commit af5e864

1 file changed

Lines changed: 43 additions & 13 deletions

File tree

packages/plugin/src/channel.ts

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,7 @@ async function handleCloudMessage(
438438
}
439439

440440
ctx.log?.info(`[${ctx.accountId}] Message from ${msg.userId}: ${text.slice(0, 80)}${msg.mediaUrl ? " [+image]" : ""}`);
441+
console.error(`[botschat-msg] from=${msg.userId} text=${text.slice(0,40)} mediaUrl=${msg.mediaUrl ?? "none"}`);
441442

442443
try {
443444
const runtime = getBotsChatRuntime();
@@ -475,6 +476,45 @@ async function handleCloudMessage(
475476
ctx.log?.info(`[${ctx.accountId}] Thread parent context injected (${parentText.length} chars)`);
476477
}
477478

479+
// Download inbound media (if any) to a local path so OpenClaw's
480+
// vision pipeline can attach it to the model (requires MediaPath).
481+
console.error(`[botschat-media] mediaUrl=${msg.mediaUrl ?? "none"}`);
482+
if (msg.mediaUrl) {
483+
let resolvedUrl = msg.mediaUrl;
484+
if (resolvedUrl.startsWith("/")) {
485+
const baseUrl = cloudUrls.get(ctx.accountId);
486+
if (baseUrl) {
487+
resolvedUrl = baseUrl.replace(/\/$/, "") + resolvedUrl;
488+
}
489+
}
490+
try {
491+
const os = await import("os");
492+
const fsM = await import("fs");
493+
const pathM = await import("path");
494+
const mediaDir = pathM.join(os.homedir(), ".openclaw", "media", "inbound");
495+
await fsM.promises.mkdir(mediaDir, { recursive: true });
496+
ctx.log?.info(`[${ctx.accountId}] Downloading media from ${resolvedUrl}`);
497+
const resp = await fetch(resolvedUrl);
498+
if (resp.ok) {
499+
const buffer = Buffer.from(await resp.arrayBuffer());
500+
const contentType = resp.headers.get("content-type") || "image/png";
501+
const extMap: Record<string, string> = { "image/png": ".png", "image/jpeg": ".jpg", "image/gif": ".gif", "image/webp": ".webp" };
502+
const ext = extMap[contentType] || ".png";
503+
const fileName = `botschat-${Date.now()}-${Math.random().toString(36).slice(2, 8)}${ext}`;
504+
const filePath = pathM.join(mediaDir, fileName);
505+
await fsM.promises.writeFile(filePath, buffer);
506+
ctx.log?.info(`[${ctx.accountId}] Downloaded media to ${filePath} (${buffer.length} bytes, ${contentType})`);
507+
(msg as any).__resolvedMedia = { MediaUrl: resolvedUrl, MediaPath: filePath, MediaType: contentType, NumMedia: "1" };
508+
} else {
509+
ctx.log?.error(`[${ctx.accountId}] Failed to download media: HTTP ${resp.status}`);
510+
(msg as any).__resolvedMedia = { MediaUrl: resolvedUrl, NumMedia: "1" };
511+
}
512+
} catch (err) {
513+
ctx.log?.error(`[${ctx.accountId}] Failed to download media: ${err}`);
514+
(msg as any).__resolvedMedia = { MediaUrl: msg.mediaUrl, NumMedia: "1" };
515+
}
516+
}
517+
478518
// Build the MsgContext that OpenClaw's dispatch pipeline expects.
479519
// BotsChat users are authenticated (logged in via the web UI), so
480520
// mark commands as authorized — this lets directives like /model
@@ -501,19 +541,9 @@ async function handleCloudMessage(
501541
// Inject parent message as GroupSystemPrompt for thread context.
502542
...(parentContext ? { GroupSystemPrompt: parentContext } : {}),
503543
...(threadId ? { MessageThreadId: threadId, ReplyToId: threadId } : {}),
504-
// Include image URL if the user sent an image.
505-
// Resolve relative URLs (e.g. /api/media/...) to absolute using cloudUrl
506-
// so OpenClaw can fetch the image from the BotsChat cloud.
507-
...(msg.mediaUrl ? (() => {
508-
let resolvedUrl = msg.mediaUrl;
509-
if (resolvedUrl.startsWith("/")) {
510-
const baseUrl = cloudUrls.get(ctx.accountId);
511-
if (baseUrl) {
512-
resolvedUrl = baseUrl.replace(/\/$/, "") + resolvedUrl;
513-
}
514-
}
515-
return { MediaUrl: resolvedUrl, NumMedia: "1" };
516-
})() : {}),
544+
// Include the image fields resolved earlier in this function: MediaPath is set
545+
// only when the download succeeded; otherwise just MediaUrl and NumMedia are passed.
546+
...((msg as any).__resolvedMedia || {}),
517547
};
518548

519549
// Finalize the context (normalizes fields, resolves agent route)

0 commit comments

Comments
 (0)