Skip to content

Commit 67bca65

Browse files
author
王璨
committed
fix: support mixed text+image input in chat box
- handlePasteImage now returns InputListenerResult to support data transformation
- Extract printable text from mixed content pastes and pass it through to the editor
- Add Kitty image protocol (ESC _ G) detection for native image pasting
- Update hint text to indicate text can be typed alongside images
- Non-printable binary data is consumed and never reaches the editor
1 parent 2d93f83 commit 67bca65

1 file changed

Lines changed: 122 additions & 25 deletions

File tree

src/ui/tui-app.ts

Lines changed: 122 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ export interface TuiDeps {
5252
onSetThinking: (level: string) => void;
5353
}
5454

55+
type InputListenerResult = { consume?: boolean; data?: string } | undefined;
56+
5557
export class TuiApp {
5658
private deps: TuiDeps;
5759
private terminal: ProcessTerminal;
@@ -71,7 +73,8 @@ export class TuiApp {
7173
private processing = false;
7274
private lastCtrlCPress = 0;
7375
private ctrlCDebounceUntil = 0;
74-
private menuNavDebounceUntil = 0;
76+
private lastMenuNavDirection: "up" | "down" | null = null;
77+
private lastMenuNavAt = 0;
7578
private resolvePermission: ((result: PermissionPromptResult) => void) | null = null;
7679
private pendingPermissionContext:
7780
| { toolName: string; args: unknown }
@@ -116,8 +119,9 @@ export class TuiApp {
116119
};
117120

118121
this.tui.addInputListener((data) => {
119-
if (this.handlePasteImage(data)) {
120-
return { consume: true };
122+
const pasteResult = this.handlePasteImage(data);
123+
if (pasteResult) {
124+
return pasteResult;
121125
}
122126
if (this.handleInput(data)) {
123127
return { consume: true };
@@ -203,11 +207,12 @@ export class TuiApp {
203207
};
204208
}
205209

206-
private canNavigateMenu(now = Date.now()): boolean {
207-
if (now < this.menuNavDebounceUntil) {
210+
private canNavigateMenu(direction: "up" | "down", now = Date.now()): boolean {
211+
if (this.lastMenuNavDirection === direction && now - this.lastMenuNavAt < 90) {
208212
return false;
209213
}
210-
this.menuNavDebounceUntil = now + 90;
214+
this.lastMenuNavDirection = direction;
215+
this.lastMenuNavAt = now;
211216
return true;
212217
}
213218

@@ -245,13 +250,13 @@ export class TuiApp {
245250

246251
if (this.resolvePermission) {
247252
if (matchesKey(data, Key.up)) {
248-
if (this.canNavigateMenu()) {
253+
if (this.canNavigateMenu("up")) {
249254
this.conversation.permNavigate(-1);
250255
}
251256
return true;
252257
}
253258
if (matchesKey(data, Key.down)) {
254-
if (this.canNavigateMenu()) {
259+
if (this.canNavigateMenu("down")) {
255260
this.conversation.permNavigate(1);
256261
}
257262
return true;
@@ -391,7 +396,7 @@ export class TuiApp {
391396
}
392397

393398
if (matchesKey(data, Key.up)) {
394-
if (!this.canNavigateMenu()) {
399+
if (!this.canNavigateMenu("up")) {
395400
return true;
396401
}
397402
if (this.mcpSelectedServerIndex == null) {
@@ -412,7 +417,7 @@ export class TuiApp {
412417
}
413418

414419
if (matchesKey(data, Key.down)) {
415-
if (!this.canNavigateMenu()) {
420+
if (!this.canNavigateMenu("down")) {
416421
return true;
417422
}
418423
if (this.mcpSelectedServerIndex == null) {
@@ -458,17 +463,81 @@ export class TuiApp {
458463
return true;
459464
}
460465

461-
private handlePasteImage(data: string): boolean {
462-
// Detect empty bracketed paste (possible image paste)
466+
/**
467+
* Handle pasted content that may contain images.
468+
*
469+
* Supports:
470+
* - Bracketed paste with image data (terminal wraps clipboard image in \x1b[200~...\x1b[201~)
471+
* - Kitty image protocol sequences (\x1b_G...\x1b\\) sent directly or within bracketed paste
472+
* - Mixed content: text accompanying images is extracted and passed through to the editor
473+
*
474+
* Returns an InputListenerResult:
475+
* - undefined: not a paste/kitty sequence, let other handlers process
476+
* - { consume: true }: pure image/Kitty data, consume entirely
477+
* - { data: string }: mixed content, pass extracted text to editor while also loading image
478+
*/
479+
private handlePasteImage(data: string): InputListenerResult {
480+
// ── Kitty image protocol (ESC _ G ... ESC \) ──
481+
// These APC sequences can arrive outside bracketed paste when the terminal
482+
// natively pastes images via Kitty protocol.
483+
if (this.handleKittyImageProtocol(data)) {
484+
return { consume: true };
485+
}
486+
487+
// ── Bracketed paste ──
463488
const m = data.match(/^\x1b\[200~([\s\S]*?)\x1b\[201~$/);
464-
if (!m) return false;
489+
if (!m) return undefined;
465490

466491
const pasteContent = m[1];
467-
// Only intercept if paste content is empty or contains non-printable data
468-
if (pasteContent.trim() !== "" && !/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]/.test(pasteContent)) {
469-
return false;
492+
493+
// Check if paste is pure printable text (no image data)
494+
const hasControlChars = /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]/.test(pasteContent);
495+
if (pasteContent.trim() !== "" && !hasControlChars) {
496+
// Pure text paste — let it through to the editor unchanged
497+
return undefined;
498+
}
499+
500+
// ── Paste contains potential image data ──
501+
// Extract any printable text mixed with the image binary data
502+
const printableText = this.extractPrintableText(pasteContent);
503+
504+
// Try to read the image from system clipboard (macOS only)
505+
readClipboardImageNonBlocking().then((img) => {
506+
if (img) {
507+
this.pendingImages.push(img);
508+
this.updateImageStatus();
509+
this.conversation.addInlineImage(img.data, img.mimeType);
510+
if (!this.deps.modelSupportsImages) {
511+
this.conversation.addInfo(
512+
c.yellow(`${this.deps.modelName} does not support image input.`),
513+
);
514+
}
515+
}
516+
});
517+
518+
// If there's extractable printable text, pass it through to the editor
519+
// so text + image can coexist in the same message
520+
if (printableText.length > 0) {
521+
// Transform: strip the image data, keep the text for the editor
522+
return { data: printableText };
470523
}
471524

525+
// Pure image paste — consume entirely (don't let binary data reach the editor)
526+
return { consume: true };
527+
}
528+
529+
/**
530+
* Handle Kitty image protocol sequences.
531+
* Kitty uses APC sequences: ESC _ G <params> ; <base64> ESC \
532+
* These can arrive as direct input when pasting images in Kitty-native terminals.
533+
*/
534+
private handleKittyImageProtocol(data: string): boolean {
535+
// Kitty image transmission always contains ESC _ G
536+
if (!data.includes("\x1b_G")) return false;
537+
538+
// The data might be a Kitty image transmission.
539+
// We consume it and try to read the clipboard image instead,
540+
// since extracting base64 from Kitty protocol chunks is fragile.
472541
readClipboardImageNonBlocking().then((img) => {
473542
if (img) {
474543
this.pendingImages.push(img);
@@ -485,6 +554,24 @@ export class TuiApp {
485554
return true;
486555
}
487556

557+
/**
558+
* Extract printable text characters from mixed binary/text content.
559+
* Strips C0 control characters (code units below 0x20) while preserving spaces, newlines,
560+
* carriage returns, tabs, and every code unit >= 0x20. Note: DEL (0x7f) and the C1 controls (0x80-0x9f) are NOT removed.
561+
*/
562+
private extractPrintableText(content: string): string {
563+
return content
564+
.split("")
565+
.filter((char) => {
566+
const code = char.charCodeAt(0);
567+
// Keep: every code unit >= 32 (printable ASCII, but also DEL 0x7f), newline, carriage return, tab
568+
// Also kept: all code units >= 128, which includes the C1 controls 0x80-0x9f flagged by hasControlChars
569+
return code >= 32 || char === "\n" || char === "\r" || char === "\t";
570+
})
571+
.join("")
572+
.trim();
573+
}
574+
488575
private handleCtrlC(): void {
489576
if (this.resolvePermission) {
490577
this.resolvePermissionChoice({ decision: "deny" });
@@ -649,7 +736,10 @@ export class TuiApp {
649736
}
650737

651738
private handleSubmit(text: string): void {
652-
if (!text) return;
739+
const images = this.pendingImages.length > 0 ? [...this.pendingImages] : undefined;
740+
const hasText = text.length > 0;
741+
const hasImages = Boolean(images?.length);
742+
if (!hasText && !hasImages) return;
653743

654744
this.editor.setText("");
655745

@@ -692,13 +782,15 @@ export class TuiApp {
692782
);
693783
}
694784

695-
const imageIndicator = this.pendingImages.length > 0
696-
? "\n" + c.dim(`[${this.pendingImages.length} image(s) attached]`)
785+
const imageIndicator = images
786+
? c.dim(`[${images.length} image(s) attached]`)
697787
: "";
698-
this.addUserMessage(text + imageIndicator);
788+
const userMessage = hasText
789+
? imageIndicator ? `${text}\n${imageIndicator}` : text
790+
: imageIndicator;
791+
this.addUserMessage(userMessage);
699792
this.setProcessing(true);
700793

701-
const images = this.pendingImages.length > 0 ? [...this.pendingImages] : undefined;
702794
this.pendingImages = [];
703795
this.updateImageStatus();
704796

@@ -707,9 +799,13 @@ export class TuiApp {
707799
(result: OcrResult) => {
708800
let promptText: string;
709801
if (result.hasText) {
710-
promptText = `${text}\n\n<image_text>\n${result.content}\n</image_text>`;
802+
promptText = hasText
803+
? `${text}\n\n<image_text>\n${result.content}\n</image_text>`
804+
: `<image_text>\n${result.content}\n</image_text>`;
711805
} else {
712-
promptText = `${text}\n\n(用户附带了一张图片,但图片中没有可识别的文字内容)`;
806+
promptText = hasText
807+
? `${text}\n\n(用户附带了一张图片,但图片中没有可识别的文字内容)`
808+
: "(用户附带了一张图片,但图片中没有可识别的文字内容)";
713809
}
714810
this.deps.agent.prompt(promptText).then(
715811
() => this.setProcessing(false),
@@ -772,7 +868,8 @@ export class TuiApp {
772868
this.conversation.clear();
773869
this.permissionExplainMode = false;
774870
this.pendingPermissionContext = null;
775-
this.menuNavDebounceUntil = 0;
871+
this.lastMenuNavDirection = null;
872+
this.lastMenuNavAt = 0;
776873
this.pendingImages = [];
777874
if (this.mcpPanelVisible) {
778875
this.closeMcpBrowser();
@@ -796,7 +893,7 @@ export class TuiApp {
796893
this.pendingImages.reduce((sum, img) => sum + img.data.length * 0.75, 0) / 1024,
797894
);
798895
this.imageStatus.setText(
799-
c.dim(` ${this.pendingImages.length} image(s) attached (${totalKB} KB)`),
896+
c.dim(` ${this.pendingImages.length} image(s) attached (${totalKB} KB) — type text and press Enter to send`),
800897
);
801898
} else {
802899
this.imageStatus.setText("");

0 commit comments

Comments
 (0)