Skip to content

Commit c9e4bc9

Browse files
NagyVikt and claude committed
Probe the live clipboard for images on every paste, in-daemon
The v1.33 browser Copy Image bridge lives in `paste_image.sh`, which the kitty Ctrl+V binding only invokes outside tmux. Inside tmux the binding short-circuits to `flashpaste-trigger`, sending the request straight to the daemon — and the daemon's existing pickup paths (`inotify_watch`, the screenshots-dir scan in `handle_paste`) only see files on disk. So right-click → Copy Image in Firefox/Chrome, then Ctrl+V in a tmux pane, attached nothing: the bytes were on the Wayland/X11 clipboard but never landed on `~/Pictures/Screenshots/`, and the daemon never probed there. `ipc.rs` now adds an eager live-clipboard image probe alongside the screenshot scan. `read_clipboard_image_if_present` walks Wayland MIME types (`wl_clipboard_rs::paste::get_mime_types`, 150 ms timeout) and xclip TARGETS (300 ms timeout), picks PNG > JPEG > WebP, reads bytes via wl-clipboard then xclip, and validates magic with `image_magic_ok` — the same defence against xclip's silent text-fallback that lives in `bin/flashpaste-capture-clip`. If the bytes differ from what's staged (byte-comparison against `state.staged_snapshot()`), the daemon stages them with a synthetic path under `screenshots_dir/flashpaste-clip-live.*` so downstream logging and the Aider adapter still get a clean filename. `state.rs` gains `last_live_image_probe_ms` so the probe shares the hot-path throttle window with the screenshots scan; back-to-back pastes don't fan out repeated xclip / wl-clipboard subprocess calls. Happy-path cost on an empty clipboard is ~15 ms (one MIME probe + TARGETS); the capture path is ~40–80 ms for a typical browser image. Verified end-to-end: `wl-copy -t image/png < img.png` + `flashpaste-trigger` on a tmux pane produces `paste: captured live clipboard image (browser Copy Image bridge) bytes=3370846 mime="image/png"` in the journal, followed by a successful `PASTED image`. Text path unchanged — when no image is on the clipboard the existing external-text override fires exactly as before. 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 0a68d95 commit c9e4bc9

2 files changed

Lines changed: 208 additions & 0 deletions

File tree

rs/flashpasted/src/ipc.rs

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,59 @@ async fn handle_paste(state: &Arc<SharedState>, pane: &str, started: Instant) ->
228228
}
229229
}
230230

231+
// ─── Eager live-clipboard image pickup ────────────────────────────
232+
// Bridges "browser Copy Image" — Firefox/Chrome under Wayland write
233+
// image bytes to the clipboard with no file under
234+
// ~/Pictures/Screenshots/, so the inotify watcher and the
235+
// newest-file scan above both miss them. Probe the live clipboard
236+
// here; if it advertises an image whose bytes differ from whatever
237+
// we have staged (or nothing's staged), stage them now. From this
238+
// point the existing X11/Wayland ownership path serves Claude
239+
// exactly the same way it does for inotify-staged screenshots.
240+
//
241+
// Cost on the happy path (no image on clipboard): one wl-clipboard
242+
// MIME probe (~5 ms) + xclip TARGETS probe (~10 ms) = ~15 ms.
243+
// Cost when capturing a 1 MB browser image: probe + read +
244+
// magic-check ~40–80 ms. Throttled to the same hot-path window as
245+
// the screenshots-dir scan so paste spam doesn't fan out probes.
246+
if should_probe_live_image(state) {
247+
if let Some((bytes, mime)) = read_clipboard_image_if_present().await {
248+
let differs = match state.staged_snapshot().await {
249+
Some(StagedSelection::Image(img)) => img.bytes.as_slice() != bytes.as_slice(),
250+
_ => true,
251+
};
252+
if differs {
253+
let len = bytes.len();
254+
// Synthetic path so downstream logging / Aider delivery
255+
// have a stable name. Use the screenshots dir when one
256+
// is configured so the file root is predictable.
257+
let path = state
258+
.config
259+
.screenshots_dir
260+
.clone()
261+
.unwrap_or_else(|| std::path::PathBuf::from("/tmp"))
262+
.join(match mime {
263+
"image/jpeg" => "flashpaste-clip-live.jpg",
264+
"image/webp" => "flashpaste-clip-live.webp",
265+
_ => "flashpaste-clip-live.png",
266+
});
267+
let img = StagedImage {
268+
bytes: Arc::new(bytes),
269+
mime,
270+
path,
271+
captured_at: std::time::SystemTime::now(),
272+
};
273+
info!(
274+
pane,
275+
bytes = len,
276+
mime,
277+
"paste: captured live clipboard image (browser Copy Image bridge)"
278+
);
279+
state.set_staged_image(img).await;
280+
}
281+
}
282+
}
283+
231284
// ─── Intent: text or image (most-recent staged wins) ──────────────
232285
// The staged-selection slot is single-valued (set_staged_image
233286
// replaces text and vice versa), so whatever's in the slot is the
@@ -440,6 +493,13 @@ fn should_probe_external_text(state: &SharedState) -> bool {
440493
)
441494
}
442495

496+
fn should_probe_live_image(state: &SharedState) -> bool {
497+
throttle_ms(
498+
&state.last_live_image_probe_ms,
499+
crate::tmux::HOT_PATH_PROBE_THROTTLE_MS,
500+
)
501+
}
502+
443503
fn throttle_ms(slot: &std::sync::atomic::AtomicU64, min_interval_ms: u64) -> bool {
444504
let now = now_unix_ms();
445505
let last = slot.load(Ordering::Relaxed);
@@ -566,6 +626,147 @@ fn is_text_target(target: &str) -> bool {
566626
matches!(target, "UTF8_STRING" | "STRING" | "TEXT") || target.starts_with("text/plain")
567627
}
568628

629+
/// Read raw image bytes off the live clipboard if (and only if) image MIME
630+
/// is advertised and the returned bytes pass a magic-byte check. Wayland
631+
/// first, X11 fallback. Returns `(bytes, mime)` on success.
632+
///
633+
/// Bridges the "browser Copy Image" case: Firefox/Chrome right-click →
634+
/// "Copy Image" puts bytes on the clipboard with no file write, so the
635+
/// daemon's inotify path on `~/Pictures/Screenshots/` never sees them.
636+
/// Before this probe, the only fix was the `flashpaste-capture-clip`
637+
/// shell shim called from `~/paste_image.sh` — but the kitty Ctrl+V
638+
/// binding short-circuits to `flashpaste-trigger` whenever the user is
639+
/// inside tmux, bypassing the shim entirely. Doing the probe here means
640+
/// the daemon serves the bridge regardless of which path triggered the
641+
/// paste.
642+
///
643+
/// MIME preference is PNG > JPEG > WebP. PNG is universally accepted by
644+
/// Claude Code; JPEG is what most sites serve photographic content as;
645+
/// WebP shows up on modern image-heavy sites. We pick the first one the
646+
/// clipboard actually advertises and trust the magic check downstream
647+
/// to catch xclip's silent text-fallback (xclip returns text bytes when
648+
/// the requested MIME isn't really there, which would otherwise stage a
649+
/// UTF-8 URL as if it were a PNG).
650+
async fn read_clipboard_image_if_present() -> Option<(Vec<u8>, &'static str)> {
651+
const PREFERRED: &[&str] = &["image/png", "image/jpeg", "image/webp"];
652+
653+
// ─── MIME probe: Wayland ────────────────────────────────────────
654+
let wl_types_task = tokio::task::spawn_blocking(|| {
655+
use wl_clipboard_rs::paste::{get_mime_types, ClipboardType, Seat};
656+
get_mime_types(ClipboardType::Regular, Seat::Unspecified)
657+
.ok()
658+
.map(|set| set.into_iter().collect::<Vec<String>>())
659+
.unwrap_or_default()
660+
});
661+
let wl_types = tokio::time::timeout(Duration::from_millis(150), wl_types_task)
662+
.await
663+
.ok()
664+
.and_then(|r| r.ok())
665+
.unwrap_or_default();
666+
667+
// ─── MIME probe: X11 ────────────────────────────────────────────
668+
let x_types: Vec<String> = match tokio::time::timeout(
669+
Duration::from_millis(300),
670+
tokio::process::Command::new("xclip")
671+
.args(["-selection", "clipboard", "-t", "TARGETS", "-o"])
672+
.output(),
673+
)
674+
.await
675+
{
676+
Ok(Ok(out)) if out.status.success() => String::from_utf8_lossy(&out.stdout)
677+
.lines()
678+
.map(|s| s.trim().to_string())
679+
.collect(),
680+
_ => Vec::new(),
681+
};
682+
683+
let has_image_wl = wl_types.iter().any(|t| t.starts_with("image/"));
684+
let has_image_x = x_types.iter().any(|t| t.starts_with("image/"));
685+
if !has_image_wl && !has_image_x {
686+
return None;
687+
}
688+
689+
// Pick the best MIME advertised on either side.
690+
let mime: &'static str = if let Some(m) = PREFERRED.iter().find(|want| {
691+
wl_types.iter().any(|t| t == *want) || x_types.iter().any(|t| t == *want)
692+
}) {
693+
match *m {
694+
"image/png" => "image/png",
695+
"image/jpeg" => "image/jpeg",
696+
"image/webp" => "image/webp",
697+
_ => return None,
698+
}
699+
} else {
700+
return None;
701+
};
702+
703+
// ─── Read bytes: Wayland first ──────────────────────────────────
704+
let bytes: Option<Vec<u8>> = if has_image_wl {
705+
let mime_owned = mime.to_string();
706+
let task = tokio::task::spawn_blocking(move || -> Option<Vec<u8>> {
707+
use wl_clipboard_rs::paste::{get_contents, ClipboardType, MimeType, Seat};
708+
let (mut pipe, _) = get_contents(
709+
ClipboardType::Regular,
710+
Seat::Unspecified,
711+
MimeType::Specific(&mime_owned),
712+
)
713+
.ok()?;
714+
let mut buf = Vec::new();
715+
pipe.read_to_end(&mut buf).ok()?;
716+
if buf.is_empty() {
717+
None
718+
} else {
719+
Some(buf)
720+
}
721+
});
722+
tokio::time::timeout(Duration::from_millis(500), task)
723+
.await
724+
.ok()
725+
.and_then(|r| r.ok())
726+
.flatten()
727+
} else {
728+
None
729+
};
730+
731+
// ─── X11 fallback when Wayland returned empty ───────────────────
732+
let bytes = match bytes {
733+
Some(b) => Some(b),
734+
None if has_image_x => match tokio::time::timeout(
735+
Duration::from_millis(500),
736+
tokio::process::Command::new("xclip")
737+
.args(["-selection", "clipboard", "-t", mime, "-o"])
738+
.output(),
739+
)
740+
.await
741+
{
742+
Ok(Ok(out)) if out.status.success() && !out.stdout.is_empty() => Some(out.stdout),
743+
_ => None,
744+
},
745+
_ => None,
746+
};
747+
748+
let bytes = bytes?;
749+
if !image_magic_ok(&bytes, mime) {
750+
return None;
751+
}
752+
Some((bytes, mime))
753+
}
754+
755+
/// Reports whether `bytes` opens with the file signature that belongs to
/// the declared `mime`.
///
/// Guards against staging a non-image payload under an image MIME — e.g.
/// xclip's silent text-fallback or a truncated read handing back a text
/// blob labelled as PNG. Same idea as the magic check in
/// `bin/flashpaste-capture-clip`.
///
/// Recognised types:
/// * `image/png`  — the 8-byte PNG signature.
/// * `image/jpeg` — the 3-byte `FF D8 FF` prefix.
/// * `image/webp` — `RIFF` at offset 0 and `WEBP` at offset 8, which
///   needs at least 12 bytes.
///
/// Any other `mime` yields `false`.
fn image_magic_ok(bytes: &[u8], mime: &str) -> bool {
    const PNG_SIGNATURE: &[u8] = b"\x89PNG\r\n\x1a\n";
    const JPEG_SIGNATURE: &[u8] = b"\xff\xd8\xff";
    const RIFF_TAG: &[u8] = b"RIFF";
    const WEBP_TAG: &[u8] = b"WEBP";

    if mime == "image/png" {
        return bytes.starts_with(PNG_SIGNATURE);
    }
    if mime == "image/jpeg" {
        return bytes.starts_with(JPEG_SIGNATURE);
    }
    if mime == "image/webp" {
        // `get` returns `None` for a short buffer, so the length
        // requirement (>= 12 bytes) is enforced by the second lookup.
        return bytes.get(..4) == Some(RIFF_TAG) && bytes.get(8..12) == Some(WEBP_TAG);
    }
    false
}
769+
569770
async fn handle_stage(state: &Arc<SharedState>, image_path: &str) -> Value {
570771
let path = std::path::PathBuf::from(image_path);
571772
let mime = match path

rs/flashpasted/src/state.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,12 @@ pub struct SharedState {
196196
/// Last live X11 text probe. External clipboard checks are useful, but
197197
/// repeated paste presses should not shell out to xclip every time.
198198
pub last_external_text_probe_ms: AtomicU64,
199+
/// Last live-clipboard image probe. Used by `handle_paste` to bridge
200+
/// the "browser Copy Image" case (Firefox/Chrome write bytes to the
201+
/// clipboard with no file write, so the inotify path misses them).
202+
/// Throttled because each probe forks `xclip` and may also do a
203+
/// blocking wl-clipboard read.
204+
pub last_live_image_probe_ms: AtomicU64,
199205
}
200206

201207
impl SharedState {
@@ -214,6 +220,7 @@ impl SharedState {
214220
last_claim_request_image_ms: AtomicU64::new(0),
215221
last_screenshot_scan_ms: AtomicU64::new(0),
216222
last_external_text_probe_ms: AtomicU64::new(0),
223+
last_live_image_probe_ms: AtomicU64::new(0),
217224
}
218225
}
219226

0 commit comments

Comments
 (0)