Skip to content

Commit 836a6d8

Browse files
authored
feat(meet): join Google Meet calls with mascot virtual camera (tinyhumansai#1350)
1 parent 9a158cb commit 836a6d8

23 files changed

Lines changed: 2134 additions & 3 deletions

File tree

app/src-tauri/Cargo.lock

Lines changed: 265 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

app/src-tauri/Cargo.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,13 @@ anyhow = "1.0"
9898
parking_lot = "0.12"
9999
chrono = "0.4"
100100
async-trait = "0.1"
101+
# Mascot fake-camera pipeline (meet_call): rasterizes the OpenHuman
102+
# mascot SVG to an RGBA bitmap once, converts it to a YUV420 Y4M frame, and
103+
# points CEF's `--use-file-for-fake-video-capture` flag at the cached
104+
# file so Meet sees the mascot as the agent's webcam. Pure Rust, no
105+
# system codecs needed.
106+
resvg = { version = "0.45", default-features = false, features = ["text", "system-fonts"] }
107+
tiny-skia = "0.11"
101108
# CEF + tauri-runtime-cef dependencies (always required).
102109
# `tauri-runtime-cef::notification::register` is how we hook native Web
103110
# Notification interception per webview, and `cef::Browser` is what we downcast

app/src-tauri/permissions/allow-core-process.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,5 +74,11 @@ allow = [
7474
# backing commands.
7575
"logs_folder_path",
7676
"reveal_logs_folder",
77+
# Meet call: open / close a dedicated CEF webview window pointed at a
78+
# https://meet.google.com/<code> URL with an isolated per-call data
79+
# directory. Surfaced from Intelligence > Calls. Without these allow
80+
# entries the invoke is rejected with "Command not found".
81+
"meet_call_open_window",
82+
"meet_call_close_window",
7783
]
7884
deny = []
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
//! Mascot-as-webcam pipeline.
2+
//!
3+
//! Once at app startup we rasterize the OpenHuman mascot SVG into a
4+
//! 640×480 RGBA bitmap, convert it to YUV420, and write a single-frame
5+
//! YUV4MPEG2 (Y4M) file to the per-user data directory. The file is
6+
//! cached across launches keyed by source-SVG hash so subsequent boots
7+
//! skip the rasterization.
8+
//!
9+
//! At browser launch, `lib.rs` passes the cached path to CEF via
10+
//! `--use-file-for-fake-video-capture=<path>`. CEF reads it on every
11+
//! `getUserMedia({video:true})` call and loops on EOF, so a single
12+
//! frame produces a steady-state still image as the agent's "webcam".
13+
//!
14+
//! No JS is injected anywhere — this is a process-level Chromium flag,
15+
//! not page-level instrumentation.
16+
17+
use std::fs;
18+
use std::path::{Path, PathBuf};
19+
20+
use resvg::usvg::{Options as UsvgOptions, Tree as UsvgTree};
21+
use tiny_skia::{Pixmap, Transform};
22+
23+
/// Output webcam resolution. 640×480 is what every videoconferencing
24+
/// app expects to negotiate against; Meet downscales to whatever it
25+
/// wants from there.
26+
const WIDTH: u32 = 640;
27+
const HEIGHT: u32 = 480;
28+
const FRAMERATE: &str = "F30:1";
29+
30+
/// Mascot SVG embedded at build time. The remotion bundle owns the
31+
/// canonical asset; we vendor a copy of its content via `include_str!`
32+
/// so the shell builds without needing the remotion tree at runtime.
33+
const MASCOT_SVG: &str = include_str!("../../../../remotion/public/mascot.svg");
34+
35+
/// Top-level entrypoint. Returns the path to a Y4M file CEF can read,
36+
/// rasterizing the mascot if no cached version exists.
37+
///
38+
/// Errors are logged + returned as `String` so the caller (lib.rs)
39+
/// can decide whether to skip the fake-camera flag and let the user
40+
/// see the default "no camera" path. We do **not** panic — a missing
41+
/// fake camera is degraded but not fatal.
42+
pub fn ensure_mascot_y4m(data_dir: &Path) -> Result<PathBuf, String> {
43+
let cache_dir = data_dir.join("cache").join("fake_camera");
44+
fs::create_dir_all(&cache_dir).map_err(|e| format!("create cache dir: {e}"))?;
45+
46+
let svg_hash = stable_hash(MASCOT_SVG);
47+
let y4m_path = cache_dir.join(format!("mascot-{WIDTH}x{HEIGHT}-{svg_hash:016x}.y4m"));
48+
49+
if y4m_path.exists() {
50+
log::info!(
51+
"[fake-camera] reusing cached mascot Y4M path={}",
52+
y4m_path.display()
53+
);
54+
return Ok(y4m_path);
55+
}
56+
57+
log::info!(
58+
"[fake-camera] rasterizing mascot {}x{} -> {}",
59+
WIDTH,
60+
HEIGHT,
61+
y4m_path.display()
62+
);
63+
let rgba = rasterize_svg(MASCOT_SVG)?;
64+
let y4m_bytes = encode_single_frame_y4m(&rgba);
65+
66+
// Atomic-ish write: write to .partial then rename, so a crash
67+
// mid-write never leaves CEF reading a half-finished Y4M.
68+
let tmp_path = y4m_path.with_extension("y4m.partial");
69+
fs::write(&tmp_path, &y4m_bytes).map_err(|e| format!("write y4m: {e}"))?;
70+
// Tolerate a concurrent writer landing first: if rename fails but the
71+
// target already exists, the other writer wrote the same SVG-hash-keyed
72+
// file and we can drop our temp copy.
73+
match fs::rename(&tmp_path, &y4m_path) {
74+
Ok(()) => Ok(y4m_path),
75+
Err(_) if y4m_path.exists() => {
76+
let _ = fs::remove_file(&tmp_path);
77+
Ok(y4m_path)
78+
}
79+
Err(e) => Err(format!("rename y4m: {e}")),
80+
}
81+
}
82+
83+
/// Render the SVG to a 640×480 RGBA8 bitmap, letterboxed onto a flat
84+
/// background so the mascot looks centered in the participant tile
85+
/// regardless of source aspect ratio.
86+
fn rasterize_svg(svg: &str) -> Result<Vec<u8>, String> {
87+
let tree =
88+
UsvgTree::from_str(svg, &UsvgOptions::default()).map_err(|e| format!("parse svg: {e}"))?;
89+
let svg_size = tree.size();
90+
let svg_w = svg_size.width();
91+
let svg_h = svg_size.height();
92+
if svg_w <= 0.0 || svg_h <= 0.0 {
93+
return Err("mascot svg has zero size".into());
94+
}
95+
96+
let mut pixmap = Pixmap::new(WIDTH, HEIGHT).ok_or_else(|| "alloc pixmap".to_string())?;
97+
// Background fill — Meet's tile is rectangular and we want a clean
98+
// backdrop, not transparent (which the YUV conversion would
99+
// collapse to black anyway).
100+
pixmap.fill(tiny_skia::Color::from_rgba8(247, 244, 238, 255));
101+
102+
// Fit the mascot inside the frame with a 12% margin so it doesn't
103+
// get cropped at the corners by Meet's rounded mask.
104+
let margin = 0.12;
105+
let target_w = (WIDTH as f32) * (1.0 - 2.0 * margin);
106+
let target_h = (HEIGHT as f32) * (1.0 - 2.0 * margin);
107+
let scale = (target_w / svg_w).min(target_h / svg_h);
108+
let drawn_w = svg_w * scale;
109+
let drawn_h = svg_h * scale;
110+
let tx = ((WIDTH as f32) - drawn_w) / 2.0;
111+
let ty = ((HEIGHT as f32) - drawn_h) / 2.0;
112+
113+
let transform = Transform::from_scale(scale, scale).post_translate(tx, ty);
114+
resvg::render(&tree, transform, &mut pixmap.as_mut());
115+
116+
Ok(pixmap.take())
117+
}
118+
119+
/// Convert an RGBA8 buffer (length WIDTH * HEIGHT * 4) to a Y4M file
120+
/// containing a single FRAME using BT.601 limited-range coefficients.
121+
/// Chromium's fake video capture re-reads the file in a loop, so one
122+
/// frame is enough for a steady image.
123+
fn encode_single_frame_y4m(rgba: &[u8]) -> Vec<u8> {
124+
let header = format!(
125+
"YUV4MPEG2 W{WIDTH} H{HEIGHT} {FRAMERATE} Ip A1:1 C420jpeg Xopenhuman-mascot\nFRAME\n"
126+
);
127+
128+
let pixel_count = (WIDTH * HEIGHT) as usize;
129+
let mut y_plane = Vec::with_capacity(pixel_count);
130+
let chroma_count = ((WIDTH / 2) * (HEIGHT / 2)) as usize;
131+
let mut u_plane = Vec::with_capacity(chroma_count);
132+
let mut v_plane = Vec::with_capacity(chroma_count);
133+
134+
// Y plane: per-pixel luma.
135+
for chunk in rgba.chunks_exact(4) {
136+
let (r, g, b) = (chunk[0] as f32, chunk[1] as f32, chunk[2] as f32);
137+
let y = (0.299 * r + 0.587 * g + 0.114 * b).clamp(0.0, 255.0) as u8;
138+
y_plane.push(y);
139+
}
140+
141+
// U/V planes: average each 2×2 block.
142+
for by in (0..HEIGHT).step_by(2) {
143+
for bx in (0..WIDTH).step_by(2) {
144+
let mut r_sum = 0.0;
145+
let mut g_sum = 0.0;
146+
let mut b_sum = 0.0;
147+
for dy in 0..2 {
148+
for dx in 0..2 {
149+
let x = bx + dx;
150+
let y = by + dy;
151+
let idx = ((y * WIDTH + x) * 4) as usize;
152+
r_sum += rgba[idx] as f32;
153+
g_sum += rgba[idx + 1] as f32;
154+
b_sum += rgba[idx + 2] as f32;
155+
}
156+
}
157+
let r = r_sum / 4.0;
158+
let g = g_sum / 4.0;
159+
let b = b_sum / 4.0;
160+
let u = (-0.169 * r - 0.331 * g + 0.5 * b + 128.0).clamp(0.0, 255.0) as u8;
161+
let v = (0.5 * r - 0.419 * g - 0.081 * b + 128.0).clamp(0.0, 255.0) as u8;
162+
u_plane.push(u);
163+
v_plane.push(v);
164+
}
165+
}
166+
167+
let mut out = Vec::with_capacity(header.len() + y_plane.len() + u_plane.len() + v_plane.len());
168+
out.extend_from_slice(header.as_bytes());
169+
out.extend_from_slice(&y_plane);
170+
out.extend_from_slice(&u_plane);
171+
out.extend_from_slice(&v_plane);
172+
out
173+
}
174+
175+
/// Stable, deterministic 64-bit hash of a string — used to key the Y4M
/// cache against the source SVG.
///
/// Implemented as FNV-1a over the UTF-8 bytes rather than with std's
/// `DefaultHasher`: the standard library documents `DefaultHasher`'s
/// algorithm as unspecified and not to be relied upon across releases,
/// so a toolchain upgrade could change the value for an unchanged SVG.
/// FNV-1a is fully defined by its two published constants, so the
/// result only changes when the input bytes change. We don't need
/// cryptographic strength, just "did the SVG change?".
fn stable_hash(s: &str) -> u64 {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a: XOR the byte in first, then multiply (wrapping mod 2^64).
    s.bytes().fold(FNV_OFFSET_BASIS, |hash, byte| {
        (hash ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
184+
185+
#[cfg(test)]
mod tests {
    use super::*;

    /// All-zero RGBA input with exactly the byte length the encoder expects.
    fn blank_rgba() -> Vec<u8> {
        let byte_len = (WIDTH * HEIGHT * 4) as usize;
        vec![0u8; byte_len]
    }

    /// The stream header line must advertise the frame size and chroma tag.
    #[test]
    fn y4m_header_includes_dimensions_and_colorspace() {
        let encoded = encode_single_frame_y4m(&blank_rgba());
        let newline_at = encoded.iter().position(|byte| *byte == b'\n').unwrap();
        let header_line = std::str::from_utf8(&encoded[..newline_at]).unwrap();

        let width_tag = format!("W{WIDTH}");
        let height_tag = format!("H{HEIGHT}");
        assert!(header_line.contains(&width_tag));
        assert!(header_line.contains(&height_tag));
        assert!(header_line.contains("C420jpeg"));
    }

    /// Everything after `FRAME\n` must be one Y plane plus two
    /// quarter-size chroma planes.
    #[test]
    fn y4m_payload_size_matches_yuv420_layout() {
        let encoded = encode_single_frame_y4m(&blank_rgba());

        // Header up to first newline, then "FRAME\n", then planes.
        let marker = b"FRAME\n";
        let marker_at = encoded
            .windows(marker.len())
            .position(|window| window == marker)
            .expect("FRAME marker present");
        let planes_start = marker_at + marker.len();

        let luma_len = (WIDTH * HEIGHT) as usize;
        let chroma_len = ((WIDTH / 2) * (HEIGHT / 2)) as usize;
        assert_eq!(encoded.len() - planes_start, luma_len + 2 * chroma_len);
    }

    /// The rasterizer must always hand back a full WIDTH×HEIGHT RGBA frame.
    #[test]
    fn rasterize_svg_produces_correctly_sized_buffer() {
        let pixels = rasterize_svg(MASCOT_SVG).expect("rasterize");
        let expected_len = (WIDTH * HEIGHT * 4) as usize;
        assert_eq!(pixels.len(), expected_len);
    }

    /// Same input, same hash; different input, different hash.
    #[test]
    fn stable_hash_is_deterministic() {
        let first = stable_hash("openhuman");
        let second = stable_hash("openhuman");
        assert_eq!(first, second);
        assert_ne!(stable_hash("a"), stable_hash("b"));
    }
}

app/src-tauri/src/file_logging.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ pub fn init() {
3535
/// `dirs::home_dir` to return `None`), falls back to `<temp>/openhuman`
3636
/// rather than a relative `.openhuman` whose final location depends on the
3737
/// shell's CWD at launch time.
38-
fn resolve_data_dir() -> PathBuf {
38+
pub(crate) fn resolve_data_dir() -> PathBuf {
3939
if let Ok(workspace) = std::env::var("OPENHUMAN_WORKSPACE") {
4040
if !workspace.is_empty() {
4141
return PathBuf::from(workspace);

app/src-tauri/src/lib.rs

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,14 @@ mod core_process;
99
mod core_rpc;
1010
mod dictation_hotkeys;
1111
mod discord_scanner;
12+
mod fake_camera;
1213
mod file_logging;
1314
mod gmessages_scanner;
1415
mod imessage_scanner;
1516
#[cfg(target_os = "macos")]
1617
mod mascot_native_window;
18+
mod meet_call;
19+
mod meet_scanner;
1720
mod native_notifications;
1821
mod notification_settings;
1922
mod process_kill;
@@ -1255,6 +1258,37 @@ pub fn run() {
12551258
// silently disappears and huddle/call buttons no-op.
12561259
("--enable-features", Some("SharedArrayBuffer")),
12571260
];
1261+
// Mascot fake-camera: bake the SVG into a one-frame Y4M and
1262+
// point Chromium's fake-video-capture pipeline at it so any
1263+
// CEF webview that calls `getUserMedia({video:true})` sees the
1264+
// mascot as the agent's webcam. `--use-fake-ui-for-media-stream`
1265+
// auto-allows the permission prompt so Meet's join page doesn't
1266+
// get stuck behind it. The flags are process-level (affect every
1267+
// CEF webview), which is fine today: only the Meet call window
1268+
// intentionally requests a camera, and other webviews don't ask
1269+
// for one. The path string is leaked with `Box::leak` so its
1270+
// `&str` outlives the args vec we hand to `command_line_args`.
1271+
let fake_camera_arg: Option<&'static str> =
1272+
match fake_camera::ensure_mascot_y4m(&file_logging::resolve_data_dir()) {
1273+
Ok(path) => {
1274+
let leaked: &'static str =
1275+
Box::leak(path.to_string_lossy().into_owned().into_boxed_str());
1276+
log::info!("[cef-startup] fake-camera y4m path={leaked}");
1277+
Some(leaked)
1278+
}
1279+
Err(err) => {
1280+
log::warn!(
1281+
"[cef-startup] mascot fake-camera unavailable: {err} \
1282+
(Meet will see no camera)"
1283+
);
1284+
None
1285+
}
1286+
};
1287+
if let Some(path) = fake_camera_arg {
1288+
args.push(("--use-fake-device-for-media-stream", None));
1289+
args.push(("--use-fake-ui-for-media-stream", None));
1290+
args.push(("--use-file-for-fake-video-capture", Some(path)));
1291+
}
12581292
// Always expose the CDP port, not just in debug. The webview-accounts
12591293
// CDP session opener navigates each embedded provider webview from its
12601294
// `about:blank#openhuman-acct-...` placeholder to the real provider URL
@@ -1319,6 +1353,7 @@ pub fn run() {
13191353
let builder = builder.manage(discord_scanner::ScannerRegistry::new());
13201354
let builder = builder.manage(telegram_scanner::ScannerRegistry::new());
13211355
let builder = builder.manage(screen_capture::ScreenShareState::new());
1356+
let builder = builder.manage(meet_call::MeetCallState::new());
13221357
builder
13231358
.setup(move |app| {
13241359
#[cfg(any(windows, target_os = "linux"))]
@@ -1809,7 +1844,9 @@ pub fn run() {
18091844
mascot_window_show,
18101845
mascot_window_hide,
18111846
file_logging::reveal_logs_folder,
1812-
file_logging::logs_folder_path
1847+
file_logging::logs_folder_path,
1848+
meet_call::meet_call_open_window,
1849+
meet_call::meet_call_close_window
18131850
])
18141851
.build(tauri::generate_context!())
18151852
.expect("error while building tauri application")

0 commit comments

Comments
 (0)