// ═══════════════════════════════════════════
// KEYBOARD: MODIFIER RECONCILIATION (Firefox RFP)
// ═══════════════════════════════════════════

test("keyboard: modifier reconciliation when standalone modifier events are suppressed (#1305)", async () => {
  // Under Firefox resistFingerprinting the browser never delivers a standalone
  // Shift keydown/keyup. Pressing Shift+A therefore yields only a "KeyA" event
  // carrying shiftKey=true, and the UI is expected to synthesize the missing
  // Shift press/release for the remote host.
  await agent!.clearKeyboardEvents();

  const pause = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));

  // Dispatches one synthetic "KeyA" keyboard event on the page's document.
  const dispatchKeyA = (init: { type: "keydown" | "keyup"; key: string; shiftKey: boolean }) =>
    sharedPage.evaluate(({ type, key, shiftKey }) => {
      document.dispatchEvent(
        new KeyboardEvent(type, {
          code: "KeyA",
          key,
          shiftKey,
          bubbles: true,
          cancelable: true,
        }),
      );
    }, init);

  // keydown for "KeyA" with Shift reported as held, but no prior Shift keydown.
  await dispatchKeyA({ type: "keydown", key: "A", shiftKey: true });
  await pause(50);

  // keyup for "KeyA" with Shift reported as released, but no Shift keyup.
  await dispatchKeyA({ type: "keyup", key: "a", shiftKey: false });
  await pause(100);

  // The remote host must have seen both LEFT_SHIFT and A pressed and released.
  const pressedCodes: number[] = [];
  const releasedCodes: number[] = [];
  for (const ev of await agent!.getKeyboardEvents()) {
    if (ev.type === "key_press") {
      pressedCodes.push(ev.code);
    } else if (ev.type === "key_release") {
      releasedCodes.push(ev.code);
    }
  }

  expect(pressedCodes, "Shift press should be synthesized").toContain(KEY.LEFT_SHIFT);
  expect(pressedCodes, "A press should be sent").toContain(KEY.A);
  expect(releasedCodes, "Shift release should be synthesized").toContain(KEY.LEFT_SHIFT);
  expect(releasedCodes, "A release should be sent").toContain(KEY.A);
});
// Firefox resistFingerprinting suppresses standalone modifier key events.
// Reconcile modifier state from event boolean properties as a fallback.
// HID codes of the modifiers we pressed synthetically, so that exactly those
// are released again and nothing is sent twice.
const syntheticModifiersRef = useRef<Set<number>>(new Set());
// Codes of modifier keys whose real keydown passed through reconcileModifiers
// and whose keyup has not been seen yet.
const heldModifierCodesRef = useRef<Set<string>>(new Set());

/**
 * Synthesizes modifier presses/releases that the browser did not deliver as
 * standalone events, based on the shiftKey/ctrlKey/altKey/metaKey flags of `e`.
 *
 * A press is synthesized (always for the Left variant) only on keydown, and
 * only when the flag is set while
 *   - no key of that modifier family (Left or Right) is known to be really held,
 *   - the modifier is not already synthetically pressed, and
 *   - the flag is not merely a side effect of AltGr (AltGraph state), which
 *     browsers may report as ctrlKey + altKey.
 * A synthetic press is released on the first event whose flag is cleared.
 *
 * NOTE(review): the visible call sites invoke this *after* handleKeyPress for
 * the event's own key, so a synthesized modifier press reaches the remote
 * after the key it modifies. Consider reconciling before the key press on
 * keydown.
 * NOTE(review): real modifier keydowns that return early in the key handler
 * before reaching this function are not recorded as held — verify against the
 * handler's early-return paths.
 *
 * @param e         The keyboard event being processed.
 * @param isKeyDown True when called from the keydown path, false for keyup.
 */
const reconcileModifiers = useCallback(
  (e: KeyboardEvent, isKeyDown: boolean) => {
    const synthetic = syntheticModifiersRef.current;
    const held = heldModifierCodesRef.current;

    // Loop-invariant: resolve the event's own key once.
    const eventCode: string = getAdjustedKeyCode(e);
    const altGraphActive = e.getModifierState("AltGraph");

    const families: {
      active: boolean;
      hidKey: number;
      codes: readonly string[];
      partOfAltGraph: boolean;
    }[] = [
      { active: e.shiftKey, hidKey: keys.ShiftLeft, codes: ["ShiftLeft", "ShiftRight"], partOfAltGraph: false },
      { active: e.ctrlKey, hidKey: keys.ControlLeft, codes: ["ControlLeft", "ControlRight"], partOfAltGraph: true },
      { active: e.altKey, hidKey: keys.AltLeft, codes: ["AltLeft", "AltRight"], partOfAltGraph: true },
      { active: e.metaKey, hidKey: keys.MetaLeft, codes: ["MetaLeft", "MetaRight"], partOfAltGraph: false },
    ];

    for (const { active, hidKey, codes, partOfAltGraph } of families) {
      // The event is a real modifier event of this family: the normal key path
      // already handled it, we only record its held state.
      if (codes.includes(eventCode)) {
        if (isKeyDown) {
          held.add(eventCode);
        } else {
          held.delete(eventCode);
        }
      }

      if (!active) {
        // The flag is clear, so no key of this family is down any more. Drop
        // stale held entries (a keyup can be missed, e.g. on focus loss) and
        // release the modifier if we were the ones who pressed it.
        for (const familyCode of codes) {
          held.delete(familyCode);
        }
        if (synthetic.delete(hidKey)) {
          handleKeyPress(hidKey, false);
        }
        continue;
      }

      // Presses are only ever synthesized on keydown, and only once.
      if (!isKeyDown || synthetic.has(hidKey)) continue;
      // A real key of this family (Left or Right) is already held.
      if (codes.some(familyCode => held.has(familyCode))) continue;
      // ctrlKey/altKey caused by AltGr must not turn into ControlLeft/AltLeft.
      if (partOfAltGraph && altGraphActive) continue;

      synthetic.add(hidKey);
      handleKeyPress(hidKey, true);
    }
  },
  [handleKeyPress],
);
/**
 * Reports whether the client appears to be running on Windows.
 *
 * Resolution order:
 *  1. `navigator.userAgentData.platform` (User-Agent Client Hints) when the
 *     browser exposes it.
 *  2. `navigator.platform`, unless the timestamps look clamped the way
 *     Firefox's resistFingerprinting (RFP) clamps them. RFP spoofs
 *     `navigator.platform` to "Win32" on all OSes, which would otherwise
 *     activate the Windows-specific AltGr key buffering; RFP also makes the
 *     3ms AltGr timing check unreliable.
 *
 * NOTE(review): the RFP check samples the clock at call time, so it is
 * probabilistic. On a non-RFP browser a call can very rarely return false,
 * and repeated calls are not guaranteed to agree.
 *
 * @returns true when the platform string starts with "win" (case-insensitive)
 *          and RFP spoofing is not suspected; false otherwise, including when
 *          no `navigator` exists.
 */
export function isWindows(): boolean {
  if (typeof navigator === "undefined") {
    return false;
  }

  // Anchored so that only platform strings starting with "Win" match; an
  // unanchored /win/ would also accept e.g. "darwin".
  const windowsPlatform = /^win/i;

  // Prefer NavigatorUAData.platform (User-Agent Client Hints API) when available.
  const uaData = (navigator as Navigator & { userAgentData?: { platform?: string } })
    .userAgentData;
  if (uaData?.platform) {
    return windowsPlatform.test(uaData.platform);
  }

  // Browsers without userAgentData (Firefox, Safari) fall back to navigator.platform.
  if (!windowsPlatform.test(navigator.platform ?? "")) {
    return false;
  }

  // Detect RFP by checking whether timestamps sit on 100ms boundaries.
  // Date.now() is epoch milliseconds, so without clamping its value mod 100 is
  // essentially uniformly distributed; performance.timeOrigin is epoch-based
  // as well. Requiring both keeps the false-positive rate at roughly
  // 1/100 × 1/100 or lower.
  const rfpLikely = Date.now() % 100 === 0 && performance.timeOrigin % 100 === 0;
  return !rfpLikely;
}