Skip to content

Commit 364844e

Browse files
authored
feat: add recording quality flag (#409)
* feat: add recording quality flag * test: cover quality edge cases * fix: scale transform translation in resize * fix: narrow recording quality flag
1 parent 8f7375f commit 364844e

31 files changed

+799
-26
lines changed

ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,16 +183,25 @@ extension RunnerTests {
183183
if let requestedFps = command.fps, (requestedFps < minRecordingFps || requestedFps > maxRecordingFps) {
184184
return Response(ok: false, error: ErrorPayload(message: "recordStart fps must be between \(minRecordingFps) and \(maxRecordingFps)"))
185185
}
186+
if let requestedQuality = command.quality, (requestedQuality < minRecordingQuality || requestedQuality > maxRecordingQuality) {
187+
return Response(ok: false, error: ErrorPayload(message: "recordStart quality must be between \(minRecordingQuality) and \(maxRecordingQuality)"))
188+
}
186189
do {
187190
let resolvedOutPath = resolveRecordingOutPath(requestedOutPath)
188191
let fpsLabel = command.fps.map(String.init) ?? String(RunnerTests.defaultRecordingFps)
192+
let qualityLabel = command.quality.map(String.init) ?? "native"
189193
NSLog(
190-
"AGENT_DEVICE_RUNNER_RECORD_START requestedOutPath=%@ resolvedOutPath=%@ fps=%@",
194+
"AGENT_DEVICE_RUNNER_RECORD_START requestedOutPath=%@ resolvedOutPath=%@ fps=%@ quality=%@",
191195
requestedOutPath,
192196
resolvedOutPath,
193-
fpsLabel
197+
fpsLabel,
198+
qualityLabel
199+
)
200+
let recorder = ScreenRecorder(
201+
outputPath: resolvedOutPath,
202+
fps: command.fps.map { Int32($0) },
203+
quality: command.quality
194204
)
195-
let recorder = ScreenRecorder(outputPath: resolvedOutPath, fps: command.fps.map { Int32($0) })
196205
try recorder.start { [weak self] in
197206
return self?.captureRunnerFrame()
198207
}

ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ struct Command: Codable {
5252
let scale: Double?
5353
let outPath: String?
5454
let fps: Int?
55+
let quality: Int?
5556
let interactiveOnly: Bool?
5657
let compact: Bool?
5758
let depth: Int?

ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScreenRecorder.swift

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ extension RunnerTests {
77
final class ScreenRecorder {
88
private let outputPath: String
99
private let fps: Int32?
10+
private let quality: Int?
1011
private var effectiveFps: Int32 {
1112
max(1, fps ?? RunnerTests.defaultRecordingFps)
1213
}
@@ -25,9 +26,10 @@ extension RunnerTests {
2526
private var startedSession = false
2627
private var startError: Error?
2728

28-
init(outputPath: String, fps: Int32?) {
29+
init(outputPath: String, fps: Int32?, quality: Int?) {
2930
self.outputPath = outputPath
3031
self.fps = fps
32+
self.quality = quality
3133
}
3234

3335
func start(captureFrame: @escaping () -> RunnerImage?) throws {
@@ -48,7 +50,7 @@ extension RunnerTests {
4850
while Date() < bootstrapDeadline {
4951
if let image = captureFrame(), let cgImage = runnerCGImage(from: image) {
5052
bootstrapImage = image
51-
dimensions = CGSize(width: cgImage.width, height: cgImage.height)
53+
dimensions = scaledDimensions(width: cgImage.width, height: cgImage.height)
5254
break
5355
}
5456
Thread.sleep(forTimeInterval: 0.05)
@@ -240,11 +242,13 @@ extension RunnerTests {
240242

241243
CVPixelBufferLockBaseAddress(pixelBuffer, [])
242244
defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, []) }
245+
let width = CVPixelBufferGetWidth(pixelBuffer)
246+
let height = CVPixelBufferGetHeight(pixelBuffer)
243247
guard
244248
let context = CGContext(
245249
data: CVPixelBufferGetBaseAddress(pixelBuffer),
246-
width: image.width,
247-
height: image.height,
250+
width: width,
251+
height: height,
248252
bitsPerComponent: 8,
249253
bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer),
250254
space: CGColorSpaceCreateDeviceRGB(),
@@ -253,8 +257,23 @@ extension RunnerTests {
253257
else {
254258
return nil
255259
}
256-
context.draw(image, in: CGRect(x: 0, y: 0, width: image.width, height: image.height))
260+
context.draw(image, in: CGRect(x: 0, y: 0, width: width, height: height))
257261
return pixelBuffer
258262
}
263+
264+
/// Returns the frame size to record at for a captured frame of `width` x `height` pixels.
///
/// When no quality was requested, or the requested quality is 10 or higher, the captured
/// size is returned unchanged. Otherwise both sides are scaled by `quality / 10` and
/// snapped to an even pixel count by `scaledEvenDimension`.
private func scaledDimensions(width: Int, height: Int) -> CGSize {
    if let quality, quality < 10 {
        let factor = Double(quality) / 10.0
        let scaledWidth = scaledEvenDimension(width, scale: factor)
        let scaledHeight = scaledEvenDimension(height, scale: factor)
        return CGSize(width: scaledWidth, height: scaledHeight)
    }
    return CGSize(width: width, height: height)
}
274+
275+
/// Scales `value` by `scale` and rounds the result to the nearest even integer.
///
/// The result is never smaller than 2.
private func scaledEvenDimension(_ value: Int, scale: Double) -> Int {
    // Round in units of two pixels so that doubling always yields an even count.
    let pixelPairs = (Double(value) * scale / 2.0).rounded()
    let evenPixels = Int(pixelPairs) * 2
    return evenPixels < 2 ? 2 : evenPixels
}
259278
}
260279
}

ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ final class RunnerTests: XCTestCase {
4848
let tvRemoteDoublePressDelayDefault: TimeInterval = 0.0
4949
let minRecordingFps = 1
5050
let maxRecordingFps = 120
51+
let minRecordingQuality = 5
52+
let maxRecordingQuality = 10
5153
var needsPostSnapshotInteractionDelay = false
5254
var needsFirstInteractionDelay = false
5355
var activeRecording: ScreenRecorder?
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
import AVFoundation
2+
import Foundation
3+
4+
/// Failures the resize script can report; `description` is the text printed to the user.
enum ResizeError: Error, CustomStringConvertible {
    /// The command line was malformed; the payload is the message to show.
    case invalidArgs(String)
    /// The input asset has no video track.
    case missingVideoTrack
    /// Building or running the export failed; the payload is the message to show.
    case exportFailed(String)

    var description: String {
        switch self {
        case .invalidArgs(let detail), .exportFailed(let detail):
            // Both payload-carrying cases surface their message verbatim.
            return detail
        case .missingVideoTrack:
            return "Input video does not contain a video track."
        }
    }
}
20+
21+
// Script entry point: run the resize and turn any thrown error into a one-line
// stderr message plus a non-zero exit status.
do {
    try run()
} catch let failure {
    fputs("recording-resize: \(failure)\n", stderr)
    exit(1)
}
27+
28+
/// Resizes the video named on the command line and writes the result as an MP4.
///
/// Parses `--input`, `--output` and `--quality`, scales the first video track of the input
/// by `quality / 10`, copies the first audio track when it can be inserted, and blocks until
/// the export finishes or 120 seconds elapse.
///
/// - Throws: `ResizeError.invalidArgs` for malformed arguments or when input and output
///   resolve to the same file; `ResizeError.missingVideoTrack` when the input has no video
///   track; `ResizeError.exportFailed` when the composition or export cannot be created,
///   fails, or times out; and any error raised while removing a stale output file or
///   inserting the video track.
func run() throws {
    let arguments = Array(CommandLine.arguments.dropFirst())
    let parsedArgs = try parseArguments(arguments)
    let inputURL = URL(fileURLWithPath: parsedArgs.inputPath)
    let outputURL = URL(fileURLWithPath: parsedArgs.outputPath)

    // The stale output is deleted below; if it is the same file as the input that would
    // destroy the source before it is ever read, so refuse in-place resizing up front.
    let resolvedInputPath = inputURL.standardizedFileURL.resolvingSymlinksInPath().path
    let resolvedOutputPath = outputURL.standardizedFileURL.resolvingSymlinksInPath().path
    guard resolvedInputPath != resolvedOutputPath else {
        throw ResizeError.invalidArgs("--input and --output must be different files")
    }

    let asset = AVURLAsset(url: inputURL)
    guard let sourceVideoTrack = asset.tracks(withMediaType: .video).first else {
        throw ResizeError.missingVideoTrack
    }

    // Only clear a previous output once the input is known to be usable, so a bad input
    // does not also wipe an earlier good result.
    if FileManager.default.fileExists(atPath: outputURL.path) {
        try FileManager.default.removeItem(at: outputURL)
    }

    let renderSize = scaledRenderSize(for: sourceVideoTrack, quality: parsedArgs.quality)
    let composition = AVMutableComposition()
    let fullRange = CMTimeRange(start: .zero, duration: asset.duration)

    guard let compositionVideoTrack = composition.addMutableTrack(
        withMediaType: .video,
        preferredTrackID: kCMPersistentTrackID_Invalid
    ) else {
        throw ResizeError.exportFailed("Failed to create composition video track.")
    }
    try compositionVideoTrack.insertTimeRange(fullRange, of: sourceVideoTrack, at: .zero)

    if let sourceAudioTrack = asset.tracks(withMediaType: .audio).first,
       let compositionAudioTrack = composition.addMutableTrack(
           withMediaType: .audio,
           preferredTrackID: kCMPersistentTrackID_Invalid
       ) {
        do {
            try compositionAudioTrack.insertTimeRange(fullRange, of: sourceAudioTrack, at: .zero)
        } catch {
            // Audio is best-effort: export video only rather than keeping an empty audio track.
            composition.removeTrack(compositionAudioTrack)
        }
    }

    let scale = CGFloat(parsedArgs.quality) / 10.0
    let videoComposition = AVMutableVideoComposition()
    videoComposition.renderSize = renderSize
    videoComposition.frameDuration = resolvedFrameDuration(for: sourceVideoTrack)

    let instruction = AVMutableVideoCompositionInstruction()
    instruction.timeRange = fullRange
    let layerInstruction = AVMutableVideoCompositionLayerInstruction(assetTrack: compositionVideoTrack)
    // Scale the full preferred transform (including translation) to match the smaller render canvas.
    let scaledTransform = scaledPreferredTransform(sourceVideoTrack.preferredTransform, scale: scale)
    layerInstruction.setTransform(scaledTransform, at: .zero)
    instruction.layerInstructions = [layerInstruction]
    videoComposition.instructions = [instruction]

    guard let exporter = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetHighestQuality) else {
        throw ResizeError.exportFailed("Failed to create export session.")
    }

    exporter.outputURL = outputURL
    exporter.outputFileType = .mp4
    exporter.videoComposition = videoComposition
    exporter.shouldOptimizeForNetworkUse = true

    // The script is synchronous, so block on the asynchronous export with a hard deadline.
    let semaphore = DispatchSemaphore(value: 0)
    exporter.exportAsynchronously {
        semaphore.signal()
    }
    if semaphore.wait(timeout: .now() + 120) == .timedOut {
        exporter.cancelExport()
        throw ResizeError.exportFailed("Resize export timed out.")
    }

    if exporter.status != .completed {
        throw ResizeError.exportFailed(exporter.error?.localizedDescription ?? "Resize export failed.")
    }
}
99+
100+
/// Parses the script's `--input`, `--output` and `--quality` flags.
///
/// Each flag consumes the token that follows it as its value. A flag given more than once
/// keeps its last value.
///
/// - Parameter arguments: Command-line tokens without the executable name.
/// - Returns: The input path, output path and quality (an integer from 5 to 10).
/// - Throws: `ResizeError.invalidArgs` when a flag has no value, the quality is not an
///   integer between 5 and 10, an unknown token is seen, or a required flag is missing.
func parseArguments(_ arguments: [String]) throws -> (inputPath: String, outputPath: String, quality: Int) {
    var inputPath: String?
    var outputPath: String?
    var quality: Int?
    var remaining = arguments.makeIterator()

    while let flag = remaining.next() {
        switch flag {
        case "--input":
            guard let value = remaining.next() else {
                throw ResizeError.invalidArgs("--input requires a value")
            }
            inputPath = value
        case "--output":
            guard let value = remaining.next() else {
                throw ResizeError.invalidArgs("--output requires a value")
            }
            outputPath = value
        case "--quality":
            guard let rawValue = remaining.next() else {
                throw ResizeError.invalidArgs("--quality requires a value")
            }
            guard let parsed = Int(rawValue), (5...10).contains(parsed) else {
                throw ResizeError.invalidArgs("--quality must be an integer between 5 and 10")
            }
            quality = parsed
        default:
            throw ResizeError.invalidArgs("Unknown argument: \(flag)")
        }
    }

    if let inputPath, let outputPath, let quality {
        return (inputPath, outputPath, quality)
    }
    throw ResizeError.invalidArgs(
        "Usage: recording-resize.swift --input <video> --output <video> --quality <5-10>"
    )
}
137+
138+
/// Returns the track's natural size after applying its preferred transform, with both
/// sides made non-negative.
func resolvedRenderSize(for track: AVAssetTrack) -> CGSize {
    let oriented = track.naturalSize.applying(track.preferredTransform)
    // The transform can produce negative extents; only the magnitudes matter for a size.
    let width = oriented.width.magnitude
    let height = oriented.height.magnitude
    return CGSize(width: width, height: height)
}
142+
143+
/// Returns the render size for `track` at the given quality.
///
/// A quality of 10 or more returns the resolved render size unchanged; lower values scale
/// both sides by `quality / 10` through `scaledDimension`.
func scaledRenderSize(for track: AVAssetTrack, quality: Int) -> CGSize {
    let nativeSize = resolvedRenderSize(for: track)
    if quality >= 10 {
        return nativeSize
    }
    let factor = CGFloat(quality) / 10.0
    let width = scaledDimension(nativeSize.width, scale: factor)
    let height = scaledDimension(nativeSize.height, scale: factor)
    return CGSize(width: width, height: height)
}
152+
153+
/// Scales `value` by `scale` and rounds to the nearest even whole number, never below 2.
func scaledDimension(_ value: CGFloat, scale: CGFloat) -> CGFloat {
    // Round in units of two so that doubling always lands on an even value.
    let halved = Double(value * scale) / 2.0
    let nearestEven = Int(halved.rounded()) * 2
    return nearestEven >= 2 ? CGFloat(nearestEven) : 2
}
157+
158+
/// Picks the frame duration to use for the video composition.
///
/// Preference order: the track's `minFrameDuration` when it is valid, not indefinite and
/// positive; otherwise one frame at the rounded `nominalFrameRate` (at least 1 fps) when
/// that rate is positive; otherwise 1/60 of a second.
func resolvedFrameDuration(for track: AVAssetTrack) -> CMTime {
    let declared = track.minFrameDuration
    let declaredIsUsable = declared.isValid && !declared.isIndefinite && declared.seconds > 0
    guard !declaredIsUsable else {
        return declared
    }

    let framesPerSecond = track.nominalFrameRate
    guard framesPerSecond > 0 else {
        return CMTime(value: 1, timescale: 60)
    }
    let wholeFrames = Int32(max(1, framesPerSecond.rounded()))
    return CMTime(value: 1, timescale: wholeFrames)
}
172+
173+
/// Returns `transform` with every component, including the translation, multiplied by `scale`.
func scaledPreferredTransform(_ transform: CGAffineTransform, scale: CGFloat) -> CGAffineTransform {
    var scaled = transform
    scaled.a *= scale
    scaled.b *= scale
    scaled.c *= scale
    scaled.d *= scale
    // Translation shrinks with the canvas as well.
    scaled.tx *= scale
    scaled.ty *= scale
    return scaled
}

ios-runner/RUNNER_PROTOCOL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Examples:
3333
```
3434

3535
```json
36-
{ "command": "recordStart", "outPath": "/tmp/demo.mp4", "fps": 30 }
36+
{ "command": "recordStart", "outPath": "/tmp/demo.mp4", "fps": 30, "quality": 7 }
3737
```
3838

3939
```json

skills/agent-device/references/verification.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ agent-device record stop
8282
- On iOS, recording is a wrapper around `simctl` for simulators and the corresponding device capture path for physical devices.
8383
- On Android, recording is a wrapper around `adb`.
8484
- Recording writes a video artifact and a gesture-telemetry sidecar JSON.
85+
- Use `record start <path> --quality 5` when a smaller video is easier to inspect or share. `--quality` is an integer from 5 to 10 that scales the output resolution, where 10 is native resolution; omit the flag to preserve the native/current resolution.
8586
- On macOS hosts, touch overlay burn-in is available for supported recordings.
8687
- On non-macOS hosts, recording still succeeds but the video stays raw and `record stop` can return an `overlayWarning`.
8788
- If the agent already knows the interaction sequence and wants a more lifelike, uninterrupted recording, drive the flow with `batch` while recording instead of replanning between each step.

src/cli/commands/generic.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import type {
55
FindOptions,
66
IsOptions,
77
PermissionTarget,
8+
RecordOptions,
89
SettingsUpdateOptions,
910
} from '../../client-types.ts';
1011
import { announceReplayTestRun } from '../../cli-test.ts';
@@ -205,6 +206,7 @@ export const genericClientCommandHandlers = {
205206
action: readStartStop(positionals[0], 'record'),
206207
path: positionals[1],
207208
fps: flags.fps,
209+
quality: readRecordingQuality(flags.quality),
208210
hideTouches: flags.hideTouches,
209211
}),
210212
),
@@ -346,6 +348,12 @@ function readStartStop(value: string | undefined, command: string): 'start' | 's
346348
throw new AppError('INVALID_ARGS', `${command} requires start|stop`);
347349
}
348350

351+
/**
 * Validates the `--quality` flag for `record`.
 *
 * Returns `undefined` when the flag was omitted and the value itself when it is one of
 * 5, 6, 7, 8, 9 or 10; any other value throws an `INVALID_ARGS` AppError.
 */
function readRecordingQuality(value: number | undefined): RecordOptions['quality'] {
  switch (value) {
    case undefined:
      return undefined;
    case 5:
    case 6:
    case 7:
    case 8:
    case 9:
    case 10:
      return value as RecordOptions['quality'];
    default:
      throw new AppError('INVALID_ARGS', `Invalid quality: ${value}`);
  }
}
356+
349357
function readLogsAction(
350358
value: string | undefined,
351359
): 'path' | 'start' | 'stop' | 'doctor' | 'mark' | 'clear' | undefined {

src/client-normalizers.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ export function buildFlags(options: InternalRequestOptions): CommandFlags {
274274
out: options.out,
275275
count: options.count,
276276
fps: options.fps,
277+
quality: options.quality,
277278
hideTouches: options.hideTouches,
278279
intervalMs: options.intervalMs,
279280
delayMs: options.delayMs,

src/client-types.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,10 +651,13 @@ export type NetworkOptions = AgentDeviceRequestOverrides & {
651651
include?: 'summary' | 'headers' | 'body' | 'all';
652652
};
653653

654+
type RecordingQuality = 5 | 6 | 7 | 8 | 9 | 10;
655+
654656
export type RecordOptions = AgentDeviceRequestOverrides & {
655657
action: 'start' | 'stop';
656658
path?: string;
657659
fps?: number;
660+
quality?: RecordingQuality;
658661
hideTouches?: boolean;
659662
};
660663

@@ -716,6 +719,7 @@ type CommandExecutionOptions = {
716719
screenshotFullscreen?: boolean;
717720
count?: number;
718721
fps?: number;
722+
quality?: RecordingQuality;
719723
hideTouches?: boolean;
720724
intervalMs?: number;
721725
delayMs?: number;

0 commit comments

Comments
 (0)