Skip to content

Commit beb7729

Browse files
authored
Merge pull request #29 from NeptuneKit/feat/20260604-issue-26-sim-screenshot-orientation
fix(cli): expose sim screenshot orientation metadata
2 parents 26fd564 + b35f82b commit beb7729

9 files changed

Lines changed: 186 additions & 3 deletions

File tree

CLI/Tests/TritonKitCLITests/DeviceCrossPlatformTests.swift

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,26 @@ struct DeviceCrossPlatformTests {
7676
}
7777
}
7878

79+
@Test("sim screenshot metadata documents raw framebuffer orientation semantics")
80+
func simulatorScreenshotMetadataDocumentsRawFramebufferOrientationSemantics() throws {
81+
let temp = FileManager.default.temporaryDirectory
82+
.appendingPathComponent("triton-sim-screenshot-metadata-\(UUID().uuidString)")
83+
try FileManager.default.createDirectory(at: temp, withIntermediateDirectories: true)
84+
let path = temp.appendingPathComponent("shot.png")
85+
try writeMinimalPNG(width: 768, height: 1024, to: path)
86+
87+
let metadata = try makeSimulatorScreenshotMetadata(outputPath: path.path)
88+
89+
#expect(metadata.path == path.path)
90+
#expect(metadata.contentType == "image/png")
91+
#expect(metadata.pixelWidth == 768)
92+
#expect(metadata.pixelHeight == 1024)
93+
#expect(metadata.orientationSemantics == "raw-simctl-framebuffer")
94+
#expect(metadata.normalizationApplied == false)
95+
#expect(metadata.normalizationStrategy == "metadata-only")
96+
#expect(metadata.note.contains("raw framebuffer"))
97+
}
98+
7999
@Test("app and smoke schemas expose unified device selector with explicit selector forms")
80100
func appAndSmokeSchemasExposeUnifiedDeviceSelector() throws {
81101
let app = try #require(commandSchemas().first { $0.name == "app" })
@@ -358,3 +378,20 @@ private func iosTarget(
358378
transport: nil
359379
)
360380
}
381+
382+
private func writeMinimalPNG(width: UInt32, height: UInt32, to url: URL) throws {
383+
var data = Data([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])
384+
data.append(contentsOf: [0x00, 0x00, 0x00, 0x0D])
385+
data.append(contentsOf: [0x49, 0x48, 0x44, 0x52])
386+
data.append(UInt8((width >> 24) & 0xff))
387+
data.append(UInt8((width >> 16) & 0xff))
388+
data.append(UInt8((width >> 8) & 0xff))
389+
data.append(UInt8(width & 0xff))
390+
data.append(UInt8((height >> 24) & 0xff))
391+
data.append(UInt8((height >> 16) & 0xff))
392+
data.append(UInt8((height >> 8) & 0xff))
393+
data.append(UInt8(height & 0xff))
394+
data.append(contentsOf: [8, 6, 0, 0, 0])
395+
data.append(contentsOf: [0, 0, 0, 0])
396+
try data.write(to: url, options: .atomic)
397+
}

CLI/Tests/TritonKitCLITests/SchemaFactSourceTests.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3812,7 +3812,7 @@ struct SchemaFactSourceTests {
38123812
])
38133813
expectContract(sim, selector: "host.simulator-action", fields: [
38143814
"ok", "action", "runtimeScope", "target", "tool", "exitCode", "riskLevel",
3815-
"sourceCommand", "stdoutTruncated", "stderrTruncated", "artifacts", "note",
3815+
"sourceCommand", "stdoutTruncated", "stderrTruncated", "artifacts", "screenshot", "note",
38163816
])
38173817

38183818
#expect(app.failureCodes.contains("app_launch_failed"))
@@ -4041,6 +4041,7 @@ private func outputContractKindTaxonomy() -> Set<String> {
40414041
"host-device-ready",
40424042
"host-device-selection",
40434043
"host-simulator-list",
4044+
"host-simulator-screenshot-metadata",
40444045
"input-batch-summary",
40454046
"input-result",
40464047
"node-attributes",

Sources/TritonKitCLI/CLIHostModels.swift

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,17 @@ struct HostSimulatorReadyEvent: Encodable {
459459
let sourceCommand: String?
460460
}
461461

462+
struct HostSimulatorScreenshotMetadata: Encodable, Equatable {
463+
let path: String
464+
let contentType: String
465+
let pixelWidth: Int?
466+
let pixelHeight: Int?
467+
let orientationSemantics: String
468+
let normalizationApplied: Bool
469+
let normalizationStrategy: String
470+
let note: String
471+
}
472+
462473
struct HostActionOutput: Encodable {
463474
let ok: Bool
464475
let action: String
@@ -474,6 +485,7 @@ struct HostActionOutput: Encodable {
474485
let stdout: String?
475486
let stderr: String?
476487
let artifacts: [String]
488+
let screenshot: HostSimulatorScreenshotMetadata?
477489
let note: String?
478490

479491
init(
@@ -491,6 +503,7 @@ struct HostActionOutput: Encodable {
491503
stdout: String?,
492504
stderr: String?,
493505
artifacts: [String],
506+
screenshot: HostSimulatorScreenshotMetadata? = nil,
494507
note: String?
495508
) {
496509
self.ok = ok
@@ -507,6 +520,7 @@ struct HostActionOutput: Encodable {
507520
self.stdout = stdout
508521
self.stderr = stderr
509522
self.artifacts = artifacts
523+
self.screenshot = screenshot
510524
self.note = note
511525
}
512526
}

Sources/TritonKitCLI/CLIHostRuntime.swift

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ func runSimpleHostCommand(
3333
stdout: stdout.isEmpty ? nil : stdout,
3434
stderr: stderr.isEmpty ? nil : stderr,
3535
artifacts: artifacts,
36+
screenshot: action == "sim.screenshot" && artifacts.count == 1 ? (try? makeSimulatorScreenshotMetadata(outputPath: artifacts[0])) : nil,
3637
note: note
3738
)
3839
switch outputFormat {
@@ -1545,3 +1546,35 @@ func failHostValidation(code: String, message: String, hint: String, outputForma
15451546
}
15461547
throw ExitCode.failure
15471548
}
1549+
1550+
func makeSimulatorScreenshotMetadata(outputPath: String) throws -> HostSimulatorScreenshotMetadata {
1551+
let dimensions = try? readPNGDimensions(path: outputPath)
1552+
return HostSimulatorScreenshotMetadata(
1553+
path: outputPath,
1554+
contentType: "image/png",
1555+
pixelWidth: dimensions?.width,
1556+
pixelHeight: dimensions?.height,
1557+
orientationSemantics: "raw-simctl-framebuffer",
1558+
normalizationApplied: false,
1559+
normalizationStrategy: "metadata-only",
1560+
note: "TritonKit preserves the raw framebuffer orientation emitted by `xcrun simctl io screenshot`; compare pixelWidth/pixelHeight and simulator display state before treating this artifact as a display-normalized screenshot."
1561+
)
1562+
}
1563+
1564+
private func readPNGDimensions(path: String) throws -> (width: Int, height: Int) {
1565+
let url = URL(fileURLWithPath: path)
1566+
let data = try Data(contentsOf: url, options: [.mappedIfSafe])
1567+
guard data.count >= 24 else {
1568+
throw RuntimeError("Screenshot metadata could not be read: PNG file is too short.")
1569+
}
1570+
let signature: [UInt8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]
1571+
guard Array(data.prefix(8)) == signature else {
1572+
throw RuntimeError("Screenshot metadata could not be read: output is not a PNG file.")
1573+
}
1574+
guard String(data: data[12..<16], encoding: .ascii) == "IHDR" else {
1575+
throw RuntimeError("Screenshot metadata could not be read: PNG IHDR chunk is missing.")
1576+
}
1577+
let width = data[16..<20].reduce(UInt32(0)) { ($0 << 8) | UInt32($1) }
1578+
let height = data[20..<24].reduce(UInt32(0)) { ($0 << 8) | UInt32($1) }
1579+
return (Int(width), Int(height))
1580+
}

Sources/TritonKitCLI/CLISchemaContracts.swift

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,11 +698,31 @@ func hostActionOutputContract(selector: String, model: String) -> TKCommandOutpu
698698
("stdout", "String?", false, "Bounded stdout sample"),
699699
("stderr", "String?", false, "Bounded stderr sample"),
700700
("artifacts", "[String]", true, "Written artifact paths"),
701+
("screenshot", "HostSimulatorScreenshotMetadata?", false, "Simulator screenshot orientation and pixel metadata"),
701702
("note", "String?", false, "Boundary or follow-up note"),
702703
])
703704
)
704705
}
705706

707+
func hostSimulatorScreenshotMetadataOutputContract() -> TKCommandOutputContract {
708+
TKCommandOutputContract(
709+
selector: "host.simulator-screenshot-metadata",
710+
format: "json",
711+
kind: "host-simulator-screenshot-metadata",
712+
model: "HostSimulatorScreenshotMetadata",
713+
fields: schemaContractFields([
714+
("path", "String", true, "Screenshot artifact path"),
715+
("contentType", "String", true, "Screenshot content type"),
716+
("pixelWidth", "Int?", false, "PNG pixel width when readable"),
717+
("pixelHeight", "Int?", false, "PNG pixel height when readable"),
718+
("orientationSemantics", "String", true, "Screenshot orientation coordinate-space semantics"),
719+
("normalizationApplied", "Bool", true, "Whether TritonKit rotated or otherwise normalized the image"),
720+
("normalizationStrategy", "String", true, "Normalization strategy used for this artifact"),
721+
("note", "String", true, "Human-readable caveat for agents and evidence consumers"),
722+
])
723+
)
724+
}
725+
706726
func hostHarmonyTapOutputContract() -> TKCommandOutputContract {
707727
TKCommandOutputContract(
708728
selector: "host.harmony-tap",

Sources/TritonKitCLI/CLISchemaHostCommands.swift

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,9 @@ func hostCommandSchemas() -> [TKCommandSchema] {
175175
"triton sim runtime delete <runtime-id> --dry-run --json",
176176
"triton sim personalization scan-and-personalize --json",
177177
],
178-
successShape: "{ ok, simulators[] } or { ok, runtimes[], count, verbose, sourceCommand } or { ok, action, simulator?, defaultsPath? } or { ok, action, runtimeScope, target, tool, exitCode, sourceCommand, stdout?, stderr?, stdoutTruncated?, stderrTruncated?, artifacts[], note? } or { ok, action, artifact, stdoutBytes, stderrBytes, stdoutTruncated, stderrTruncated } or JSONL { ok, action, state, ready, attempt, elapsedMs }",
178+
successShape: "{ ok, simulators[] } or { ok, runtimes[], count, verbose, sourceCommand } or { ok, action, simulator?, defaultsPath? } or { ok, action, runtimeScope, target, tool, exitCode, sourceCommand, stdout?, stderr?, stdoutTruncated?, stderrTruncated?, artifacts[], screenshot?, note? } or { ok, action, artifact, stdoutBytes, stderrBytes, stdoutTruncated, stderrTruncated } or JSONL { ok, action, state, ready, attempt, elapsedMs }",
179179
failureShape: "{ ok:false, error:{ code, message, hint, nextAction? } }",
180-
outputSemantics: "Use sim for Apple Simulator host control and maintenance. Destructive operations require explicit confirm flags; agents should resolve/use a simulator before app or smoke flows.",
180+
outputSemantics: "Use sim for Apple Simulator host control and maintenance. Destructive operations require explicit confirm flags; agents should resolve/use a simulator before app or smoke flows. sim screenshot preserves simctl raw framebuffer orientation and returns screenshot metadata so agents do not assume display-normalized orientation.",
181181
artifacts: ["simulator-screenshot", "simulator-video", "simulator-logs", "simulator-diagnostics"],
182182
nextCommands: [
183183
"triton sim use <udid> --json",
@@ -189,6 +189,7 @@ func hostCommandSchemas() -> [TKCommandSchema] {
189189
outputContracts: [
190190
hostSimulatorListOutputContract(),
191191
hostActionOutputContract(selector: "host.simulator-action", model: "HostActionOutput|HostArtifactCaptureOutput|HostSimulatorUseOutput|HostSimulatorReadyEvent"),
192+
hostSimulatorScreenshotMetadataOutputContract(),
192193
],
193194
failureCodes: [
194195
"simulator_not_found",

docs-linhay/memory/2026-06-04.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,10 @@
44

55
- 在 `docs-linhay/spaces/20260604-issue-27-agent-skills-readme/` 建立执行计划,验收聚焦根 README 是否明确可选 Codex / agent skills 的 public/internal 边界。
66
- 根 README 新增 `Optional Agent Skills` 章节:外部用户只安装 `.agents/tritonkit-skills/public/` 或 release asset `tritonkit-skills.tar.gz` 中的 public skills;当前 public skills 为 `tritonkit-dev-feedback`、`tritonkit-emulator-cli-takeover`、`tritonkit-real-project-regression`;`.agents/tritonkit-skills/internal/` 仅用于 TritonKit repo maintenance,不默认安装到 adopting projects;安装后需重启 Codex / agent session。
7+
8+
## Issue 26 simulator screenshot orientation metadata
9+
10+
- `triton sim screenshot --json` 增加可选 `screenshot` metadata,语义为 `raw-simctl-framebuffer`,并暴露 PNG `pixelWidth` / `pixelHeight`、`normalizationApplied=false`、`normalizationStrategy=metadata-only`。
11+
- schema contract 新增 `host.simulator-screenshot-metadata`;`sim` output semantics 明确截图保持 `xcrun simctl io screenshot` 原始 framebuffer orientation,agent 不应默认视为 display-normalized。
12+
- 当前未做 PNG 旋转归一化;若后续能可靠取得 display orientation,可另建需求把 metadata-only 演进为 normalized artifact。
13+
- 注意:feature worktree 目录名不是 `TritonKit` 时,`swift test --package-path CLI` 会因 SwiftPM local package identity 失败;本轮通过 `/tmp/.../TritonKit` 临时副本验证 CLI tests。
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Issue 26 - triton sim screenshot iPad mini orientation
2+
3+
## 背景
4+
5+
GitHub Issue #26 报告:在 iPad mini (A17 Pro) Simulator 上执行 `triton sim screenshot` 后,生成 PNG 内容相对用户看到的模拟器显示方向旋转 90 度。当前 JSON 输出只透传 simctl command/stderr,未归一化方向,也未暴露 orientation metadata。
6+
7+
## 目标
8+
9+
- 调查 `triton sim screenshot` host-side 实现与 JSON 输出契约。
10+
- 通过测试覆盖 iPad/simctl screenshot 方向相关行为。
11+
- 在不扩大产品边界的前提下,给出最小修复:优先让输出匹配可预期显示方向;若无法可靠归一化,则至少在 JSON 中暴露明确 orientation/display metadata 与文档说明。
12+
13+
## 非目标
14+
15+
- 不新增 Web/Wails UI。
16+
- 不接入真机、远端 agent、设备云或内置 VLM loop。
17+
- 不直接依赖 XcodeBuildMCP 对外 API;Triton CLI/HTTP schema 仍是 agent 入口。
18+
19+
## BDD 场景与验收
20+
21+
### 场景:agent 获取 simulator screenshot 结果时能判断方向语义
22+
23+
Given 一个 iPad Simulator 已启动并可截图
24+
When agent 执行 `triton sim screenshot --simulator <UDID> --output <png> --json`
25+
Then 命令应成功写出 PNG
26+
And JSON 输出应让 agent 明确知道截图方向是否已归一化或包含足够 metadata 判断方向
27+
And 文档应说明该行为,避免把 raw framebuffer orientation 误当最终证据方向
28+
29+
## 相关链接
30+
31+
- GitHub Issue: https://github.com/NeptuneKit/TritonKit/issues/26
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Issue 26 Implementation Plan
2+
3+
## 验收场景
4+
5+
Given 一个 iPad Simulator 已启动并可截图
6+
When agent 执行 `triton sim screenshot --simulator <UDID> --output <png> --json`
7+
Then 命令应成功写出 PNG
8+
And JSON 输出应包含 `screenshot` metadata,明确当前 artifact 是 `raw-simctl-framebuffer` 语义
9+
And metadata 应暴露 PNG `pixelWidth` / `pixelHeight`、`normalizationApplied`、`normalizationStrategy`
10+
And schema contract 应声明 `host.simulator-screenshot-metadata`,避免 agent 将 raw framebuffer 当成 display-normalized screenshot
11+
12+
## 实现记录
13+
14+
- `HostActionOutput` 增加可选 `screenshot` metadata。
15+
- `triton sim screenshot` 写出单个 artifact 时读取 PNG IHDR 宽高,并返回 metadata。
16+
- 当前最小修复采用 `metadata-only` 策略,不对 PNG 做旋转归一化,避免在缺少可靠 display orientation 来源时产生二次误导。
17+
- schema 的 `sim` 命令输出语义与 output contract 同步说明 raw framebuffer orientation。
18+
19+
## 验证
20+
21+
在 issue worktree 目录名不是 `TritonKit` 时,SwiftPM local package identity 会把根包识别成 worktree slug,导致 `CLI/Package.swift` 的 `package: "tritonkit"` 依赖解析失败。因此测试通过 `/tmp/tritonkit-issue26-copy/TritonKit` 临时副本执行。
22+
23+
已运行:
24+
25+
```sh
26+
swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter DeviceCrossPlatformTests.simulatorScreenshotMetadataDocumentsRawFramebufferOrientationSemantics
27+
swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.hostWorkflowSchemasExposeTargetAndArtifactContracts
28+
swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.schemaOutputContractsExposeNonemptyFields
29+
swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.schemaOutputContractKindsStayWithinAgentTaxonomy
30+
swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.schemaOutputContractModelsStayMachineReadable
31+
swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.schemaOutputContractSelectorsAndKindsUseStableAgentKeys
32+
```
33+
34+
结果均通过。
35+
36+
## 剩余风险
37+
38+
- 本次没有实现图像旋转归一化;输出仍保持 `xcrun simctl io screenshot` 原始 framebuffer。该选择是为了先让 agent/evidence consumer 具备机器可读方向语义,后续若能可靠取得当前 display orientation,可另起需求实现归一化。
39+
- 未在真实 iPad mini Simulator 上复测,因为当前任务收口以 CLI schema/model 单元测试为主。

0 commit comments

Comments
 (0)