/// Checks that `makeSimulatorScreenshotMetadata(outputPath:)` reports the dimensions stored in a
/// PNG's IHDR chunk and labels the artifact as a raw, non-normalized simctl framebuffer.
@Test("sim screenshot metadata documents raw framebuffer orientation semantics")
func simulatorScreenshotMetadataDocumentsRawFramebufferOrientationSemantics() throws {
    let temp = FileManager.default.temporaryDirectory
        .appendingPathComponent("triton-sim-screenshot-metadata-\(UUID().uuidString)")
    try FileManager.default.createDirectory(at: temp, withIntermediateDirectories: true)
    // Each run creates a uniquely named scratch directory; remove it on every exit path
    // (including a failed `try`) so repeated runs do not pile up files in the temp dir.
    defer { try? FileManager.default.removeItem(at: temp) }

    let path = temp.appendingPathComponent("shot.png")
    // Portrait-shaped dimensions make a swapped width/height obvious in the assertions below.
    try writeMinimalPNG(width: 768, height: 1024, to: path)

    let metadata = try makeSimulatorScreenshotMetadata(outputPath: path.path)

    #expect(metadata.path == path.path)
    #expect(metadata.contentType == "image/png")
    #expect(metadata.pixelWidth == 768)
    #expect(metadata.pixelHeight == 1024)
    #expect(metadata.orientationSemantics == "raw-simctl-framebuffer")
    #expect(metadata.normalizationApplied == false)
    #expect(metadata.normalizationStrategy == "metadata-only")
    #expect(metadata.note.contains("raw framebuffer"))
}
/// Machine-readable description of a simulator screenshot artifact, attached to the host
/// action output so consumers can tell whether the image orientation was normalized.
struct HostSimulatorScreenshotMetadata: Encodable, Equatable {
    /// Path of the screenshot artifact.
    let path: String
    /// Content type of the artifact (the visible producer always sets "image/png").
    let contentType: String
    /// Pixel width read from the PNG header; `nil` when the dimensions could not be read.
    let pixelWidth: Int?
    /// Pixel height read from the PNG header; `nil` when the dimensions could not be read.
    let pixelHeight: Int?
    /// Identifier for the orientation semantics of the image
    /// (the visible producer emits "raw-simctl-framebuffer").
    let orientationSemantics: String
    /// Whether the image was rotated or otherwise normalized before being reported.
    let normalizationApplied: Bool
    /// Name of the normalization strategy used (the visible producer emits "metadata-only").
    let normalizationStrategy: String
    /// Human-readable caveat for consumers of the artifact.
    let note: String
}
/// Reads the pixel dimensions declared in the IHDR chunk of the PNG file at `path`.
///
/// Only the fixed 24-byte prefix is inspected (8-byte signature, 4-byte chunk length,
/// 4-byte chunk type, 4-byte width, 4-byte height). The chunk CRC and the image data
/// are not validated.
///
/// - Parameter path: File-system path of the PNG file.
/// - Returns: The width and height in pixels as stored in the IHDR chunk.
/// - Throws: Any error raised while reading the file, or `RuntimeError` when the file is
///   shorter than 24 bytes, does not start with the PNG signature, does not begin with an
///   IHDR chunk, or declares a dimension outside `1...Int32.max`.
private func readPNGDimensions(path: String) throws -> (width: Int, height: Int) {
    let url = URL(fileURLWithPath: path)
    let data = try Data(contentsOf: url, options: [.mappedIfSafe])

    // Copy the fixed-size prefix into a zero-based array so the byte offsets below are
    // independent of `Data` index semantics.
    let header = [UInt8](data.prefix(24))
    guard header.count == 24 else {
        throw RuntimeError("Screenshot metadata could not be read: PNG file is too short.")
    }

    let signature: [UInt8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]
    guard header[0..<8].elementsEqual(signature) else {
        throw RuntimeError("Screenshot metadata could not be read: output is not a PNG file.")
    }

    // Bytes 8..<12 hold the chunk length; bytes 12..<16 hold the chunk type, ASCII "IHDR".
    let ihdrType: [UInt8] = [0x49, 0x48, 0x44, 0x52]
    guard header[12..<16].elementsEqual(ihdrType) else {
        throw RuntimeError("Screenshot metadata could not be read: PNG IHDR chunk is missing.")
    }

    // Width and height are stored as big-endian 32-bit unsigned integers.
    let width = header[16..<20].reduce(UInt32(0)) { ($0 << 8) | UInt32($1) }
    let height = header[20..<24].reduce(UInt32(0)) { ($0 << 8) | UInt32($1) }

    // The PNG specification forbids zero and caps dimensions at 2^31 - 1. Rejecting other
    // values avoids reporting bogus metadata and keeps the `Int` conversions below from
    // trapping on platforms where `Int` is 32 bits wide.
    let validDimensions: ClosedRange<UInt32> = 1...UInt32(Int32.max)
    guard validDimensions.contains(width), validDimensions.contains(height) else {
        throw RuntimeError("Screenshot metadata could not be read: PNG IHDR dimensions are out of range.")
    }
    return (Int(width), Int(height))
}
/// Builds the JSON output contract that describes the `HostSimulatorScreenshotMetadata` model,
/// published under the `host.simulator-screenshot-metadata` selector.
func hostSimulatorScreenshotMetadataOutputContract() -> TKCommandOutputContract {
    // One row per model property: (name, type, required, description).
    let metadataFields = schemaContractFields([
        ("path", "String", true, "Screenshot artifact path"),
        ("contentType", "String", true, "Screenshot content type"),
        ("pixelWidth", "Int?", false, "PNG pixel width when readable"),
        ("pixelHeight", "Int?", false, "PNG pixel height when readable"),
        ("orientationSemantics", "String", true, "Screenshot orientation coordinate-space semantics"),
        ("normalizationApplied", "Bool", true, "Whether TritonKit rotated or otherwise normalized the image"),
        ("normalizationStrategy", "String", true, "Normalization strategy used for this artifact"),
        ("note", "String", true, "Human-readable caveat for agents and evidence consumers"),
    ])

    return TKCommandOutputContract(
        selector: "host.simulator-screenshot-metadata",
        format: "json",
        kind: "host-simulator-screenshot-metadata",
        model: "HostSimulatorScreenshotMetadata",
        fields: metadataFields
    )
}
} or { ok, action, artifact, stdoutBytes, stderrBytes, stdoutTruncated, stderrTruncated } or JSONL { ok, action, state, ready, attempt, elapsedMs }", + successShape: "{ ok, simulators[] } or { ok, runtimes[], count, verbose, sourceCommand } or { ok, action, simulator?, defaultsPath? } or { ok, action, runtimeScope, target, tool, exitCode, sourceCommand, stdout?, stderr?, stdoutTruncated?, stderrTruncated?, artifacts[], screenshot?, note? } or { ok, action, artifact, stdoutBytes, stderrBytes, stdoutTruncated, stderrTruncated } or JSONL { ok, action, state, ready, attempt, elapsedMs }", failureShape: "{ ok:false, error:{ code, message, hint, nextAction? } }", - outputSemantics: "Use sim for Apple Simulator host control and maintenance. Destructive operations require explicit confirm flags; agents should resolve/use a simulator before app or smoke flows.", + outputSemantics: "Use sim for Apple Simulator host control and maintenance. Destructive operations require explicit confirm flags; agents should resolve/use a simulator before app or smoke flows. 
sim screenshot preserves simctl raw framebuffer orientation and returns screenshot metadata so agents do not assume display-normalized orientation.", artifacts: ["simulator-screenshot", "simulator-video", "simulator-logs", "simulator-diagnostics"], nextCommands: [ "triton sim use --json", @@ -189,6 +189,7 @@ func hostCommandSchemas() -> [TKCommandSchema] { outputContracts: [ hostSimulatorListOutputContract(), hostActionOutputContract(selector: "host.simulator-action", model: "HostActionOutput|HostArtifactCaptureOutput|HostSimulatorUseOutput|HostSimulatorReadyEvent"), + hostSimulatorScreenshotMetadataOutputContract(), ], failureCodes: [ "simulator_not_found", diff --git a/docs-linhay/memory/2026-06-04.md b/docs-linhay/memory/2026-06-04.md index 352490b..05881d0 100644 --- a/docs-linhay/memory/2026-06-04.md +++ b/docs-linhay/memory/2026-06-04.md @@ -4,3 +4,10 @@ - 为 `docs-linhay/spaces/20260604-issue-27-agent-skills-readme/` 建立执行计划,验收聚焦根 README 是否明确可选 Codex / agent skills 的 public/internal 边界。 - 根 README 新增 `Optional Agent Skills` 章节:外部用户只安装 `.agents/tritonkit-skills/public/` 或 release asset `tritonkit-skills.tar.gz` 中的 public skills;当前 public skills 为 `tritonkit-dev-feedback`、`tritonkit-emulator-cli-takeover`、`tritonkit-real-project-regression`;`.agents/tritonkit-skills/internal/` 仅用于 TritonKit repo maintenance,不默认安装到 adopting projects;安装后需重启 Codex / agent session。 + +## Issue 26 simulator screenshot orientation metadata + +- 为 `triton sim screenshot --json` 增加可选 `screenshot` metadata,语义为 `raw-simctl-framebuffer`,并暴露 PNG `pixelWidth` / `pixelHeight`、`normalizationApplied=false`、`normalizationStrategy=metadata-only`。 +- schema contract 新增 `host.simulator-screenshot-metadata`,`sim` output semantics 明确截图保持 `xcrun simctl io screenshot` 原始 framebuffer orientation,agent 不应默认视为 display-normalized。 +- 当前未做 PNG 旋转归一化;若后续能可靠取得 display orientation,可另建需求把 metadata-only 演进为 normalized artifact。 +- 注意:feature worktree 目录名不是 `TritonKit` 时,`swift test --package-path CLI` 会因 SwiftPM 
local package identity 失败;本轮通过 `/tmp/.../TritonKit` 临时副本验证 CLI tests。 diff --git a/docs-linhay/spaces/20260604-issue-26-sim-screenshot-orientation/README.md b/docs-linhay/spaces/20260604-issue-26-sim-screenshot-orientation/README.md new file mode 100644 index 0000000..5af04c9 --- /dev/null +++ b/docs-linhay/spaces/20260604-issue-26-sim-screenshot-orientation/README.md @@ -0,0 +1,31 @@ +# Issue 26 - triton sim screenshot iPad mini orientation + +## 背景 + +GitHub Issue #26 报告:在 iPad mini (A17 Pro) Simulator 上执行 `triton sim screenshot` 后,生成 PNG 内容相对用户看到的模拟器显示方向旋转 90 度。当前 JSON 输出只透传 simctl command/stderr,未归一化方向,也未暴露 orientation metadata。 + +## 目标 + +- 调查 `triton sim screenshot` host-side 实现与 JSON 输出契约。 +- 通过测试覆盖 iPad/simctl screenshot 方向相关行为。 +- 在不扩大产品边界的前提下,给出最小修复:优先让输出匹配可预期显示方向;若无法可靠归一化,则至少在 JSON 中暴露明确 orientation/display metadata 与文档说明。 + +## 非目标 + +- 不新增 Web/Wails UI。 +- 不接入真机、远端 agent、设备云或内置 VLM loop。 +- 不直接依赖 XcodeBuildMCP 对外 API;Triton CLI/HTTP schema 仍是 agent 入口。 + +## BDD 场景与验收 + +### 场景:agent 获取 simulator screenshot 结果时能判断方向语义 + +Given 一个 iPad Simulator 已启动并可截图 +When agent 执行 `triton sim screenshot --simulator --output --json` +Then 命令应成功写出 PNG +And JSON 输出应让 agent 明确知道截图方向是否已归一化或包含足够 metadata 判断方向 +And 文档应说明该行为,避免把 raw framebuffer orientation 误当最终证据方向 + +## 相关链接 + +- GitHub Issue: https://github.com/NeptuneKit/TritonKit/issues/26 diff --git a/docs-linhay/spaces/20260604-issue-26-sim-screenshot-orientation/plans/implementation.md b/docs-linhay/spaces/20260604-issue-26-sim-screenshot-orientation/plans/implementation.md new file mode 100644 index 0000000..cdaef42 --- /dev/null +++ b/docs-linhay/spaces/20260604-issue-26-sim-screenshot-orientation/plans/implementation.md @@ -0,0 +1,39 @@ +# Issue 26 Implementation Plan + +## 验收场景 + +Given 一个 iPad Simulator 已启动并可截图 +When agent 执行 `triton sim screenshot --simulator --output --json` +Then 命令应成功写出 PNG +And JSON 输出应包含 `screenshot` metadata,明确当前 artifact 是 `raw-simctl-framebuffer` 语义 +And metadata 应暴露 PNG `pixelWidth` / 
`pixelHeight`、`normalizationApplied` 与 `normalizationStrategy` +And schema contract 应声明 `host.simulator-screenshot-metadata`,避免 agent 将 raw framebuffer 当成 display-normalized screenshot + +## 实现记录 + +- `HostActionOutput` 增加可选 `screenshot` metadata。 +- `triton sim screenshot` 写出单个 artifact 时读取 PNG IHDR 宽高,并返回 metadata。 +- 当前最小修复采用 `metadata-only` 策略,不对 PNG 做旋转归一化,避免在缺少可靠 display orientation 来源时产生二次误导。 +- schema 的 `sim` 命令输出语义与 output contract 同步说明 raw framebuffer orientation。 + +## 验证 + +在 issue worktree 目录名不是 `TritonKit` 时,SwiftPM local package identity 会把根包识别成 worktree slug,导致 `CLI/Package.swift` 中 `package: "tritonkit"` 依赖解析失败。因此测试通过 `/tmp/tritonkit-issue26-copy/TritonKit` 临时副本执行。 + +已运行: + +```sh +swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter DeviceCrossPlatformTests.simulatorScreenshotMetadataDocumentsRawFramebufferOrientationSemantics +swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.hostWorkflowSchemasExposeTargetAndArtifactContracts +swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.schemaOutputContractsExposeNonemptyFields +swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.schemaOutputContractKindsStayWithinAgentTaxonomy +swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.schemaOutputContractModelsStayMachineReadable +swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.schemaOutputContractSelectorsAndKindsUseStableAgentKeys +``` + +结果均通过。 + +## 剩余风险 + +- 本次没有实现图像旋转归一化;输出仍保持 `xcrun simctl io screenshot` 原始 framebuffer。该选择是为了先让 agent/evidence consumer 具备机器可读方向语义,后续若能可靠取得当前 display orientation,可另起需求实现归一化。 +- 未在真实 iPad mini Simulator 上复测,因为当前任务收口以 CLI schema/model 单元测试为主。