Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions CLI/Tests/TritonKitCLITests/DeviceCrossPlatformTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,26 @@ struct DeviceCrossPlatformTests {
}
}

/// Verifies that simulator screenshot metadata reports the PNG's own pixel
/// dimensions and labels the artifact as an un-normalized raw framebuffer.
@Test("sim screenshot metadata documents raw framebuffer orientation semantics")
func simulatorScreenshotMetadataDocumentsRawFramebufferOrientationSemantics() throws {
    // A UUID-suffixed scratch directory keeps each invocation isolated.
    let temp = FileManager.default.temporaryDirectory
        .appendingPathComponent("triton-sim-screenshot-metadata-\(UUID().uuidString)")
    try FileManager.default.createDirectory(at: temp, withIntermediateDirectories: true)
    // Clean up on every exit path (including a thrown error below) so repeated
    // test runs do not accumulate directories in the temporary folder.
    defer { try? FileManager.default.removeItem(at: temp) }

    // Portrait-shaped fixture: width < height, so swapped values would be caught.
    let path = temp.appendingPathComponent("shot.png")
    try writeMinimalPNG(width: 768, height: 1024, to: path)

    let metadata = try makeSimulatorScreenshotMetadata(outputPath: path.path)

    #expect(metadata.path == path.path)
    #expect(metadata.contentType == "image/png")
    #expect(metadata.pixelWidth == 768)
    #expect(metadata.pixelHeight == 1024)
    #expect(metadata.orientationSemantics == "raw-simctl-framebuffer")
    #expect(metadata.normalizationApplied == false)
    #expect(metadata.normalizationStrategy == "metadata-only")
    #expect(metadata.note.contains("raw framebuffer"))
}

@Test("app and smoke schemas expose unified device selector with explicit selector forms")
func appAndSmokeSchemasExposeUnifiedDeviceSelector() throws {
let app = try #require(commandSchemas().first { $0.name == "app" })
Expand Down Expand Up @@ -358,3 +378,20 @@ private func iosTarget(
transport: nil
)
}

/// Writes a 33-byte PNG header fixture: the PNG signature followed by a single
/// IHDR chunk carrying the requested dimensions.
///
/// No image data or end chunk is written and the chunk CRC bytes are zero, so
/// the file is only suitable for code that inspects the header fields.
///
/// - Parameters:
///   - width: Value stored big-endian in the IHDR width field.
///   - height: Value stored big-endian in the IHDR height field.
///   - url: Destination file; written atomically.
/// - Throws: Any error raised while writing the file.
private func writeMinimalPNG(width: UInt32, height: UInt32, to url: URL) throws {
    // PNG stores multi-byte integers most-significant byte first.
    func bigEndianBytes(_ value: UInt32) -> [UInt8] {
        withUnsafeBytes(of: value.bigEndian) { Array($0) }
    }

    let segments: [[UInt8]] = [
        [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A],  // file signature
        [0x00, 0x00, 0x00, 0x0D],                          // chunk data length (13)
        [0x49, 0x48, 0x44, 0x52],                          // chunk type "IHDR"
        bigEndianBytes(width),
        bigEndianBytes(height),
        [8, 6, 0, 0, 0],                                   // remaining five IHDR bytes
        [0, 0, 0, 0],                                      // zeroed bytes in the CRC position
    ]

    var payload = Data()
    for segment in segments {
        payload.append(contentsOf: segment)
    }
    try payload.write(to: url, options: .atomic)
}
3 changes: 2 additions & 1 deletion CLI/Tests/TritonKitCLITests/SchemaFactSourceTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3812,7 +3812,7 @@ struct SchemaFactSourceTests {
])
expectContract(sim, selector: "host.simulator-action", fields: [
"ok", "action", "runtimeScope", "target", "tool", "exitCode", "riskLevel",
"sourceCommand", "stdoutTruncated", "stderrTruncated", "artifacts", "note",
"sourceCommand", "stdoutTruncated", "stderrTruncated", "artifacts", "screenshot", "note",
])

#expect(app.failureCodes.contains("app_launch_failed"))
Expand Down Expand Up @@ -4041,6 +4041,7 @@ private func outputContractKindTaxonomy() -> Set<String> {
"host-device-ready",
"host-device-selection",
"host-simulator-list",
"host-simulator-screenshot-metadata",
"input-batch-summary",
"input-result",
"node-attributes",
Expand Down
14 changes: 14 additions & 0 deletions Sources/TritonKitCLI/CLIHostModels.swift
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,17 @@ struct HostSimulatorReadyEvent: Encodable {
let sourceCommand: String?
}

/// Machine-readable description of a simulator screenshot artifact, carrying
/// its pixel size and whether any orientation normalization was applied.
struct HostSimulatorScreenshotMetadata: Encodable, Equatable {
/// Screenshot artifact path.
let path: String
/// Content type of the artifact (for example "image/png").
let contentType: String
/// Pixel width, or `nil` when it could not be determined.
let pixelWidth: Int?
/// Pixel height, or `nil` when it could not be determined.
let pixelHeight: Int?
/// Identifier for the orientation semantics of the image (for example "raw-simctl-framebuffer").
let orientationSemantics: String
/// Whether the image was rotated or otherwise normalized after capture.
let normalizationApplied: Bool
/// Identifier for the normalization strategy used (for example "metadata-only").
let normalizationStrategy: String
/// Human-readable caveat for consumers of the artifact.
let note: String
}

struct HostActionOutput: Encodable {
let ok: Bool
let action: String
Expand All @@ -474,6 +485,7 @@ struct HostActionOutput: Encodable {
let stdout: String?
let stderr: String?
let artifacts: [String]
let screenshot: HostSimulatorScreenshotMetadata?
let note: String?

init(
Expand All @@ -491,6 +503,7 @@ struct HostActionOutput: Encodable {
stdout: String?,
stderr: String?,
artifacts: [String],
screenshot: HostSimulatorScreenshotMetadata? = nil,
note: String?
) {
self.ok = ok
Expand All @@ -507,6 +520,7 @@ struct HostActionOutput: Encodable {
self.stdout = stdout
self.stderr = stderr
self.artifacts = artifacts
self.screenshot = screenshot
self.note = note
}
}
Expand Down
33 changes: 33 additions & 0 deletions Sources/TritonKitCLI/CLIHostRuntime.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ func runSimpleHostCommand(
stdout: stdout.isEmpty ? nil : stdout,
stderr: stderr.isEmpty ? nil : stderr,
artifacts: artifacts,
screenshot: action == "sim.screenshot" && artifacts.count == 1 ? (try? makeSimulatorScreenshotMetadata(outputPath: artifacts[0])) : nil,
note: note
)
switch outputFormat {
Expand Down Expand Up @@ -1545,3 +1546,35 @@ func failHostValidation(code: String, message: String, hint: String, outputForma
}
throw ExitCode.failure
}

/// Builds the metadata record attached to a simulator screenshot artifact.
///
/// Pixel dimensions are read from the file on a best-effort basis: when the
/// PNG header cannot be read, both dimensions are reported as `nil` and the
/// remaining fields are still populated.
///
/// - Parameter outputPath: Path of the screenshot file that was written.
/// - Returns: Metadata describing the artifact as a raw, un-normalized framebuffer capture.
func makeSimulatorScreenshotMetadata(outputPath: String) throws -> HostSimulatorScreenshotMetadata {
    let pixelWidth: Int?
    let pixelHeight: Int?
    if let size = try? readPNGDimensions(path: outputPath) {
        pixelWidth = size.width
        pixelHeight = size.height
    } else {
        // Header unreadable: keep the metadata but leave the dimensions unknown.
        pixelWidth = nil
        pixelHeight = nil
    }

    let caveat = "TritonKit preserves the raw framebuffer orientation emitted by `xcrun simctl io screenshot`; compare pixelWidth/pixelHeight and simulator display state before treating this artifact as a display-normalized screenshot."

    return HostSimulatorScreenshotMetadata(
        path: outputPath,
        contentType: "image/png",
        pixelWidth: pixelWidth,
        pixelHeight: pixelHeight,
        orientationSemantics: "raw-simctl-framebuffer",
        normalizationApplied: false,
        normalizationStrategy: "metadata-only",
        note: caveat
    )
}

/// Reads the pixel dimensions declared in the IHDR chunk at the start of a PNG file.
///
/// Only the first 24 bytes are inspected: the 8-byte signature, the 4-byte
/// chunk length, the 4-byte chunk type, and the 4-byte width and height.
///
/// - Parameter path: File-system path of the file to inspect.
/// - Returns: The width and height fields decoded as big-endian 32-bit integers.
/// - Throws: Any error from loading the file, or `RuntimeError` when the file is
///   shorter than 24 bytes, lacks the PNG signature, or does not have `IHDR`
///   as its first chunk type.
private func readPNGDimensions(path: String) throws -> (width: Int, height: Int) {
    let pngSignature: [UInt8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]
    let contents = try Data(contentsOf: URL(fileURLWithPath: path), options: [.mappedIfSafe])

    if contents.count < 24 {
        throw RuntimeError("Screenshot metadata could not be read: PNG file is too short.")
    }
    if !contents.starts(with: pngSignature) {
        throw RuntimeError("Screenshot metadata could not be read: output is not a PNG file.")
    }
    // Bytes 12..<16 hold the type of the first chunk.
    if !contents[12..<16].elementsEqual("IHDR".utf8) {
        throw RuntimeError("Screenshot metadata could not be read: PNG IHDR chunk is missing.")
    }

    // Folds four bytes, most-significant first, into one integer.
    func bigEndianInt(at range: Range<Int>) -> Int {
        var value: UInt32 = 0
        for byte in contents[range] {
            value = (value << 8) | UInt32(byte)
        }
        return Int(value)
    }

    return (width: bigEndianInt(at: 16..<20), height: bigEndianInt(at: 20..<24))
}
20 changes: 20 additions & 0 deletions Sources/TritonKitCLI/CLISchemaContracts.swift
Original file line number Diff line number Diff line change
Expand Up @@ -698,11 +698,31 @@ func hostActionOutputContract(selector: String, model: String) -> TKCommandOutpu
("stdout", "String?", false, "Bounded stdout sample"),
("stderr", "String?", false, "Bounded stderr sample"),
("artifacts", "[String]", true, "Written artifact paths"),
("screenshot", "HostSimulatorScreenshotMetadata?", false, "Simulator screenshot orientation and pixel metadata"),
("note", "String?", false, "Boundary or follow-up note"),
])
)
}

/// Returns the JSON output contract for the `HostSimulatorScreenshotMetadata`
/// model, published under the `host.simulator-screenshot-metadata` selector.
///
/// Each entry in the field table appears to be (field name, type, required
/// flag, description) — confirm against `schemaContractFields`. The optional
/// `pixelWidth` / `pixelHeight` entries are the only ones flagged `false`.
func hostSimulatorScreenshotMetadataOutputContract() -> TKCommandOutputContract {
TKCommandOutputContract(
selector: "host.simulator-screenshot-metadata",
format: "json",
kind: "host-simulator-screenshot-metadata",
model: "HostSimulatorScreenshotMetadata",
fields: schemaContractFields([
("path", "String", true, "Screenshot artifact path"),
("contentType", "String", true, "Screenshot content type"),
("pixelWidth", "Int?", false, "PNG pixel width when readable"),
("pixelHeight", "Int?", false, "PNG pixel height when readable"),
("orientationSemantics", "String", true, "Screenshot orientation coordinate-space semantics"),
("normalizationApplied", "Bool", true, "Whether TritonKit rotated or otherwise normalized the image"),
("normalizationStrategy", "String", true, "Normalization strategy used for this artifact"),
("note", "String", true, "Human-readable caveat for agents and evidence consumers"),
])
)
}

func hostHarmonyTapOutputContract() -> TKCommandOutputContract {
TKCommandOutputContract(
selector: "host.harmony-tap",
Expand Down
5 changes: 3 additions & 2 deletions Sources/TritonKitCLI/CLISchemaHostCommands.swift
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,9 @@ func hostCommandSchemas() -> [TKCommandSchema] {
"triton sim runtime delete <runtime-id> --dry-run --json",
"triton sim personalization scan-and-personalize --json",
],
successShape: "{ ok, simulators[] } or { ok, runtimes[], count, verbose, sourceCommand } or { ok, action, simulator?, defaultsPath? } or { ok, action, runtimeScope, target, tool, exitCode, sourceCommand, stdout?, stderr?, stdoutTruncated?, stderrTruncated?, artifacts[], note? } or { ok, action, artifact, stdoutBytes, stderrBytes, stdoutTruncated, stderrTruncated } or JSONL { ok, action, state, ready, attempt, elapsedMs }",
successShape: "{ ok, simulators[] } or { ok, runtimes[], count, verbose, sourceCommand } or { ok, action, simulator?, defaultsPath? } or { ok, action, runtimeScope, target, tool, exitCode, sourceCommand, stdout?, stderr?, stdoutTruncated?, stderrTruncated?, artifacts[], screenshot?, note? } or { ok, action, artifact, stdoutBytes, stderrBytes, stdoutTruncated, stderrTruncated } or JSONL { ok, action, state, ready, attempt, elapsedMs }",
failureShape: "{ ok:false, error:{ code, message, hint, nextAction? } }",
outputSemantics: "Use sim for Apple Simulator host control and maintenance. Destructive operations require explicit confirm flags; agents should resolve/use a simulator before app or smoke flows.",
outputSemantics: "Use sim for Apple Simulator host control and maintenance. Destructive operations require explicit confirm flags; agents should resolve/use a simulator before app or smoke flows. sim screenshot preserves simctl raw framebuffer orientation and returns screenshot metadata so agents do not assume display-normalized orientation.",
artifacts: ["simulator-screenshot", "simulator-video", "simulator-logs", "simulator-diagnostics"],
nextCommands: [
"triton sim use <udid> --json",
Expand All @@ -189,6 +189,7 @@ func hostCommandSchemas() -> [TKCommandSchema] {
outputContracts: [
hostSimulatorListOutputContract(),
hostActionOutputContract(selector: "host.simulator-action", model: "HostActionOutput|HostArtifactCaptureOutput|HostSimulatorUseOutput|HostSimulatorReadyEvent"),
hostSimulatorScreenshotMetadataOutputContract(),
],
failureCodes: [
"simulator_not_found",
Expand Down
7 changes: 7 additions & 0 deletions docs-linhay/memory/2026-06-04.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,10 @@

- 为 `docs-linhay/spaces/20260604-issue-27-agent-skills-readme/` 建立执行计划,验收聚焦根 README 是否明确可选 Codex / agent skills 的 public/internal 边界。
- 根 README 新增 `Optional Agent Skills` 章节:外部用户只安装 `.agents/tritonkit-skills/public/` 或 release asset `tritonkit-skills.tar.gz` 中的 public skills;当前 public skills 为 `tritonkit-dev-feedback`、`tritonkit-emulator-cli-takeover`、`tritonkit-real-project-regression`;`.agents/tritonkit-skills/internal/` 仅用于 TritonKit repo maintenance,不默认安装到 adopting projects;安装后需重启 Codex / agent session。

## Issue 26 simulator screenshot orientation metadata

- 为 `triton sim screenshot --json` 增加可选 `screenshot` metadata,语义为 `raw-simctl-framebuffer`,并暴露 PNG `pixelWidth` / `pixelHeight`、`normalizationApplied=false`、`normalizationStrategy=metadata-only`。
- schema contract 新增 `host.simulator-screenshot-metadata`,`sim` output semantics 明确截图保持 `xcrun simctl io screenshot` 原始 framebuffer orientation,agent 不应默认视为 display-normalized。
- 当前未做 PNG 旋转归一化;若后续能可靠取得 display orientation,可另建需求把 metadata-only 演进为 normalized artifact。
- 注意:feature worktree 目录名不是 `TritonKit` 时,`swift test --package-path CLI` 会因 SwiftPM local package identity 失败;本轮通过 `/tmp/.../TritonKit` 临时副本验证 CLI tests。
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Issue 26 - triton sim screenshot iPad mini orientation

## 背景

GitHub Issue #26 报告:在 iPad mini (A17 Pro) Simulator 上执行 `triton sim screenshot` 后,生成 PNG 内容相对用户看到的模拟器显示方向旋转 90 度。当前 JSON 输出只透传 simctl command/stderr,未归一化方向,也未暴露 orientation metadata。

## 目标

- 调查 `triton sim screenshot` host-side 实现与 JSON 输出契约。
- 通过测试覆盖 iPad/simctl screenshot 方向相关行为。
- 在不扩大产品边界的前提下,给出最小修复:优先让输出匹配可预期显示方向;若无法可靠归一化,则至少在 JSON 中暴露明确 orientation/display metadata 与文档说明。

## 非目标

- 不新增 Web/Wails UI。
- 不接入真机、远端 agent、设备云或内置 VLM loop。
- 不直接依赖 XcodeBuildMCP 对外 API;Triton CLI/HTTP schema 仍是 agent 入口。

## BDD 场景与验收

### 场景:agent 获取 simulator screenshot 结果时能判断方向语义

Given 一个 iPad Simulator 已启动并可截图
When agent 执行 `triton sim screenshot --simulator <UDID> --output <png> --json`
Then 命令应成功写出 PNG
And JSON 输出应让 agent 明确知道截图方向是否已归一化或包含足够 metadata 判断方向
And 文档应说明该行为,避免把 raw framebuffer orientation 误当最终证据方向

## 相关链接

- GitHub Issue: https://github.com/NeptuneKit/TritonKit/issues/26
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Issue 26 Implementation Plan

## 验收场景

Given 一个 iPad Simulator 已启动并可截图
When agent 执行 `triton sim screenshot --simulator <UDID> --output <png> --json`
Then 命令应成功写出 PNG
And JSON 输出应包含 `screenshot` metadata,明确当前 artifact 是 `raw-simctl-framebuffer` 语义
And metadata 应暴露 PNG `pixelWidth` / `pixelHeight`、`normalizationApplied` 与 `normalizationStrategy`
And schema contract 应声明 `host.simulator-screenshot-metadata`,避免 agent 将 raw framebuffer 当成 display-normalized screenshot

## 实现记录

- `HostActionOutput` 增加可选 `screenshot` metadata。
- `triton sim screenshot` 写出单个 artifact 时读取 PNG IHDR 宽高,并返回 metadata。
- 当前最小修复采用 `metadata-only` 策略,不对 PNG 做旋转归一化,避免在缺少可靠 display orientation 来源时产生二次误导。
- schema 的 `sim` 命令输出语义与 output contract 同步说明 raw framebuffer orientation。

## 验证

在 issue worktree 目录名不是 `TritonKit` 时,SwiftPM local package identity 会把根包识别成 worktree slug,导致 `CLI/Package.swift` 中 `package: "tritonkit"` 依赖解析失败。因此测试通过 `/tmp/tritonkit-issue26-copy/TritonKit` 临时副本执行。

已运行:

```sh
swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter DeviceCrossPlatformTests.simulatorScreenshotMetadataDocumentsRawFramebufferOrientationSemantics
swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.hostWorkflowSchemasExposeTargetAndArtifactContracts
swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.schemaOutputContractsExposeNonemptyFields
swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.schemaOutputContractKindsStayWithinAgentTaxonomy
swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.schemaOutputContractModelsStayMachineReadable
swift test --package-path /tmp/tritonkit-issue26-copy/TritonKit/CLI --filter SchemaFactSourceTests.schemaOutputContractSelectorsAndKindsUseStableAgentKeys
```

结果均通过。

## 剩余风险

- 本次没有实现图像旋转归一化;输出仍保持 `xcrun simctl io screenshot` 原始 framebuffer。该选择是为了先让 agent/evidence consumer 具备机器可读方向语义,后续若能可靠取得当前 display orientation,可另起需求实现归一化。
- 未在真实 iPad mini Simulator 上复测,因为当前任务收口以 CLI schema/model 单元测试为主。
Loading