Skip to content

Commit 6441b1e

Browse files
Conformance finding: sample-app full surface coverage remains unproved beyond deterministic smoke (#174)
1 parent c31d84c commit 6441b1e

4 files changed

Lines changed: 144 additions & 4 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ scripts/compose-conformance.sh --strict
134134
```
135135

136136
The harness emits a JSON document with the sample-app commit, artifact versions,
137-
timestamp, per-surface outcome, and any skipped surfaces. It runs the documented
137+
timestamp, per-surface outcome, focused findings, and any skipped surfaces. It runs the documented
138138
artisan samples, browser checks for the app and Waterline, the MCP workflow API,
139139
an API documentation check that compares the README's documented MCP tools and
140140
workflow keys with the live endpoint, a Waterline/manual observation check using

app/Console/Commands/Conformance.php

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,19 @@ public function handle(): int
165165
return self::FAILURE;
166166
}
167167

168+
if ($metadata['summary']['findings'] !== []) {
169+
$this->line('==> sample-app conformance: focused findings');
170+
foreach ($metadata['summary']['findings'] as $finding) {
171+
if (! is_array($finding)) {
172+
continue;
173+
}
174+
175+
$surface = is_string($finding['surface'] ?? null) ? $finding['surface'] : 'unknown';
176+
$impact = is_string($finding['impact'] ?? null) ? $finding['impact'] : 'No impact summary recorded.';
177+
$this->line("[finding] {$surface}: {$impact}");
178+
}
179+
}
180+
168181
$this->line('==> sample-app conformance: run metadata');
169182
$this->line($json);
170183

@@ -703,6 +716,7 @@ private function metadata(string $startedAt, string $baseUrl, bool $strict): arr
703716
$missing = array_values(array_diff(self::REQUIRED_SURFACES, array_keys($this->surfaces)));
704717
$uncovered = array_values(array_unique([...$skipped, ...$missing]));
705718
$status = $failed !== [] || $missing !== [] || ($strict && $skipped !== []) ? 'failed' : 'passed';
719+
$findings = $this->findings($failed, $skipped, $missing);
706720

707721
return [
708722
'schema' => 'durable-workflow.sample-app.conformance.run',
@@ -723,10 +737,88 @@ private function metadata(string $startedAt, string $baseUrl, bool $strict): arr
723737
'skipped_surfaces' => $skipped,
724738
'missing_surfaces' => $missing,
725739
'uncovered_surfaces' => $uncovered,
740+
'findings' => $findings,
726741
],
727742
];
728743
}
729744

745+
/**
 * Build one focused finding per surface that failed, was skipped, or never reported.
 *
 * Findings keep a fixed order: failed surfaces first, then skipped, then missing.
 * A failed finding carries the recorded command, URL, exit code, expectation and
 * evidence; a skipped finding carries the recorded reason (or a generic one when
 * none was recorded as a string); a missing finding carries only its impact.
 *
 * @param list<string> $failed
 * @param list<string> $skipped
 * @param list<string> $missing
 * @return list<array<string, mixed>>
 */
private function findings(array $failed, array $skipped, array $missing): array
{
    $describeFailed = function ($name): array {
        $outcome = $this->surfaces[$name] ?? [];

        return [
            'surface' => $name,
            'status' => 'failed',
            'impact' => $this->failedSurfaceImpact($name),
            'command' => $outcome['command'] ?? null,
            'url' => $outcome['url'] ?? null,
            'exit_code' => $outcome['exit_code'] ?? null,
            'expected' => $outcome['expected'] ?? null,
            'evidence' => $this->surfaceEvidence($outcome),
        ];
    };

    $describeSkipped = function ($name): array {
        $outcome = $this->surfaces[$name] ?? [];
        $recorded = $outcome['reason'] ?? null;

        return [
            'surface' => $name,
            'status' => 'skipped',
            'impact' => "The {$name} sample surface was not exercised, so this run does not prove the documented user workflow.",
            'reason' => is_string($recorded) ? $recorded : 'The surface was not exercised.',
        ];
    };

    $describeMissing = static function ($name): array {
        return [
            'surface' => $name,
            'status' => 'missing',
            'impact' => "The {$name} sample surface did not report any result, so the harness did not cover the documented user workflow.",
        ];
    };

    // array_values() guarantees list-shaped inputs so the spreads below append
    // in order instead of merging by key.
    return [
        ...array_map($describeFailed, array_values($failed)),
        ...array_map($describeSkipped, array_values($skipped)),
        ...array_map($describeMissing, array_values($missing)),
    ];
}
792+
793+
/**
 * Describe, in user-facing terms, what the failure of a surface means.
 *
 * Surfaces with a tailored message are looked up by name; any other surface
 * name falls back to a generic sentence that embeds the name.
 */
private function failedSurfaceImpact(string $surface): string
{
    // The three AI failure-mode surfaces share one compensation message.
    $compensation = 'The AI travel-agent failure-mode sample did not demonstrate compensation; users cannot verify that failed bookings roll back as documented.';

    $impacts = [
        'ai_agent_scripted' => 'The scripted AI travel-agent sample did not complete as documented; users following the repeatable app:ai command can see a nonzero exit or miss the expected booking confirmation.',
        'ai_failure_hotel' => $compensation,
        'ai_failure_flight' => $compensation,
        'ai_failure_car' => $compensation,
        'prism_ai' => 'The Prism AI sample did not produce a generated user; users with AI credentials cannot confirm the documented durable AI loop.',
        'mcp_workflow_api' => 'The MCP workflow API did not start and observe a sample workflow to completion; AI clients cannot rely on the documented tool flow.',
        'waterline_operator_dashboard' => 'The Waterline dashboard was not reachable with the expected operator content; users cannot inspect workflow state through the documented UI.',
        'waterline_manual_observation' => 'The manual observation path did not export completed workflow history; operators cannot verify the documented Waterline/history inspection flow.',
    ];

    return $impacts[$surface]
        ?? "The {$surface} sample surface failed, so a user following the documented sample path would not see the expected successful result.";
}
805+
806+
/**
 * Pick the first usable piece of evidence recorded for a surface result.
 *
 * Candidates are checked in priority order: error, stderr_tail, stdout_tail,
 * body_tail. A candidate is usable when it is a string that is not blank after
 * trimming; it is returned exactly as recorded (untrimmed).
 *
 * @param array<string, mixed> $result
 * @return string|null The first usable candidate, or null when none qualifies.
 */
private function surfaceEvidence(array $result): ?string
{
    $candidates = [
        $result['error'] ?? null,
        $result['stderr_tail'] ?? null,
        $result['stdout_tail'] ?? null,
        $result['body_tail'] ?? null,
    ];

    foreach ($candidates as $candidate) {
        if (! is_string($candidate)) {
            continue;
        }

        if (trim($candidate) === '') {
            continue;
        }

        return $candidate;
    }

    return null;
}
821+
730822
/**
731823
* @return array<string, string|null>
732824
*/

scripts/resolve-current-artifacts.sh

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env bash
22
set -euo pipefail
33

4-
default_server_image="durableworkflow/server:0.2.188"
4+
default_server_image="durableworkflow/server:0.2.190"
55
default_cli_version="0.1.67"
66
default_python_sdk_version="0.4.78"
77
default_workflow_version="2.0.0-alpha.177"
@@ -71,6 +71,50 @@ process.stdin.on("end", () => {
7171
printf '%s\n' "$fallback"
7272
}
7373

74+
# Resolve the newest published server image tag that belongs to the same
# major.minor series as the pinned fallback image.
#
# Arguments:
#   $1 - fallback image reference ("repository:tag"). It is printed unchanged
#        when curl or node is unavailable, the Docker Hub request fails, the
#        response cannot be parsed, or no tag of the series is listed.
# Outputs:
#   One line on stdout: "repository:<newest tag in series>" or the fallback.
#
# Only the first page of results (page_size=100) is inspected.
latest_dockerhub_server_image() {
  local fallback="$1"
  local repository="${fallback%:*}"
  local pinned_tag="${fallback##*:}"
  local series="0.2"
  local latest

  # Follow the series of the pinned tag so that moving the default to a new
  # minor line never resolves back to an older 0.2.x image. A fallback whose
  # tag is not X.Y.Z keeps the historical 0.2 series.
  if [[ "$pinned_tag" =~ ^([0-9]+\.[0-9]+)\.[0-9]+$ ]]; then
    series="${BASH_REMATCH[1]}"
  fi

  if ! command -v curl >/dev/null 2>&1 || ! command -v node >/dev/null 2>&1; then
    printf '%s\n' "$fallback"
    return 0
  fi

  # The series reaches node through the environment so the inline script needs
  # no shell interpolation. Any failure in the pipeline (HTTP error, invalid
  # JSON, no matching tag) makes the condition false and selects the fallback.
  if latest="$(
    curl -fsSL --retry 2 --connect-timeout 5 --max-time 15 \
      "https://registry.hub.docker.com/v2/repositories/${repository}/tags?page_size=100" 2>/dev/null \
      | DURABLE_SERVER_SERIES="$series" node -e '
        let raw = "";
        process.stdin.setEncoding("utf8");
        process.stdin.on("data", chunk => { raw += chunk; });
        process.stdin.on("end", () => {
          const prefix = process.env.DURABLE_SERVER_SERIES + ".";
          const payload = JSON.parse(raw);
          const tags = Array.isArray(payload.results) ? payload.results : [];
          const versions = tags
            .map(tag => tag && typeof tag.name === "string" ? tag.name : "")
            .filter(name => name.startsWith(prefix) && /^\d+$/.test(name.slice(prefix.length)))
            .map(name => ({ name, patch: Number(name.slice(prefix.length)) }))
            .sort((a, b) => b.patch - a.patch);

          if (versions.length === 0) {
            process.exit(1);
          }

          process.stdout.write(versions[0].name);
        });
      ' 2>/dev/null
  )" && [[ "$latest" == "${series}."* && "${latest#"${series}."}" =~ ^[0-9]+$ ]]; then
    printf '%s:%s\n' "$repository" "$latest"
    return 0
  fi

  printf '%s\n' "$fallback"
}
117+
74118
normalize_cli_pin() {
75119
local pin="$1"
76120
local version="$2"
@@ -92,7 +136,7 @@ normalize_cli_pin() {
92136
printf '%s\n' "$pin"
93137
}
94138

95-
server_image="${DURABLE_SERVER_IMAGE:-$default_server_image}"
139+
server_image="${DURABLE_SERVER_IMAGE:-$(latest_dockerhub_server_image "$default_server_image")}"
96140
server_version="$(semantic_version_from_text "$server_image")"
97141
server_version="${server_version:-$(semantic_version_from_text "$default_server_image")}"
98142

tests/Feature/SampleTeachingMaterialTest.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ public function test_full_conformance_harness_is_public_and_names_required_surfa
9797
$this->assertStringContainsString('SAMPLE_APP_SMOKE_ONLY=1', $readme);
9898
$this->assertStringContainsString('API documentation check', $readme);
9999
$this->assertStringContainsString('Waterline/manual observation check', $readme);
100+
$this->assertStringContainsString('focused findings', $readme);
100101
$this->assertStringContainsString('--booking-plan-json', $readme);
101102
$this->assertStringContainsString('app:conformance', $script);
102103
$this->assertStringContainsString('SAMPLE_APP_CONFORMANCE_URL:-http://app:8000', $script);
@@ -112,7 +113,8 @@ public function test_full_conformance_harness_is_public_and_names_required_surfa
112113
$this->assertStringContainsString('git rev-parse HEAD', $script);
113114
$this->assertStringContainsString('SAMPLE_APP_COMMIT="${sample_app_commit}"', $script);
114115
$this->assertStringContainsString('scripts/resolve-current-artifacts.sh', $script);
115-
$this->assertStringContainsString('default_server_image="durableworkflow/server:0.2.188"', $artifactResolver);
116+
$this->assertStringContainsString('default_server_image="durableworkflow/server:0.2.190"', $artifactResolver);
117+
$this->assertStringContainsString('latest_dockerhub_server_image', $artifactResolver);
116118
$this->assertStringContainsString('default_python_sdk_version="0.4.78"', $artifactResolver);
117119
$this->assertStringContainsString('default_workflow_version="2.0.0-alpha.177"', $artifactResolver);
118120
$this->assertStringContainsString('default_waterline_version="2.0.0-alpha.64"', $artifactResolver);
@@ -129,6 +131,8 @@ public function test_full_conformance_harness_is_public_and_names_required_surfa
129131
$this->assertStringContainsString('required_surfaces', $command);
130132
$this->assertStringContainsString('missing_surfaces', $command);
131133
$this->assertStringContainsString('uncovered_surfaces', $command);
134+
$this->assertStringContainsString('focused findings', $command);
135+
$this->assertStringContainsString('failedSurfaceImpact', $command);
132136
$this->assertStringContainsString('api_documentation', $command);
133137
$this->assertStringContainsString('runApiDocumentationSurface', $command);
134138
$this->assertStringContainsString('get_workflow_history', $command);

0 commit comments

Comments
 (0)