Skip to content

Commit 2cf63de

Browse files
authored
Preserve instance_id and execution_id in OrchestrationFailed events (#36)
append_failed hardcoded placeholder identifiers ( and 0) that callers never overwrote, so every terminal OrchestrationFailed event (both the Failed and Cancelled paths, plus the infra-ack-failure and poison paths) serialized an empty instance_id and execution_id=0 into its payload. This broke payload-only correlation for telemetry/diagnostic consumers. Thread the real instance_id/execution_id through append_failed at all four call sites, matching the sibling Completed/ContinueAsNew paths, and add a regression assertion to the cancellation sample. Fixes #35
1 parent 439e045 commit 2cf63de

4 files changed

Lines changed: 27 additions & 9 deletions

File tree

src/runtime/dispatchers/orchestration.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -974,7 +974,7 @@ impl Runtime {
974974

975975
// Create a new history manager to append the failure event
976976
let mut failure_history_mgr = HistoryManager::from_history(&item.history);
977-
failure_history_mgr.append_failed(infra_error.clone());
977+
failure_history_mgr.append_failed(&item.instance, item.execution_id, infra_error.clone());
978978

979979
// Try to commit the failure event
980980
let failure_delta = failure_history_mgr.delta().to_vec();
@@ -1356,7 +1356,7 @@ impl Runtime {
13561356
}
13571357
}
13581358

1359-
history_mgr.append_failed(error.clone());
1359+
history_mgr.append_failed(&item.instance, item.execution_id, error.clone());
13601360

13611361
let metadata = ExecutionMetadata {
13621362
status: Some("Failed".to_string()),

src/runtime/execution.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ impl Runtime {
273273
Self::emit_terminal_cancellation_breadcrumbs(history_mgr, instance, execution_id, &outstanding);
274274

275275
// Add failure event last
276-
history_mgr.append_failed(details.clone());
276+
history_mgr.append_failed(instance, execution_id, details.clone());
277277

278278
// Notify parent if this is a sub-orchestration
279279
if let Some((parent_instance, parent_id)) = parent_link {
@@ -354,7 +354,7 @@ impl Runtime {
354354
Self::emit_terminal_cancellation_breadcrumbs(history_mgr, instance, execution_id, &outstanding);
355355

356356
// Add failure event last, and propagate cancellation to outstanding sub-orchestrations.
357-
history_mgr.append_failed(details.clone());
357+
history_mgr.append_failed(instance, execution_id, details.clone());
358358

359359
for (_schedule_id, child_suffix) in &outstanding.sub_orchestrations {
360360
orchestrator_items.push(WorkItem::CancelInstance {

src/runtime/state_helpers.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,14 +167,12 @@ impl HistoryManager {
167167
}
168168

169169
/// Append an OrchestrationFailed event with the next event_id
170-
pub fn append_failed(&mut self, details: crate::ErrorDetails) {
170+
pub fn append_failed(&mut self, instance_id: &str, execution_id: u64, details: crate::ErrorDetails) {
171171
let next_id = self.next_event_id();
172-
// Note: instance_id and execution_id should be set from context
173-
// For now, use placeholder values as this will be set by the caller
174172
self.append(Event::with_event_id(
175173
next_id,
176-
"", // instance_id should be set by caller
177-
0, // execution_id should be set by caller
174+
instance_id,
175+
execution_id,
178176
None,
179177
EventKind::OrchestrationFailed { details },
180178
));

tests/e2e_samples.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,6 +1360,26 @@ async fn sample_cancellation_parent_cascades_to_children_fs() {
13601360
.await;
13611361
assert!(ok, "timeout waiting for parent cancel failure");
13621362

1363+
// Regression: the terminal OrchestrationFailed event must preserve the
1364+
// instance_id/execution_id so telemetry pipelines reading event payloads
1365+
// can correlate the cancellation back to the workflow instance.
1366+
let parent_hist = store.read("inst-sample-cancel").await.unwrap_or_default();
1367+
let failed = parent_hist
1368+
.iter()
1369+
.rev()
1370+
.find(|e| matches!(&e.kind, EventKind::OrchestrationFailed { .. }))
1371+
.expect("parent history should contain OrchestrationFailed");
1372+
assert_eq!(
1373+
failed.instance_id,
1374+
"inst-sample-cancel",
1375+
"OrchestrationFailed must carry the instance_id"
1376+
);
1377+
assert_ne!(
1378+
failed.execution_id,
1379+
0,
1380+
"OrchestrationFailed must carry a non-placeholder execution_id"
1381+
);
1382+
13631383
// Find child instance (prefix is parent::sub::<id>) and check it was canceled too
13641384
let mgmt = store.as_management_capability().expect("ProviderAdmin required");
13651385
let children: Vec<String> = mgmt

0 commit comments

Comments
 (0)