|
| 1 | +# Copyright (c) Microsoft Corporation. |
| 2 | +# Licensed under the MIT license. |
"""Row 1 × Path C with a request-carried ``agent_reference`` (hosted-shaped input).

**Why this test exists (conformance gap closure).**

The hosted gateway injects an ``agent_reference`` onto every request, which the
library normalizes into an :class:`AgentReference` *model* (a Mapping, but NOT
``json.dumps``-serializable). That model flows into the durable-task input
(``_start_durable_background`` -> ``start_durable`` -> ``_split_runtime_refs``).
If it is persisted un-normalized, the core durable ``create_and_start`` ->
``_resolve_input_storage`` size check raises
``TypeError: Object of type AgentReference is not JSON serializable`` and the
whole durable start **silently falls back to a non-durable ``asyncio.create_task``**
— so no durable task exists and crash recovery never happens.

Every other durability test sends NO ``agent_reference`` (so
``_normalize_agent_reference`` returns the ``{}`` sentinel, which is trivially
serializable) or a plain string — so none of them exercised the model form and
the bug shipped invisibly. This test mirrors the hosted condition: it puts an
``agent_reference`` on the request and then crashes (Path C). Because durable
start is **provider-agnostic**, the bug reproduces locally: if the model leaks
into the durable input, the durable task is never created, the SIGKILL'd
non-durable task is lost, and recovery never reaches ``completed`` — failing
this test. With the fix (normalize model -> dict before persisting) the durable
task is created and recovery completes.

Contract source: ``durability-contract.md`` § Per-row contracts → Row 1.
"""
| 30 | + |
| 31 | +from __future__ import annotations |
| 32 | + |
| 33 | +import asyncio |
| 34 | +from collections.abc import Callable |
| 35 | + |
| 36 | +import pytest |
| 37 | + |
| 38 | +from tests.e2e._crash_harness import CrashHarness |
| 39 | +from tests.e2e.durability_contract.conftest import ( |
| 40 | + LONG_GRACE_S, |
| 41 | + LONG_TIME_SECS, |
| 42 | + poll_until_terminal, |
| 43 | + post_and_get_response_id, |
| 44 | +) |
| 45 | + |
# Hosted-shaped ``agent_reference`` payload attached to the request body.
# It is deliberately a plain dict here: the library is the one that turns it
# into an AgentReference MODEL (not a plain dict) on the way in, which
# reproduces the exact value the hosted gateway injects.
_AGENT_REFERENCE: dict[str, str] = {
    "type": "agent_reference",
    "name": "durability-conformance-agent",
    "version": "1",
}
| 54 | + |
| 55 | + |
@pytest.mark.asyncio
@pytest.mark.parametrize("stream", [False, True], ids=["stream=False", "stream=True"])
async def test_row_1_path_c_recovers_with_agent_reference(
    make_harness: Callable[..., CrashHarness], stream: bool
) -> None:
    """Durable background + ``agent_reference`` must survive a SIGKILL.

    Regression guard for the hosted
    ``AgentReference is not JSON serializable`` durable-start failure, which
    silently downgraded durable background responses to a non-durable
    ``asyncio.create_task`` and therefore lost crash recovery.

    Flow: start a harness with ``durable_background=True``, post a stored
    background request that carries ``_AGENT_REFERENCE``, kill and restart the
    harness, then require the response to reach ``"completed"``.

    Args:
        make_harness: Fixture/factory returning a :class:`CrashHarness`.
        stream: Parametrized flag forwarded as the request's ``stream`` value.
    """
    sleep_ms = int(LONG_TIME_SECS * 1000)
    crash_harness = make_harness(
        durable_background=True,
        handler_sleep_ms=sleep_ms,
        shutdown_grace_seconds=LONG_GRACE_S,
    )
    await crash_harness.start()
    try:
        request_extra = {"agent_reference": _AGENT_REFERENCE}
        response_id = await post_and_get_response_id(
            crash_harness.client,
            store=True,
            background=True,
            stream=stream,
            extra=request_extra,
        )

        # Give the handler a moment to begin before the process is SIGKILL'd.
        await asyncio.sleep(0.5)

        await crash_harness.kill()
        await crash_harness.restart()

        # Had agent_reference broken the durable start, the killed asyncio
        # fallback would have left no durable record behind, and the
        # response would never reach "completed".
        final_state = await poll_until_terminal(
            crash_harness.client,
            response_id,
            timeout_seconds=30.0,
        )
        assert final_state["status"] == "completed", final_state
    finally:
        # Always tear the harness down, whether the assertions passed or not.
        await crash_harness.close()
0 commit comments