Skip to content

Commit fb8e34c

Browse files
committed
fix: url-decode path segments in route layer
After PR #216, durable-execution ARNs minted by Execution.new() contain a literal '/' of the form "<uuid>/<invocation-id>". boto's rest-json serializer percent-encodes '/' as %2F in the non-greedy {DurableExecutionArn} URI label, so paths arriving at the local WebServer look like:

    /2025-12-01/durable-executions/<uuid>%2F<invocation-id>

The same shape applies to ListDurableExecutionsByFunction with function names like "MyFunction:$LATEST" (':' -> %3A, '$' -> %24).

Without decoding, store lookups never match the key and every Get/State/History/Checkpoint/Stop returns 404. List queries silently return an empty result set.

- Decode each segment once in Route.from_string. raw_path is kept as the original wire string for logging. Splitting on '/' happens before decoding so a captured value containing %2F stays inside its segment instead of acting as a path separator.
- Remove the now-redundant per-route unquote() calls from the three callback routes (added in #117 for the same bug shape).
- Add a real-boto regression test under tests/web/e2e/ that drives a live WebServer for every affected operation with values containing the characters boto percent-encodes. Closes the test-coverage gap that let the bug ship.
- Strengthen test_route_with_special_characters to assert both segments[N] and the named field are decoded while raw_path keeps the wire form.

Affects users running WebRunner / dex-local-runner against their durable function in RIE; pre-fix, the function 404s on its first checkpoint after upgrading to 1.2.0.

Closes #222
1 parent 632ab8d commit fb8e34c

3 files changed

Lines changed: 272 additions & 11 deletions

File tree

src/aws_durable_execution_sdk_python_testing/web/routes.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,22 @@ def from_route(cls, _route: Route) -> Route:
3939
def from_string(cls, path: str) -> Route:
4040
"""Create a Route from a string.
4141
42+
Each segment is URL-decoded; ``raw_path`` is preserved as the
43+
original wire path. Splitting on ``/`` happens before decoding so
44+
that an encoded ``%2F`` inside a captured value (e.g. an ARN that
45+
contains ``/``) stays inside its segment instead of being treated
46+
as a path separator.
47+
4248
Args:
4349
path: The raw path string
4450
4551
Returns:
46-
Route instance with parsed segments
52+
Route instance with parsed, URL-decoded segments
4753
"""
48-
# Remove leading/trailing slashes and split into segments
49-
segments = [s for s in path.strip("/").split("/") if s]
54+
# Remove leading/trailing slashes, split on '/', then URL-decode each
55+
# segment. Order matters: split on the literal '/' first so '%2F'-
56+
# encoded slashes inside values don't act as separators.
57+
segments = [unquote(s) for s in path.strip("/").split("/") if s]
5058
return cls(raw_path=path, segments=segments)
5159

5260
def matches_pattern(self, pattern: list[str]) -> bool:
@@ -445,7 +453,7 @@ def from_route(cls, route: Route) -> CallbackSuccessRoute:
445453
return cls(
446454
raw_path=route.raw_path,
447455
segments=route.segments,
448-
callback_id=unquote(route.segments[2]),
456+
callback_id=route.segments[2],
449457
)
450458

451459

@@ -488,7 +496,7 @@ def from_route(cls, route: Route) -> CallbackFailureRoute:
488496
return cls(
489497
raw_path=route.raw_path,
490498
segments=route.segments,
491-
callback_id=unquote(route.segments[2]),
499+
callback_id=route.segments[2],
492500
)
493501

494502

@@ -531,7 +539,7 @@ def from_route(cls, route: Route) -> CallbackHeartbeatRoute:
531539
return cls(
532540
raw_path=route.raw_path,
533541
segments=route.segments,
534-
callback_id=unquote(route.segments[2]),
542+
callback_id=route.segments[2],
535543
)
536544

537545

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
"""Integration test: WebServer route layer URL-decodes DurableExecutionArn.
2+
3+
Drives a real ``boto3`` Lambda client against a live ``WebServer`` and asserts
4+
that ``DurableExecutionArn`` values containing characters that boto
5+
percent-encodes in URI labels (e.g. ``/`` -> ``%2F``) round-trip correctly so
6+
the store lookup hits.
7+
"""
8+
9+
from __future__ import annotations
10+
11+
import threading
12+
import time
13+
from typing import Any
14+
15+
import boto3 # type: ignore
16+
import pytest
17+
from botocore.config import Config # type: ignore
18+
from botocore.exceptions import ClientError # type: ignore
19+
20+
from aws_durable_execution_sdk_python_testing.checkpoint.processor import (
21+
CheckpointProcessor,
22+
)
23+
from aws_durable_execution_sdk_python_testing.execution import Execution
24+
from aws_durable_execution_sdk_python_testing.executor import Executor
25+
from aws_durable_execution_sdk_python_testing.model import (
26+
StartDurableExecutionInput,
27+
)
28+
from aws_durable_execution_sdk_python_testing.scheduler import Scheduler
29+
from aws_durable_execution_sdk_python_testing.stores.memory import (
30+
InMemoryExecutionStore,
31+
)
32+
from aws_durable_execution_sdk_python_testing.web.server import (
33+
WebServer,
34+
WebServiceConfig,
35+
)
36+
37+
38+
class _NoOpInvoker:
39+
"""Satisfies the Invoker protocol without invoking anything.
40+
41+
The route-layer regression doesn't depend on actually executing the
42+
function; the executor just needs *some* invoker to construct it.
43+
"""
44+
45+
def create_invocation_input(self, execution: Any) -> Any: # noqa: ARG002
46+
return None
47+
48+
def invoke(self, *args: Any, **kwargs: Any) -> Any: # noqa: ARG002
49+
return None
50+
51+
def update_endpoint(self, *args: Any, **kwargs: Any) -> None: # noqa: ARG002
52+
return None
53+
54+
55+
def _assert_no_percent_encoding_in_error(exc: ClientError, arn: str) -> None:
56+
"""Fail the test if a ResourceNotFoundException carries a %2F-form ARN.
57+
58+
Other errors (e.g. invalid checkpoint token, wrong state) are fine; this
59+
test is narrowly about whether the route layer decoded the path segment.
60+
"""
61+
msg = str(exc)
62+
assert "%2F" not in msg, (
63+
f"WebServer route layer did not URL-decode DurableExecutionArn. "
64+
f"Original ARN: {arn!r}. Error: {msg}"
65+
)
66+
67+
68+
@pytest.fixture
def server_with_slash_arn():
    """Yield ``(boto_client, arn, executor, store)`` for a live WebServer.

    The yielded ARN contains a literal ``/`` matching the v1.2.0+ format
    produced by ``Execution.new()``. The Execution is pre-started and saved
    so read paths have something to find.

    Every resource is released in a ``finally`` that is entered as soon as
    the resource exists, so a failure part-way through setup (for example
    the ARN precondition assert) cannot leak a running scheduler or a bound
    server socket into later tests. Teardown order is: stop serving, join
    the server thread, close the socket, stop the scheduler.
    """
    store = InMemoryExecutionStore()
    scheduler = Scheduler()
    checkpoint_processor = CheckpointProcessor(store=store, scheduler=scheduler)
    executor = Executor(
        store=store,
        scheduler=scheduler,
        invoker=_NoOpInvoker(),
        checkpoint_processor=checkpoint_processor,
    )
    checkpoint_processor.add_execution_observer(executor)
    scheduler.start()
    try:
        # Hand-build a started Execution whose ARN contains '/' so we control
        # the format under test without going through executor.start_execution
        # (which schedules a real invoke + timeout).
        start_input = StartDurableExecutionInput(
            account_id="123456789012",
            function_name="test-fn",
            function_qualifier="$LATEST",
            execution_name="test-exec",
            execution_timeout_seconds=300,
            execution_retention_period_days=7,
            invocation_id="inv-12345",
            input='"hi"',
        )
        execution = Execution.new(start_input)
        execution.start()
        store.save(execution)
        arn = execution.durable_execution_arn
        assert "/" in arn, "regression precondition: ARN must contain literal '/'"

        config = WebServiceConfig(host="127.0.0.1", port=0)
        server = WebServer(config, executor)
        try:
            port = server.server_address[1]
            server_thread = threading.Thread(target=server.serve_forever, daemon=True)
            server_thread.start()
            try:
                # Give the listener a beat to come up before the boto client
                # connects.
                time.sleep(0.05)

                client = boto3.client(
                    "lambda",
                    endpoint_url=f"http://127.0.0.1:{port}",
                    region_name="us-east-1",
                    aws_access_key_id="x",  # noqa: S106 - test stub
                    aws_secret_access_key="y",  # noqa: S106 - test stub
                    config=Config(
                        parameter_validation=False, retries={"max_attempts": 0}
                    ),
                )
                yield client, arn, executor, store
            finally:
                # shutdown() is only called once serve_forever is running in
                # the thread; join so the thread is gone before the socket
                # is closed.
                server.shutdown()
                server_thread.join(timeout=5)
        finally:
            server.server_close()
    finally:
        scheduler.stop()
131+
132+
def test_get_durable_execution_decodes_slash_in_arn(server_with_slash_arn):
    """GetDurableExecution echoes the original ARN once %2F has been decoded."""
    client, arn, *_unused = server_with_slash_arn

    returned_arn = client.get_durable_execution(DurableExecutionArn=arn)[
        "DurableExecutionArn"
    ]

    # A hit in the store means the route layer handed over the decoded ARN.
    assert returned_arn == arn
139+
140+
141+
def test_get_durable_execution_state_decodes_slash_in_arn(server_with_slash_arn):
    """GetDurableExecutionState resolves an ARN whose '/' arrived as %2F."""
    client, arn, *_unused = server_with_slash_arn
    request = {
        "DurableExecutionArn": arn,
        "CheckpointToken": "ignored-by-route-layer",  # noqa: S106 - test stub
    }

    state = client.get_durable_execution_state(**request)

    # The response shape is not what is under test; getting a response at
    # all means route resolution succeeded.
    assert state is not None
153+
154+
155+
def test_get_durable_execution_history_decodes_slash_in_arn(server_with_slash_arn):
    """GetDurableExecutionHistory resolves an ARN whose '/' arrived as %2F."""
    client, arn, *_unused = server_with_slash_arn

    history = client.get_durable_execution_history(DurableExecutionArn=arn)

    # Only route resolution is under test, so any response counts.
    assert history is not None
162+
163+
164+
def test_checkpoint_durable_execution_decodes_slash_in_arn(server_with_slash_arn):
    """CheckpointDurableExecution resolves an ARN whose '/' arrived as %2F.

    An update-free checkpoint may still be rejected by secondary
    validation. That is tolerated: the test only fails when the error text
    shows the %2F-encoded ARN, which is the signature of the route layer
    not decoding the path segment.
    """
    client, arn, _executor, store = server_with_slash_arn
    checkpoint_token = store.load(arn).get_new_checkpoint_token()
    request = {
        "DurableExecutionArn": arn,
        "CheckpointToken": checkpoint_token,
        "Updates": [],
    }

    try:
        client.checkpoint_durable_execution(**request)
    except ClientError as err:
        _assert_no_percent_encoding_in_error(err, arn)
183+
184+
185+
def test_stop_durable_execution_decodes_slash_in_arn(server_with_slash_arn):
    """StopDurableExecution resolves an ARN whose '/' arrived as %2F.

    A ClientError is tolerated unless its text shows the %2F-encoded ARN.
    """
    client, arn, *_unused = server_with_slash_arn
    request = {"DurableExecutionArn": arn}

    try:
        client.stop_durable_execution(**request)
    except ClientError as err:
        _assert_no_percent_encoding_in_error(err, arn)
193+
194+
195+
def test_list_durable_executions_by_function_decodes_colon_in_name(
    server_with_slash_arn,
):
    """ListDurableExecutionsByFunction: %3A/%24 in FunctionName must be decoded.

    ``:`` and ``$`` are percent-encoded by boto in the non-greedy
    ``{FunctionName}`` URI label, so ``MyFunction:$LATEST`` reaches the
    server as ``MyFunction%3A%24LATEST``. The store filters on an exact
    ``function_name`` match, so the seeded execution is only listed when
    the route layer decodes the segment first.

    Before the fix the handler filtered on the encoded string and the
    response was empty; after the fix the seeded execution is returned.
    """
    client, _arn, _executor, store = server_with_slash_arn

    # Seed an execution whose function_name contains characters boto encodes.
    realistic_function_name = "MyFunction:$LATEST"
    seed_fields = {
        "account_id": "123456789012",
        "function_name": realistic_function_name,
        "function_qualifier": "$LATEST",
        "execution_name": "encoded-fn-exec",
        "execution_timeout_seconds": 300,
        "execution_retention_period_days": 7,
        "invocation_id": "inv-encoded-fn",
        "input": '"hi"',
    }
    seeded = Execution.new(StartDurableExecutionInput(**seed_fields))
    seeded.start()
    store.save(seeded)

    response = client.list_durable_executions_by_function(
        FunctionName=realistic_function_name,
    )

    listed = response.get("DurableExecutions", [])
    arns = [entry["DurableExecutionArn"] for entry in listed]
    assert seeded.durable_execution_arn in arns, (
        f"WebServer route layer did not URL-decode FunctionName. "
        f"Seeded function_name {realistic_function_name!r} produced arn "
        f"{seeded.durable_execution_arn!r}, but list response contained "
        f"{arns!r}."
    )

tests/web/routes_test.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -480,13 +480,28 @@ def test_route_immutability():
480480

481481

482482
def test_route_with_special_characters():
483-
"""Test route parsing with special characters in ARNs and IDs."""
484-
# Test with URL-encoded characters
485-
arn = "arn:aws:lambda:us-east-1:123456789012:function:my-function%20with%20spaces"
483+
"""Test route parsing with special characters in ARNs and IDs.
484+
485+
URL-decoding happens once in ``Route.from_string`` so every captured
486+
path segment (``segments[N]`` and any named field that mirrors it,
487+
such as ``arn`` or ``callback_id``) carries the literal value the
488+
caller passed to boto. ``raw_path`` keeps the original wire string.
489+
"""
490+
# ARN with %20-encoded spaces should round-trip back to a literal space.
491+
encoded_arn = (
492+
"arn:aws:lambda:us-east-1:123456789012:function:my-function%20with%20spaces"
493+
)
494+
decoded_arn = (
495+
"arn:aws:lambda:us-east-1:123456789012:function:my-function with spaces"
496+
)
497+
raw_path = f"/2025-12-01/durable-executions/{encoded_arn}"
486498
router = Router()
487-
route = router.find_route(f"/2025-12-01/durable-executions/{arn}", "GET")
499+
route = router.find_route(raw_path, "GET")
488500
assert isinstance(route, GetDurableExecutionRoute)
489-
assert route.arn == arn
501+
assert route.arn == decoded_arn
502+
assert route.segments[2] == decoded_arn
503+
# raw_path is preserved as the original wire form for logging/debugging.
504+
assert route.raw_path == raw_path
490505

491506
# Test with callback ID containing special characters
492507
callback_id = "callback-123-abc_def"

0 commit comments

Comments
 (0)