Skip to content

Commit f677d78

Browse files
committed
fix(canonical): ensure_ascii=False in legacy canonicalize for cross-lang parity
The legacy canonicalize() serialized string values and object keys with json.dumps's default ensure_ascii=True, which escapes non-ASCII characters as \uXXXX. The TypeScript SDK's canonicalize() uses JSON.stringify, which leaves non-ASCII characters unescaped (raw UTF-8 once encoded). Any payload containing a non-ASCII character was therefore canonicalized differently in Python than in TypeScript, breaking cross-language signatures and content hashes for those payloads. This commit adds ensure_ascii=False to the string-value, object-key, and fallback json.dumps calls. The strict canonicalize_jcs already did this, so it is unchanged. ASCII-only payloads are byte-identical before and after, so existing signatures and test vectors over ASCII-only payloads are unaffected; only output for payloads containing non-ASCII characters changes, to the correct TypeScript-matching form. A cross-implementation raw-UTF-8 test was added. Test suite: 568 passed.
1 parent d8c3de1 commit f677d78

2 files changed

Lines changed: 19 additions & 3 deletions

File tree

src/agent_passport/canonical.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,11 @@ def canonicalize(obj) -> str:
3636
return str(int(obj))
3737
return json.dumps(obj)
3838
if isinstance(obj, str):
39-
return json.dumps(obj)
39+
# ensure_ascii=False to match the TypeScript SDK's JSON.stringify,
40+
# which emits raw UTF-8 and does not \u-escape non-ASCII. Without
41+
# this, a non-ASCII string canonicalizes differently in Python than
42+
# in TypeScript, breaking cross-language signatures and content hashes.
43+
return json.dumps(obj, ensure_ascii=False)
4044
if isinstance(obj, list):
4145
return "[" + ",".join(canonicalize(item) for item in obj) + "]"
4246
if isinstance(obj, dict):
@@ -45,10 +49,10 @@ def canonicalize(obj) -> str:
4549
val = obj[key]
4650
if val is None:
4751
continue
48-
pairs.append(json.dumps(key) + ":" + canonicalize(val))
52+
pairs.append(json.dumps(key, ensure_ascii=False) + ":" + canonicalize(val))
4953
return "{" + ",".join(pairs) + "}"
5054
# Fallback for other types
51-
return json.dumps(obj)
55+
return json.dumps(obj, ensure_ascii=False)
5256

5357

5458
def canonicalize_jcs(obj) -> str:

tests/test_canonical.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,15 @@ def test_empty_array():
3232

3333
def test_quotes_in_strings():
3434
assert canonicalize({"a": 'quotes "inside"'}) == '{"a":"quotes \\"inside\\""}'
35+
36+
37+
def test_non_ascii_is_raw_utf8_matching_typescript():
38+
# The TypeScript SDK's canonicalize() uses JSON.stringify, which emits raw
39+
# UTF-8 and does NOT \u-escape non-ASCII. Python json.dumps defaults to
40+
# ensure_ascii=True (\u-escaped), which would diverge and break
41+
# cross-language signatures. These pin the raw-UTF-8 form.
42+
assert canonicalize("café") == '"café"'
43+
assert canonicalize("🤖") == '"🤖"'
44+
# Non-ASCII in values and in keys (keys sorted by code point: 'a' < 'café').
45+
assert canonicalize({"name": "café"}) == '{"name":"café"}'
46+
assert canonicalize({"café": "x", "a": "y"}) == '{"a":"y","café":"x"}'

0 commit comments

Comments
 (0)