Skip to content

Commit 2513c45

Browse files
authored
feat: add tag_session with Unicode sanitization (#670)
Stacked on #668 (`rename_session`).

## Changes

- NEW: `tag_session(session_id, tag, directory=None)` — appends a `{type:'tag',tag:<tag>,sessionId:<id>}` JSONL entry. `list_sessions()` reads the LAST tag from the file tail — most recent wins.
- Passing `None` appends an empty-string tag entry (`{"tag":""}`) which readers treat as cleared.

## Port notes

**Unicode sanitization** (for CLI filter compat):

- Python's built-in `re` module doesn't support `\p{Cf}` etc. (only the third-party `regex` module does), so `_sanitize_unicode()` uses `unicodedata.category(c) not in {"Cf","Co","Cn"}` per-character — semantically equivalent to TS's `/[\p{Cf}\p{Co}\p{Cn}]/gu` regex (and more reliable than TS's own ES-engine fallback).
- Applies NFKC normalization + explicit ranges (zero-width chars, directional marks, BOM, private-use area) iteratively until stable (max 10 passes).

**Other behavior:**

- Tag is `.strip()`ed before storing; whitespace-only tags rejected (`ValueError`).
- Tags that are pure invisible characters (e.g. only zero-width spaces) are rejected after sanitization.
- Reuses `_append_to_session` from the `rename_session` PR (worktree fallback, 0-byte stub skip, `O_APPEND` without `O_CREAT`).

<!-- CHANGELOG:START -->
- Add `tag_session()` for tagging sessions (with Unicode sanitization)
<!-- CHANGELOG:END -->

## Tests

17 new tests (TestTagSession, TestSanitizeUnicode). 298 total pass. Ruff + mypy clean.
1 parent 8cbf851 commit 2513c45

3 files changed

Lines changed: 320 additions & 4 deletions

File tree

src/claude_agent_sdk/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
CLINotFoundError,
1414
ProcessError,
1515
)
16-
from ._internal.session_mutations import rename_session
16+
from ._internal.session_mutations import rename_session, tag_session
1717
from ._internal.sessions import get_session_messages, list_sessions
1818
from ._internal.transport import Transport
1919
from ._version import __version__
@@ -423,6 +423,7 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> Any:
423423
"SessionMessage",
424424
# Session mutations
425425
"rename_session",
426+
"tag_session",
426427
# Beta support
427428
"SdkBeta",
428429
# Sandbox support

src/claude_agent_sdk/_internal/session_mutations.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
import errno
2323
import json
2424
import os
25+
import re
26+
import unicodedata
2527
from pathlib import Path
2628

2729
from .sessions import (
@@ -92,6 +94,72 @@ def rename_session(
9294
_append_to_session(session_id, data, directory)
9395

9496

97+
def tag_session(
98+
session_id: str,
99+
tag: str | None,
100+
directory: str | None = None,
101+
) -> None:
102+
"""Tag a session. Pass ``None`` to clear the tag.
103+
104+
Appends a ``{type:'tag',tag:<tag>,sessionId:<id>}`` JSONL entry.
105+
``list_sessions`` reads the LAST tag from the file tail — most recent
106+
wins. Passing ``None`` appends an empty-string tag entry which
107+
``list_sessions`` treats as ``None`` (cleared).
108+
109+
Tags are Unicode-sanitized before storing (removes zero-width chars,
110+
directional marks, private-use characters, etc.) for CLI filter
111+
compatibility.
112+
113+
Args:
114+
session_id: UUID of the session to tag.
115+
tag: Tag string, or ``None`` to clear. Leading/trailing whitespace
116+
is stripped. Must be non-empty after sanitization and stripping
117+
(unless ``None``).
118+
directory: Project directory path (same semantics as
119+
``list_sessions(directory=...)``). When omitted, all project
120+
directories are searched for the session file.
121+
122+
Raises:
123+
ValueError: If ``session_id`` is not a valid UUID, or if ``tag`` is
124+
empty/whitespace-only after sanitization.
125+
FileNotFoundError: If the session file cannot be found.
126+
127+
Example:
128+
Tag a session::
129+
130+
tag_session(
131+
"550e8400-e29b-41d4-a716-446655440000",
132+
"experiment",
133+
directory="/path/to/project",
134+
)
135+
136+
Clear a tag::
137+
138+
tag_session(session_id, None)
139+
"""
140+
if not _validate_uuid(session_id):
141+
raise ValueError(f"Invalid session_id: {session_id}")
142+
if tag is not None:
143+
sanitized = _sanitize_unicode(tag).strip()
144+
if not sanitized:
145+
raise ValueError("tag must be non-empty (use None to clear)")
146+
tag = sanitized
147+
148+
data = (
149+
json.dumps(
150+
{
151+
"type": "tag",
152+
"tag": tag if tag is not None else "",
153+
"sessionId": session_id,
154+
},
155+
separators=(",", ":"),
156+
)
157+
+ "\n"
158+
)
159+
160+
_append_to_session(session_id, data, directory)
161+
162+
95163
# ---------------------------------------------------------------------------
96164
# Helpers
97165
# ---------------------------------------------------------------------------
@@ -185,3 +253,49 @@ def _try_append(path: Path, data: str) -> bool:
185253
return True
186254
finally:
187255
os.close(fd)
256+
257+
258+
# ---------------------------------------------------------------------------
259+
# Unicode sanitization — ported from TS sanitization.ts
260+
# ---------------------------------------------------------------------------
261+
262+
# Explicit ranges for dangerous Unicode characters. Python's built-in `re`
263+
# module has no \p{...} category classes (only the third-party `regex` module
264+
# does), so we use explicit ranges here (matching the TS fallback paths).
265+
_UNICODE_STRIP_RE = re.compile(
266+
"["
267+
"\u200b-\u200f" # Zero-width spaces, LTR/RTL marks
268+
"\u202a-\u202e" # Directional formatting characters
269+
"\u2066-\u2069" # Directional isolates
270+
"\ufeff" # Byte order mark
271+
"\ue000-\uf8ff" # Basic Multilingual Plane private use
272+
"]"
273+
)
274+
275+
# Format (Cf), private-use (Co), and unassigned (Cn) categories. Cf characters
276+
# are the ones most commonly abused for injection. We check these per-character
277+
# since Python's re module doesn't support \p{Cf} without the third-party regex module.
278+
_FORMAT_CATEGORIES = frozenset({"Cf", "Co", "Cn"})
279+
280+
281+
def _sanitize_unicode(value: str) -> str:
282+
"""Sanitize a string by removing dangerous Unicode characters.
283+
284+
Ported from TS ``partiallySanitizeUnicode``. Iteratively applies NFKC
285+
normalization and strips format/private-use/unassigned characters until
286+
no more changes occur (max 10 iterations).
287+
"""
288+
current = value
289+
for _ in range(10):
290+
previous = current
291+
# Apply NFKC normalization to fold compatibility forms (e.g. fullwidth letters)
292+
current = unicodedata.normalize("NFKC", current)
293+
# Strip Cf (format), Co (private use), Cn (unassigned) categories
294+
current = "".join(
295+
c for c in current if unicodedata.category(c) not in _FORMAT_CATEGORIES
296+
)
297+
# Explicit ranges (redundant with category check but matches TS)
298+
current = _UNICODE_STRIP_RE.sub("", current)
299+
if current == previous:
300+
break
301+
return current

tests/test_session_mutations.py

Lines changed: 204 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Tests for session mutation functions (rename_session, delete_session, tag_session)."""
1+
"""Tests for session mutation functions (rename_session, tag_session)."""
22

33
from __future__ import annotations
44

@@ -9,8 +9,11 @@
99

1010
import pytest
1111

12-
from claude_agent_sdk import list_sessions, rename_session
13-
from claude_agent_sdk._internal.session_mutations import _try_append
12+
from claude_agent_sdk import list_sessions, rename_session, tag_session
13+
from claude_agent_sdk._internal.session_mutations import (
14+
_sanitize_unicode,
15+
_try_append,
16+
)
1417
from claude_agent_sdk._internal.sessions import _sanitize_path
1518

1619
# ---------------------------------------------------------------------------
@@ -253,3 +256,201 @@ def test_compact_json_format(self, claude_config_dir: Path, tmp_path: Path):
253256
assert lines[-1] == (
254257
f'{{"type":"custom-title","customTitle":"Title","sessionId":"{sid}"}}'
255258
)
259+
260+
261+
# ---------------------------------------------------------------------------
262+
# tag_session() tests
263+
# ---------------------------------------------------------------------------
264+
265+
266+
class TestTagSession:
267+
"""Tests for tag_session()."""
268+
269+
def test_invalid_session_id_raises(self, claude_config_dir: Path):
270+
"""Non-UUID session_id raises ValueError."""
271+
with pytest.raises(ValueError, match="Invalid session_id"):
272+
tag_session("not-a-uuid", "tag")
273+
with pytest.raises(ValueError, match="Invalid session_id"):
274+
tag_session("", "tag")
275+
276+
def test_empty_tag_raises(self, claude_config_dir: Path, tmp_path: Path):
277+
"""Empty or whitespace-only tag raises ValueError."""
278+
project_path = str(tmp_path / "proj")
279+
Path(project_path).mkdir(parents=True)
280+
project_dir = _make_project_dir(
281+
claude_config_dir, os.path.realpath(project_path)
282+
)
283+
sid, _ = _make_session_file(project_dir)
284+
285+
with pytest.raises(ValueError, match="tag must be non-empty"):
286+
tag_session(sid, "", directory=project_path)
287+
with pytest.raises(ValueError, match="tag must be non-empty"):
288+
tag_session(sid, " ", directory=project_path)
289+
290+
def test_session_not_found_raises(self, claude_config_dir: Path, tmp_path: Path):
291+
"""Session not found raises FileNotFoundError."""
292+
project_path = str(tmp_path / "proj")
293+
Path(project_path).mkdir(parents=True)
294+
_make_project_dir(claude_config_dir, os.path.realpath(project_path))
295+
296+
sid = str(uuid.uuid4())
297+
with pytest.raises(FileNotFoundError):
298+
tag_session(sid, "tag", directory=project_path)
299+
300+
def test_appends_tag_entry(self, claude_config_dir: Path, tmp_path: Path):
301+
"""tag_session appends a {type:'tag'} JSON line."""
302+
project_path = str(tmp_path / "proj")
303+
Path(project_path).mkdir(parents=True)
304+
project_dir = _make_project_dir(
305+
claude_config_dir, os.path.realpath(project_path)
306+
)
307+
sid, file_path = _make_session_file(project_dir)
308+
309+
tag_session(sid, "experiment", directory=project_path)
310+
311+
lines = file_path.read_text().strip().split("\n")
312+
entry = json.loads(lines[-1])
313+
assert entry["type"] == "tag"
314+
assert entry["tag"] == "experiment"
315+
assert entry["sessionId"] == sid
316+
317+
def test_tag_trimmed(self, claude_config_dir: Path, tmp_path: Path):
318+
"""Leading/trailing whitespace is stripped from tag."""
319+
project_path = str(tmp_path / "proj")
320+
Path(project_path).mkdir(parents=True)
321+
project_dir = _make_project_dir(
322+
claude_config_dir, os.path.realpath(project_path)
323+
)
324+
sid, file_path = _make_session_file(project_dir)
325+
326+
tag_session(sid, " my-tag ", directory=project_path)
327+
328+
lines = file_path.read_text().strip().split("\n")
329+
entry = json.loads(lines[-1])
330+
assert entry["tag"] == "my-tag"
331+
332+
def test_none_clears_tag(self, claude_config_dir: Path, tmp_path: Path):
333+
"""Passing None appends an empty-string tag entry (clears tag)."""
334+
project_path = str(tmp_path / "proj")
335+
Path(project_path).mkdir(parents=True)
336+
project_dir = _make_project_dir(
337+
claude_config_dir, os.path.realpath(project_path)
338+
)
339+
sid, file_path = _make_session_file(project_dir)
340+
341+
tag_session(sid, "original-tag", directory=project_path)
342+
tag_session(sid, None, directory=project_path)
343+
344+
lines = file_path.read_text().strip().split("\n")
345+
# Last entry is the clear
346+
entry = json.loads(lines[-1])
347+
assert entry["type"] == "tag"
348+
assert entry["tag"] == ""
349+
assert entry["sessionId"] == sid
350+
351+
def test_last_wins(self, claude_config_dir: Path, tmp_path: Path):
352+
"""Multiple tag calls — last one lands at EOF."""
353+
project_path = str(tmp_path / "proj")
354+
Path(project_path).mkdir(parents=True)
355+
project_dir = _make_project_dir(
356+
claude_config_dir, os.path.realpath(project_path)
357+
)
358+
sid, file_path = _make_session_file(project_dir)
359+
360+
tag_session(sid, "first", directory=project_path)
361+
tag_session(sid, "second", directory=project_path)
362+
tag_session(sid, "third", directory=project_path)
363+
364+
lines = file_path.read_text().strip().split("\n")
365+
entry = json.loads(lines[-1])
366+
assert entry["tag"] == "third"
367+
# All three tag entries present in file
368+
tag_lines = [
369+
json.loads(line) for line in lines if json.loads(line).get("type") == "tag"
370+
]
371+
assert len(tag_lines) == 3
372+
373+
def test_compact_json_format(self, claude_config_dir: Path, tmp_path: Path):
374+
"""Appended JSON uses compact separators matching CLI."""
375+
project_path = str(tmp_path / "proj")
376+
Path(project_path).mkdir(parents=True)
377+
project_dir = _make_project_dir(
378+
claude_config_dir, os.path.realpath(project_path)
379+
)
380+
sid, file_path = _make_session_file(project_dir)
381+
382+
tag_session(sid, "mytag", directory=project_path)
383+
384+
lines = file_path.read_text().strip().split("\n")
385+
assert lines[-1] == f'{{"type":"tag","tag":"mytag","sessionId":"{sid}"}}'
386+
387+
def test_unicode_sanitization(self, claude_config_dir: Path, tmp_path: Path):
388+
"""Tag is sanitized: zero-width chars stripped."""
389+
project_path = str(tmp_path / "proj")
390+
Path(project_path).mkdir(parents=True)
391+
project_dir = _make_project_dir(
392+
claude_config_dir, os.path.realpath(project_path)
393+
)
394+
sid, file_path = _make_session_file(project_dir)
395+
396+
# Tag with zero-width space and BOM embedded
397+
dirty_tag = "clean\u200btag\ufeff"
398+
tag_session(sid, dirty_tag, directory=project_path)
399+
400+
lines = file_path.read_text().strip().split("\n")
401+
entry = json.loads(lines[-1])
402+
assert entry["tag"] == "cleantag"
403+
404+
def test_sanitization_rejects_pure_invisible(
405+
self, claude_config_dir: Path, tmp_path: Path
406+
):
407+
"""Tag that is only zero-width chars is rejected."""
408+
project_path = str(tmp_path / "proj")
409+
Path(project_path).mkdir(parents=True)
410+
project_dir = _make_project_dir(
411+
claude_config_dir, os.path.realpath(project_path)
412+
)
413+
sid, _ = _make_session_file(project_dir)
414+
415+
with pytest.raises(ValueError, match="tag must be non-empty"):
416+
tag_session(sid, "\u200b\u200c\ufeff", directory=project_path)
417+
418+
419+
class TestSanitizeUnicode:
420+
"""Tests for the _sanitize_unicode helper."""
421+
422+
def test_passthrough_clean_string(self):
423+
"""Clean strings pass through unchanged."""
424+
assert _sanitize_unicode("hello") == "hello"
425+
assert _sanitize_unicode("tag-with-dashes_123") == "tag-with-dashes_123"
426+
427+
def test_strips_zero_width(self):
428+
"""Zero-width spaces/joiners are stripped."""
429+
assert _sanitize_unicode("a\u200bb") == "ab"
430+
assert _sanitize_unicode("a\u200cb") == "ab" # zero-width non-joiner
431+
assert _sanitize_unicode("a\u200db") == "ab" # zero-width joiner
432+
433+
def test_strips_bom(self):
434+
"""Byte order mark is stripped."""
435+
assert _sanitize_unicode("\ufeffhello") == "hello"
436+
437+
def test_strips_directional_marks(self):
438+
"""LTR/RTL marks and isolates are stripped."""
439+
assert _sanitize_unicode("a\u202ab\u202cc") == "abc"
440+
assert _sanitize_unicode("a\u2066b\u2069c") == "abc"
441+
442+
def test_strips_private_use(self):
443+
"""Private use area characters are stripped."""
444+
assert _sanitize_unicode("a\ue000b") == "ab"
445+
assert _sanitize_unicode("a\uf8ffb") == "ab"
446+
447+
def test_nfkc_normalization(self):
448+
"""NFKC normalization is applied (composed chars)."""
449+
# Fullwidth 'A' → ASCII 'A'
450+
assert _sanitize_unicode("\uff21") == "A"
451+
452+
def test_iterative_converges(self):
453+
"""Handles multi-pass cases safely (max 10 iterations)."""
454+
# A long run of zero-width characters is fully stripped and the loop reaches a fixed point
455+
result = _sanitize_unicode("a" + "\u200b" * 20 + "b")
456+
assert result == "ab"

0 commit comments

Comments
 (0)