@@ -63,6 +63,49 @@ def _scan_context_content(content: str, filename: str) -> str:
6363 return content
6464
6565
# Matches the blockquote line that _resolve_claude_imports writes for each
# inlined @import (``> imported from ...``). The cross-seam scan removes these
# lines so the text on both sides of a marker is examined as one contiguous
# stream.
_IMPORT_MARKER_RE = re.compile(
    r"^>[ \t]*imported from[ \t].*$",
    flags=re.MULTILINE,
)
70+
71+
def _section_body(section: str) -> str:
    """Return the body of a rendered ``## <label>\\n\\n<body>`` section.

    Only the single ``## <label>`` header line that the loader prepends is
    removed, so markdown headings inside ``<body>`` are preserved. A fragment
    that is not in that rendered shape (no leading ``## `` line followed by a
    blank line) is returned unchanged: cutting it at its first blank line
    would drop real content and hide it from the cross-seam scan.

    Args:
        section: One rendered context section, or any other text fragment.

    Returns:
        The text after the header and its blank line, or *section* itself
        when no such header is present.
    """
    header, separator, body = section.partition("\n\n")
    # The header we add is exactly one line. Anything else that precedes the
    # first blank line is document content and must stay in the scanned text.
    is_own_header = header.startswith("## ") and "\n" not in header
    if separator and is_own_header:
        return body
    return section
80+
81+
def _scan_context_seams(seam_text: str, label: str) -> Optional[str]:
    """Detect a prompt injection that straddles two concatenated fragments.

    Scanning each file or import on its own (``_scan_context_content``) cannot
    see this case: the structural text placed between fragments (``## label``
    headers, ``> imported from ...`` markers) interrupts a contiguous regex
    match, so a payload split across two neighbouring fragments passes every
    individual scan. The caller is expected to hand in the fragment bodies
    already joined with that structural text removed; they are scanned here
    as a single stream.

    Args:
        seam_text: The joined fragment bodies to scan.
        label: Name of the context source, used in the log line and in the
            placeholder text.

    Returns:
        A ``[BLOCKED: ...]`` placeholder string when the scan reports
        findings, otherwise ``None``. The whole source is blocked rather than
        one fragment because it is the combination that is malicious.
    """
    threats = _scan_for_threats(seam_text, scope="context")
    if threats:
        # Join once and reuse for both the log record and the placeholder.
        summary = ", ".join(threats)
        logger.warning(
            "Context %s blocked (cross-source seam): %s", label, summary
        )
        return (
            f"[BLOCKED: {label} contained potential prompt injection spanning "
            f"concatenated sources ({summary}). Content not loaded.]"
        )
    return None
107+
108+
66109def _find_git_root (start : Path ) -> Optional [Path ]:
67110 """Walk *start* and its parents looking for a ``.git`` directory.
68111
@@ -1920,6 +1963,14 @@ def _load_agents_md(cwd_path: Path, context_length: Optional[int] = None) -> str
19201963 "AGENTS.md context loaded from: %s" ,
19211964 ", " .join (str (p ) for p in loaded_paths ),
19221965 )
1966+ # Cross-source seam scan: catch an injection split across two adjacent
1967+ # AGENTS.md files (each scanned clean individually) by scanning the joined
1968+ # bodies (without the ## headers) as one stream.
1969+ seam_block = _scan_context_seams (
1970+ "\n " .join (_section_body (s ) for s in [* base , * overrides ]), "AGENTS.md"
1971+ )
1972+ if seam_block :
1973+ return seam_block
19231974 merged = "\n \n " .join ([* base , * overrides ])
19241975 return _truncate_content (
19251976 merged , "AGENTS.md" , context_length = context_length ,
@@ -1945,6 +1996,16 @@ def _load_claude_md(cwd_path: Path, context_length: Optional[int] = None) -> str
19451996 # paths like @config/system.md.
19461997 content = _resolve_claude_imports (content , cwd_path )
19471998 content = _scan_context_content (content , name )
1999+ # Cross-source seam scan: the blockquote import markers
2000+ # still break a contiguous regex, so a payload split
2001+ # body-head→import or import→import survives the re-scan
2002+ # above. Strip the markers and scan the bodies as one
2003+ # stream to catch it.
2004+ seam_block = _scan_context_seams (
2005+ _IMPORT_MARKER_RE .sub ("" , content ), name
2006+ )
2007+ if seam_block :
2008+ return seam_block
19482009 result = f"## { name } \n \n { content } "
19492010 return _truncate_content (
19502011 result , "CLAUDE.md" , context_length = context_length ,
0 commit comments