22"""
33Check rendered specs for drift.
44
5- Compares the committed specs tree against the working tree (after
6- `azldev component render -a` has been run) and reports meaningful differences,
7- filtering out changelog-timestamp noise.
5+ Runs inside the render container: compares the committed specs tree against
6+ the working tree (after `azldev component render -a` has been run) and writes
7+ a JSON report and a git patch to the mounted output volume. The host never
8+ invokes git against PR data, so this script runs in a trusted environment
9+ and doesn't need host-side hardening against poisoned .git/config.
810
911Usage:
10- python check_rendered_specs.py --specs-dir specs
11- python check_rendered_specs.py --specs-dir specs --report report.json --patch fix.patch
12+ python3 check_rendered_specs.py --specs-dir specs
13+ python3 check_rendered_specs.py --specs-dir specs --report report.json --patch fix.patch
1214
1315Exit codes:
1416 0 — specs are up to date (timestamp-only noise filtered)
3234# ---------------------------------------------------------------------------
3335
3436# Matches the azldev-generated changelog line.
37+ # Tightly coupled to the format emitted by azldev's render pipeline; if azldev
38+ # ever changes the emitted form (email suffix, version tag, different
39+ # whitespace) this regex must be updated or every spec will look like drift.
40+ # Owner: azure-linux-dev-tools (cmd that emits "* <date> azldev <>" lines).
3541# e.g. "* Wed Apr 08 2026 azldev <> - 1.0-1"
3642_CHANGELOG_DATE_RE = re .compile (
37- r"^\* [A-Z][a-z]{2} [A-Z][a-z]{2} [0-9]{2} [0-9]{4} azldev "
43+ r"^\* [A-Z][a-z]{2} [A-Z][a-z]{2} [0-9]{2} [0-9]{4} azldev\b "
3844)
3945
4046# ---------------------------------------------------------------------------
4147# Git helpers
4248# ---------------------------------------------------------------------------
4349
4450
45- def _git (* args : str ) -> str :
46- """Run a git command and return stdout."""
47- return subprocess .run (
48- ["git" , * args ], capture_output = True , text = True , check = True
49- ).stdout
51+ def _git_bytes (* args : str ) -> bytes :
52+ """Run a git command and return stdout (bytes)."""
53+ return subprocess .run (["git" , * args ], capture_output = True , check = True ).stdout
5054
5155
52- def _git_lines (* args : str ) -> list [str ]:
53- """Run a git command and return non-empty output lines."""
54- return [line for line in _git (* args ).splitlines () if line ]
56+ def _git_lines_z (* args : str ) -> list [str ]:
57+ """Run a git command with -z-compatible args and return NUL-split lines."""
58+ out = _git_bytes (* args )
59+ return [p .decode ("utf-8" ) for p in out .split (b"\x00 " ) if p ]
5560
5661
5762# ---------------------------------------------------------------------------
@@ -64,7 +69,7 @@ def normalize_changelog_date(text: str) -> str:
6469 out : list [str ] = []
6570 for line in text .splitlines (keepends = True ):
6671 if _CHANGELOG_DATE_RE .match (line ):
67- line = _CHANGELOG_DATE_RE .sub ("* DATEPLACEHOLDER azldev " , line )
72+ line = _CHANGELOG_DATE_RE .sub ("* DATEPLACEHOLDER azldev" , line )
6873 out .append (line )
6974 return "" .join (out )
7075
@@ -89,11 +94,24 @@ def classify_changes(specs_dir: Path) -> tuple[list[str], list[str], list[str]]:
8994
9095 The three lists are disjoint: changed contains only modified files,
9196 missing contains only deleted files, and extra contains untracked files.
97+
98+ Untracked enumeration deliberately does NOT honor `.gitignore` — a
99+ malicious PR could otherwise commit a `.gitignore` under specs_dir to
100+ hide newly rendered files and make the check green. We also drop any
101+ `.gitignore`/`.gitattributes` files found under specs_dir since they
102+ have no business in a rendered-output tree.
92103 """
93104 sd = str (specs_dir )
94- changed = _git_lines ("diff" , "--diff-filter=M" , "--name-only" , "--" , sd )
95- extra = _git_lines ("ls-files" , "--others" , "--exclude-standard" , "--" , sd )
96- missing = _git_lines ("ls-files" , "--deleted" , "--" , sd )
105+ changed = _git_lines_z ("diff" , "-z" , "--diff-filter=M" , "--name-only" , "--" , sd )
106+ # No --exclude-standard: list ALL untracked files so a PR-committed
107+ # .gitignore under specs_dir can't hide drift.
108+ extra_raw = _git_lines_z ("ls-files" , "-z" , "--others" , "--" , sd )
109+ missing = _git_lines_z ("ls-files" , "-z" , "--deleted" , "--" , sd )
110+ # Filter out .gitignore / .gitattributes anywhere under specs_dir — they
111+ # shouldn't be in rendered output and shouldn't influence our enumeration.
112+ extra = [
113+ p for p in extra_raw if Path (p ).name not in (".gitignore" , ".gitattributes" )
114+ ]
97115 return changed , extra , missing
98116
99117
@@ -108,14 +126,30 @@ def filter_timestamp_noise(changed_files: list[str], specs_dir: Path) -> list[di
108126 file_path = Path (path_str )
109127 is_spec = file_path .suffix == ".spec"
110128
129+ # Symlinks in a rendered-output tree are suspicious (could point
130+ # anywhere on the runner's filesystem). Flag and skip content reads.
131+ if file_path .is_symlink ():
132+ print (
133+ f"::warning::Symlink in rendered-specs tree, treated as drift: { path_str } " ,
134+ file = sys .stderr ,
135+ )
136+ real_diffs .append (
137+ {
138+ "path" : path_str ,
139+ "component" : component_from_path (path_str ),
140+ "diff" : f"Symlink { path_str } — refusing to follow" ,
141+ }
142+ )
143+ continue
144+
111145 # Read committed version — use bytes to handle binary files
112146 try :
113- committed_bytes = subprocess . run (
114- [ "git" , "show" , f"HEAD: { path_str } " ],
115- capture_output = True ,
116- check = True ,
117- ). stdout
118- except subprocess . CalledProcessError :
147+ committed_bytes = _git_bytes ( "show" , f"HEAD: { path_str } " )
148+ except subprocess . CalledProcessError as exc :
149+ print (
150+ f"::warning::git show HEAD: { path_str } failed: { exc } " ,
151+ file = sys . stderr ,
152+ )
119153 continue
120154
121155 # Try to decode as UTF-8; if it fails, it's binary — always a real diff
@@ -131,9 +165,26 @@ def filter_timestamp_noise(changed_files: list[str], specs_dir: Path) -> list[di
131165 )
132166 continue
133167
168+ # Read working tree; use O_NOFOLLOW-equivalent guard above (is_symlink),
169+ # and strict decode so true binaries route through the binary branch.
134170 try :
135- working = file_path .read_text ( encoding = "utf-8" , errors = "replace" )
171+ working_bytes = file_path .read_bytes ( )
136172 except FileNotFoundError :
173+ print (
174+ f"::warning::working tree file missing during read: { path_str } " ,
175+ file = sys .stderr ,
176+ )
177+ continue
178+ try :
179+ working = working_bytes .decode ("utf-8" )
180+ except UnicodeDecodeError :
181+ real_diffs .append (
182+ {
183+ "path" : path_str ,
184+ "component" : component_from_path (path_str ),
185+ "diff" : f"Binary file { path_str } differs" ,
186+ }
187+ )
137188 continue
138189
139190 if is_spec :
@@ -228,53 +279,54 @@ def generate_patch(
228279 if not paths :
229280 return b""
230281
231- # Write paths to temp files outside CWD (which may be an untrusted
232- # PR checkout where symlinks could redirect writes).
233282 pathspec_fd , pathspec_path = tempfile .mkstemp (prefix = "render-check-" , suffix = ".txt" )
234283 with os .fdopen (pathspec_fd , "w" ) as f :
235284 f .write ("\n " .join (paths ))
236285
237- # Mark untracked files as intent-to-add so git diff includes them
238- extra_pathspec_path = None
239- if extra_files :
240- extra_fd , extra_pathspec_path = tempfile . mkstemp (
241- prefix = "render-check-extra-" , suffix = ".txt"
242- )
243- with os . fdopen ( extra_fd , "w" ) as f :
244- f . write ( " \n " . join ( extra_files ))
245- try :
286+ extra_pathspec_path : str | None = None
287+ try :
288+ # Mark untracked files as intent-to-add so git diff includes them
289+ if extra_files :
290+ extra_fd , extra_pathspec_path = tempfile . mkstemp (
291+ prefix = "render-check-extra-" , suffix = ".txt"
292+ )
293+ with os . fdopen ( extra_fd , "w" ) as f :
294+ f . write ( " \n " . join ( extra_files ))
246295 subprocess .run (
247296 ["git" , "add" , "-N" , "--pathspec-from-file" , extra_pathspec_path ],
248297 check = True ,
249298 capture_output = True ,
250299 )
251- except subprocess .CalledProcessError :
252- pass
253300
254- try :
255- result = subprocess .run (
256- ["git" , "diff" , "--pathspec-from-file" , pathspec_path ],
257- capture_output = True ,
258- check = True ,
259- )
260- patch = result .stdout
261- except subprocess .CalledProcessError :
262- patch = b""
263- finally :
264- Path (pathspec_path ).unlink (missing_ok = True )
265-
266- # Undo the intent-to-add so we don't leave index dirty
267- if extra_pathspec_path :
268301 try :
269- subprocess .run (
270- ["git" , "reset" , "--pathspec-from-file" , extra_pathspec_path ],
271- check = True ,
302+ result = subprocess .run (
303+ ["git" , "diff" , "--pathspec-from-file" , pathspec_path ],
272304 capture_output = True ,
305+ check = True ,
306+ )
307+ patch = result .stdout
308+ except subprocess .CalledProcessError as exc :
309+ print (
310+ f"::warning::git diff failed while generating patch: { exc } " ,
311+ file = sys .stderr ,
273312 )
274- except subprocess .CalledProcessError :
275- pass
276- finally :
277- Path (extra_pathspec_path ).unlink (missing_ok = True )
313+ patch = b""
314+ finally :
315+ Path (pathspec_path ).unlink (missing_ok = True )
316+ if extra_pathspec_path :
317+ try :
318+ subprocess .run (
319+ ["git" , "reset" , "--pathspec-from-file" , extra_pathspec_path ],
320+ check = True ,
321+ capture_output = True ,
322+ )
323+ except subprocess .CalledProcessError as exc :
324+ print (
325+ f"::warning::git reset (undo intent-to-add) failed: { exc } " ,
326+ file = sys .stderr ,
327+ )
328+ finally :
329+ Path (extra_pathspec_path ).unlink (missing_ok = True )
278330
279331 return patch
280332
0 commit comments