Skip to content

Commit 4be5618

Browse files
committed
harden against RCE
1 parent 8f92ed9 commit 4be5618

3 files changed

Lines changed: 244 additions & 117 deletions

File tree

.github/workflows/check-rendered-specs.yml

Lines changed: 49 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,17 @@ on:
3131

3232
permissions: {}
3333

34+
# Belt-and-suspenders: the stub also sets concurrency, but keep it here so the
35+
# contract survives refactoring / direct invocation.
36+
concurrency:
37+
group: render-check-${{ inputs.repo }}-${{ inputs.pr-number }}
38+
cancel-in-progress: true
39+
3440
jobs:
3541
check:
3642
name: Rendered specs freshness
3743
runs-on: ubuntu-latest
44+
timeout-minutes: 60
3845
permissions:
3946
contents: read
4047
pull-requests: write # Post/update/delete drift comments on PRs
@@ -78,64 +85,64 @@ jobs:
7885
-f .github/workflows/containers/render.Dockerfile \
7986
.github/workflows/containers/
8087
81-
# Render runs inside a container with SYS_ADMIN capability because mock
82-
# needs mount namespaces for chroot operations. We avoid --privileged to
83-
# limit the blast radius if a malicious spec's %() macro achieves code
84-
# execution inside the container. Only the PR checkout is mounted (not
85-
# the full workspace) — the trusted base checkout where our scripts live
86-
# is not accessible. The azldev binary is mounted read-only.
88+
# Render + drift-check run entirely inside the container. The host never
89+
# invokes git against PR data: all git operations happen in the sandboxed
90+
# environment, and the host only reads two trusted-shape outputs
91+
# (report.json, rendered-specs.patch) from a dedicated output volume.
92+
# This dodges the whole class of poisoned-.git/config attacks
93+
# (diff.external, diff drivers, filter drivers, hooks, etc.).
8794
#
88-
# NOTE: --security-opt no-new-privileges would be ideal but mock's
89-
# userhelper requires setuid, which that flag blocks.
90-
- name: Render specs
95+
# SYS_ADMIN is needed for mock (mount namespaces for chroot). We avoid
96+
# --privileged to limit blast radius. --security-opt no-new-privileges
97+
# would be nice but mock's userhelper requires setuid, which that blocks.
98+
- name: Render + check for drift
99+
id: check
100+
continue-on-error: true # TODO: flip off once check stabilizes (see PR #16674)
91101
env:
92102
WORKSPACE: ${{ github.workspace }}
93103
run: |
94-
set -o pipefail
104+
set -euo pipefail
105+
mkdir -p "$WORKSPACE/render-output"
95106
docker run --rm \
96107
--cap-add=SYS_ADMIN \
97108
--security-opt seccomp=unconfined \
98109
-v "$WORKSPACE/pr-head:/workdir" \
110+
-v "$WORKSPACE/render-output:/output" \
111+
-v "$WORKSPACE/.github/workflows/scripts:/scripts:ro" \
99112
-v "$(go env GOPATH)/bin:/gobin:ro" \
100113
-e PATH="/gobin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" \
101114
localhost/azldev-render \
102-
azldev component render -q -a --clean-stale -O json \
103-
| tee render-output.json
104-
105-
- name: Resolve specs directory
106-
id: specs-dir
107-
run: |
108-
SPECS_DIR=$(azldev -C pr-head config dump -q | grep 'rendered-specs-dir' | cut -d"'" -f2)
109-
echo "path=$SPECS_DIR" >> "$GITHUB_OUTPUT"
110-
111-
- name: Check for drift
112-
id: check
113-
continue-on-error: true # Non-blocking while the feature stabilizes
114-
env:
115-
SPECS_DIR: ${{ steps.specs-dir.outputs.path }}
116-
run: |
117-
cd pr-head
118-
python -I "$GITHUB_WORKSPACE/.github/workflows/scripts/check_rendered_specs.py" \
119-
--specs-dir "$SPECS_DIR" \
120-
--report "$GITHUB_WORKSPACE/render-check-report.json" \
121-
--patch "$GITHUB_WORKSPACE/rendered-specs.patch"
115+
bash -eu -o pipefail -c '
116+
azldev component render -q -a --clean-stale -O json \
117+
> /output/render-output.json
118+
SPECS_DIR=$(azldev config dump -q -f json \
119+
| python3 -c "import json,sys; print(json.load(sys.stdin)[\"project\"][\"renderedSpecsDir\"])")
120+
python3 /scripts/check_rendered_specs.py \
121+
--specs-dir "$SPECS_DIR" \
122+
--report /output/render-check-report.json \
123+
--patch /output/rendered-specs.patch
124+
'
122125
123-
- name: Upload fix patch
126+
# Dual upload: `archive: false` (v7 feature) gives browser users a direct
127+
# download of the raw patch (artifact name is derived from the filename,
128+
# so `name:` is ignored here — that's why we need the second upload).
129+
# The zipped `rendered-specs-patch` artifact is for `gh run download`,
130+
# which only works with named artifacts.
131+
- name: Upload fix patch (unzipped, for browser download)
124132
id: upload-patch
125-
if: hashFiles('rendered-specs.patch') != ''
133+
if: hashFiles('render-output/rendered-specs.patch') != ''
126134
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
127135
with:
128-
path: rendered-specs.patch
136+
path: render-output/rendered-specs.patch
129137
archive: false
130138

131-
# Also upload as a zipped artifact so `gh run download` works.
132-
# See: https://github.com/cli/cli/issues/13012
133-
- name: Upload fix patch (for gh cli)
134-
if: hashFiles('rendered-specs.patch') != ''
139+
# See: https://github.com/cli/cli/issues/13012 for why this is needed.
140+
- name: Upload fix patch (zipped, for gh run download)
141+
if: hashFiles('render-output/rendered-specs.patch') != ''
135142
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
136143
with:
137144
name: rendered-specs-patch
138-
path: rendered-specs.patch
145+
path: render-output/rendered-specs.patch
139146

140147
- name: Post PR comment
141148
if: always()
@@ -147,10 +154,10 @@ jobs:
147154
PATCH_URL: ${{ steps.upload-patch.outputs.artifact-url }}
148155
RUN_ID: ${{ github.run_id }}
149156
run: |
150-
python -I .github/workflows/scripts/post_render_comment.py \
157+
python .github/workflows/scripts/post_render_comment.py \
151158
--repo "$PR_REPO" \
152159
--pr "$PR_NUMBER" \
153-
--report render-check-report.json \
160+
--report render-output/render-check-report.json \
154161
--artifacts-url "$PATCH_URL" \
155162
--run-id "$RUN_ID"
156163
@@ -160,4 +167,5 @@ jobs:
160167
with:
161168
name: render-output
162169
path: |
163-
render-output.json
170+
render-output/render-output.json
171+
render-output/render-check-report.json

.github/workflows/scripts/check_rendered_specs.py

Lines changed: 111 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
"""
33
Check rendered specs for drift.
44
5-
Compares the committed specs tree against the working tree (after
6-
`azldev component render -a` has been run) and reports meaningful differences,
7-
filtering out changelog-timestamp noise.
5+
Runs inside the render container: compares the committed specs tree against
6+
the working tree (after `azldev component render -a` has been run) and writes
7+
a JSON report and a git patch to the mounted output volume. The host never
8+
invokes git against PR data, so this script runs in a trusted environment
9+
and doesn't need host-side hardening against poisoned .git/config.
810
911
Usage:
10-
python check_rendered_specs.py --specs-dir specs
11-
python check_rendered_specs.py --specs-dir specs --report report.json --patch fix.patch
12+
python3 check_rendered_specs.py --specs-dir specs
13+
python3 check_rendered_specs.py --specs-dir specs --report report.json --patch fix.patch
1214
1315
Exit codes:
1416
0 — specs are up to date (timestamp-only noise filtered)
@@ -32,26 +34,29 @@
3234
# ---------------------------------------------------------------------------
3335

3436
# Matches the azldev-generated changelog line.
37+
# Tightly coupled to the format emitted by azldev's render pipeline; if azldev
38+
# ever changes the emitted form (email suffix, version tag, different
39+
# whitespace) this regex must be updated or every spec will look like drift.
40+
# Owner: azure-linux-dev-tools (cmd that emits "* <date> azldev <>" lines).
3541
# e.g. "* Wed Apr 08 2026 azldev <> - 1.0-1"
3642
_CHANGELOG_DATE_RE = re.compile(
37-
r"^\* [A-Z][a-z]{2} [A-Z][a-z]{2} [0-9]{2} [0-9]{4} azldev "
43+
r"^\* [A-Z][a-z]{2} [A-Z][a-z]{2} [0-9]{2} [0-9]{4} azldev\b"
3844
)
3945

4046
# ---------------------------------------------------------------------------
4147
# Git helpers
4248
# ---------------------------------------------------------------------------
4349

4450

45-
def _git(*args: str) -> str:
46-
"""Run a git command and return stdout."""
47-
return subprocess.run(
48-
["git", *args], capture_output=True, text=True, check=True
49-
).stdout
51+
def _git_bytes(*args: str) -> bytes:
52+
"""Run a git command and return stdout (bytes)."""
53+
return subprocess.run(["git", *args], capture_output=True, check=True).stdout
5054

5155

52-
def _git_lines(*args: str) -> list[str]:
53-
"""Run a git command and return non-empty output lines."""
54-
return [line for line in _git(*args).splitlines() if line]
56+
def _git_lines_z(*args: str) -> list[str]:
57+
"""Run a git command with -z-compatible args and return NUL-split lines."""
58+
out = _git_bytes(*args)
59+
return [p.decode("utf-8") for p in out.split(b"\x00") if p]
5560

5661

5762
# ---------------------------------------------------------------------------
@@ -64,7 +69,7 @@ def normalize_changelog_date(text: str) -> str:
6469
out: list[str] = []
6570
for line in text.splitlines(keepends=True):
6671
if _CHANGELOG_DATE_RE.match(line):
67-
line = _CHANGELOG_DATE_RE.sub("* DATEPLACEHOLDER azldev ", line)
72+
line = _CHANGELOG_DATE_RE.sub("* DATEPLACEHOLDER azldev", line)
6873
out.append(line)
6974
return "".join(out)
7075

@@ -89,11 +94,24 @@ def classify_changes(specs_dir: Path) -> tuple[list[str], list[str], list[str]]:
8994
9095
The three lists are disjoint: changed contains only modified files,
9196
missing contains only deleted files, and extra contains untracked files.
97+
98+
Untracked enumeration deliberately does NOT honor `.gitignore` — a
99+
malicious PR could otherwise commit a `.gitignore` under specs_dir to
100+
hide newly rendered files and make the check green. We also drop any
101+
`.gitignore`/`.gitattributes` files found under specs_dir since they
102+
have no business in a rendered-output tree.
92103
"""
93104
sd = str(specs_dir)
94-
changed = _git_lines("diff", "--diff-filter=M", "--name-only", "--", sd)
95-
extra = _git_lines("ls-files", "--others", "--exclude-standard", "--", sd)
96-
missing = _git_lines("ls-files", "--deleted", "--", sd)
105+
changed = _git_lines_z("diff", "-z", "--diff-filter=M", "--name-only", "--", sd)
106+
# No --exclude-standard: list ALL untracked files so a PR-committed
107+
# .gitignore under specs_dir can't hide drift.
108+
extra_raw = _git_lines_z("ls-files", "-z", "--others", "--", sd)
109+
missing = _git_lines_z("ls-files", "-z", "--deleted", "--", sd)
110+
# Filter out .gitignore / .gitattributes anywhere under specs_dir — they
111+
# shouldn't be in rendered output and shouldn't influence our enumeration.
112+
extra = [
113+
p for p in extra_raw if Path(p).name not in (".gitignore", ".gitattributes")
114+
]
97115
return changed, extra, missing
98116

99117

@@ -108,14 +126,30 @@ def filter_timestamp_noise(changed_files: list[str], specs_dir: Path) -> list[di
108126
file_path = Path(path_str)
109127
is_spec = file_path.suffix == ".spec"
110128

129+
# Symlinks in a rendered-output tree are suspicious (could point
130+
# anywhere on the runner's filesystem). Flag and skip content reads.
131+
if file_path.is_symlink():
132+
print(
133+
f"::warning::Symlink in rendered-specs tree, treated as drift: {path_str}",
134+
file=sys.stderr,
135+
)
136+
real_diffs.append(
137+
{
138+
"path": path_str,
139+
"component": component_from_path(path_str),
140+
"diff": f"Symlink {path_str} — refusing to follow",
141+
}
142+
)
143+
continue
144+
111145
# Read committed version — use bytes to handle binary files
112146
try:
113-
committed_bytes = subprocess.run(
114-
["git", "show", f"HEAD:{path_str}"],
115-
capture_output=True,
116-
check=True,
117-
).stdout
118-
except subprocess.CalledProcessError:
147+
committed_bytes = _git_bytes("show", f"HEAD:{path_str}")
148+
except subprocess.CalledProcessError as exc:
149+
print(
150+
f"::warning::git show HEAD:{path_str} failed: {exc}",
151+
file=sys.stderr,
152+
)
119153
continue
120154

121155
# Try to decode as UTF-8; if it fails, it's binary — always a real diff
@@ -131,9 +165,26 @@ def filter_timestamp_noise(changed_files: list[str], specs_dir: Path) -> list[di
131165
)
132166
continue
133167

168+
# Read working tree; use O_NOFOLLOW-equivalent guard above (is_symlink),
169+
# and strict decode so true binaries route through the binary branch.
134170
try:
135-
working = file_path.read_text(encoding="utf-8", errors="replace")
171+
working_bytes = file_path.read_bytes()
136172
except FileNotFoundError:
173+
print(
174+
f"::warning::working tree file missing during read: {path_str}",
175+
file=sys.stderr,
176+
)
177+
continue
178+
try:
179+
working = working_bytes.decode("utf-8")
180+
except UnicodeDecodeError:
181+
real_diffs.append(
182+
{
183+
"path": path_str,
184+
"component": component_from_path(path_str),
185+
"diff": f"Binary file {path_str} differs",
186+
}
187+
)
137188
continue
138189

139190
if is_spec:
@@ -228,53 +279,54 @@ def generate_patch(
228279
if not paths:
229280
return b""
230281

231-
# Write paths to temp files outside CWD (which may be an untrusted
232-
# PR checkout where symlinks could redirect writes).
233282
pathspec_fd, pathspec_path = tempfile.mkstemp(prefix="render-check-", suffix=".txt")
234283
with os.fdopen(pathspec_fd, "w") as f:
235284
f.write("\n".join(paths))
236285

237-
# Mark untracked files as intent-to-add so git diff includes them
238-
extra_pathspec_path = None
239-
if extra_files:
240-
extra_fd, extra_pathspec_path = tempfile.mkstemp(
241-
prefix="render-check-extra-", suffix=".txt"
242-
)
243-
with os.fdopen(extra_fd, "w") as f:
244-
f.write("\n".join(extra_files))
245-
try:
286+
extra_pathspec_path: str | None = None
287+
try:
288+
# Mark untracked files as intent-to-add so git diff includes them
289+
if extra_files:
290+
extra_fd, extra_pathspec_path = tempfile.mkstemp(
291+
prefix="render-check-extra-", suffix=".txt"
292+
)
293+
with os.fdopen(extra_fd, "w") as f:
294+
f.write("\n".join(extra_files))
246295
subprocess.run(
247296
["git", "add", "-N", "--pathspec-from-file", extra_pathspec_path],
248297
check=True,
249298
capture_output=True,
250299
)
251-
except subprocess.CalledProcessError:
252-
pass
253300

254-
try:
255-
result = subprocess.run(
256-
["git", "diff", "--pathspec-from-file", pathspec_path],
257-
capture_output=True,
258-
check=True,
259-
)
260-
patch = result.stdout
261-
except subprocess.CalledProcessError:
262-
patch = b""
263-
finally:
264-
Path(pathspec_path).unlink(missing_ok=True)
265-
266-
# Undo the intent-to-add so we don't leave index dirty
267-
if extra_pathspec_path:
268301
try:
269-
subprocess.run(
270-
["git", "reset", "--pathspec-from-file", extra_pathspec_path],
271-
check=True,
302+
result = subprocess.run(
303+
["git", "diff", "--pathspec-from-file", pathspec_path],
272304
capture_output=True,
305+
check=True,
306+
)
307+
patch = result.stdout
308+
except subprocess.CalledProcessError as exc:
309+
print(
310+
f"::warning::git diff failed while generating patch: {exc}",
311+
file=sys.stderr,
273312
)
274-
except subprocess.CalledProcessError:
275-
pass
276-
finally:
277-
Path(extra_pathspec_path).unlink(missing_ok=True)
313+
patch = b""
314+
finally:
315+
Path(pathspec_path).unlink(missing_ok=True)
316+
if extra_pathspec_path:
317+
try:
318+
subprocess.run(
319+
["git", "reset", "--pathspec-from-file", extra_pathspec_path],
320+
check=True,
321+
capture_output=True,
322+
)
323+
except subprocess.CalledProcessError as exc:
324+
print(
325+
f"::warning::git reset (undo intent-to-add) failed: {exc}",
326+
file=sys.stderr,
327+
)
328+
finally:
329+
Path(extra_pathspec_path).unlink(missing_ok=True)
278330

279331
return patch
280332

0 commit comments

Comments
 (0)