Skip to content

Commit 1e647d8

Browse files
benvinegarclaude
andcommitted
fix: harden script error paths and auto-discover smoke tests
- scan_podcast.swift: validate input existence, positive sample interval, readable duration, and a minimum sample count so audio-only or missing inputs fail with a clean error instead of a Swift range trap - download_sample_media.sh: encode to a .partial file and atomically rename on success so interrupted downloads never leave truncated samples; reject multiple positional output dirs - cut_clips.py: catch ffmpeg failures with a clean error, write the manifest for already-successful clips before exiting, and shlex-quote the dry-run command output - auto-discover tests/test_*.sh in smoke.test.ts and scripts/test.sh instead of maintaining the list in two places - add a cut_clips --dry-run parsing test (markdown separators, JSON, CSV, dedupe, spaced paths) and scanner degenerate-input tests Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent cf567d0 commit 1e647d8

8 files changed

Lines changed: 186 additions & 22 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,7 @@ All notable user-visible changes to this project are documented in this file.
1212
### Changed
1313

1414
### Fixed
15+
16+
- Make the visual scanner fail with clean errors instead of crashing on missing inputs, zero sample intervals, and audio-only files.
17+
- Download sample media atomically so interrupted downloads no longer leave truncated files that look complete.
18+
- Report ffmpeg clip-cut failures cleanly and still write the manifest for clips that already succeeded.

scripts/cut_clips.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import csv
3636
import json
3737
import re
38+
import shlex
3839
import shutil
3940
import subprocess
4041
import sys
@@ -269,6 +270,12 @@ def iter_selected_clips(clips: list[ClipSpec], limit: Optional[int]) -> Iterable
269270
yield index, clip
270271

271272

273+
def write_manifest(manifest: dict[str, Any], output_dir: Path) -> None:
274+
manifest_path = output_dir / "manifest.json"
275+
manifest_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8")
276+
print(f"wrote manifest: {manifest_path}", file=sys.stderr)
277+
278+
272279
def main() -> int:
273280
parser = argparse.ArgumentParser(description="Cut social/highlight clip candidates from media using ffmpeg.")
274281
parser.add_argument("input_media", type=Path, help="Source audio/video file")
@@ -337,9 +344,17 @@ def main() -> int:
337344
)
338345

339346
if args.dry_run:
340-
print(" ".join(command))
347+
print(shlex.join(command))
341348
else:
342-
subprocess.run(command, check=True)
349+
try:
350+
subprocess.run(command, check=True)
351+
except subprocess.CalledProcessError as error:
352+
# Record the clips that already succeeded before bailing out.
353+
write_manifest(manifest, args.output_dir)
354+
raise SystemExit(
355+
f"error: ffmpeg failed for clip {index} ({clip.title}): "
356+
f"exit code {error.returncode}"
357+
)
343358
print(output_path)
344359

345360
manifest["clips"].append(
@@ -361,9 +376,7 @@ def main() -> int:
361376
}
362377
)
363378

364-
manifest_path = args.output_dir / "manifest.json"
365-
manifest_path.write_text(json.dumps(manifest, indent=2) + "\n", encoding="utf-8")
366-
print(f"wrote manifest: {manifest_path}", file=sys.stderr)
379+
write_manifest(manifest, args.output_dir)
367380
return 0
368381

369382

scripts/download_sample_media.sh

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ EOF
3030
}
3131

3232
output_dir="dist/test-fixtures/open-license/cordkillers-572"
33+
output_dir_set=0
3334
dry_run=0
3435

3536
for arg in "$@"; do
@@ -47,7 +48,13 @@ for arg in "$@"; do
4748
exit 1
4849
;;
4950
*)
51+
if [[ "$output_dir_set" -eq 1 ]]; then
52+
echo "error: multiple output directories given: $output_dir and $arg" >&2
53+
usage >&2
54+
exit 1
55+
fi
5056
output_dir="$arg"
57+
output_dir_set=1
5158
;;
5259
esac
5360
done
@@ -68,6 +75,7 @@ crf=${SAMPLE_CRF:-23}
6875

6976
mkdir -p "$output_dir"
7077
output_video="$output_dir/${basename}.mp4"
78+
partial_video="$output_dir/${basename}.partial.mp4"
7179
attribution_file="$output_dir/ATTRIBUTION.md"
7280
command_file="$output_dir/ffmpeg-command.txt"
7381

@@ -89,6 +97,13 @@ ffmpeg_command=(
8997
"$output_video"
9098
)
9199

100+
# The encode itself targets a .partial file that is atomically renamed on
101+
# success, so an interrupted download never leaves a truncated sample that
102+
# looks complete to later "reuse existing outputs" passes.
103+
run_command=("${ffmpeg_command[@]}")
104+
run_index=$((${#run_command[@]} - 1))
105+
run_command[run_index]="$partial_video"
106+
92107
cat >"$attribution_file" <<EOF
93108
# Open-license sample media attribution
94109
@@ -142,7 +157,10 @@ if ! command -v ffmpeg >/dev/null 2>&1; then
142157
exit 1
143158
fi
144159

145-
"${ffmpeg_command[@]}"
160+
trap 'rm -f "$partial_video"' EXIT
161+
"${run_command[@]}"
162+
mv "$partial_video" "$output_video"
163+
trap - EXIT
146164

147165
if command -v ffprobe >/dev/null 2>&1; then
148166
ffprobe -v error -show_entries format=duration,size -of default=noprint_wrappers=1 "$output_video"

scripts/scan_podcast.swift

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,12 +173,26 @@ let outputDir = URL(fileURLWithPath: CommandLine.arguments[2], isDirectory: true
173173
let interval = Double(CommandLine.arguments.count >= 4 ? CommandLine.arguments[3] : "0.5") ?? 0.5
174174

175175
let fm = FileManager.default
176+
177+
if !fm.fileExists(atPath: inputPath) {
178+
fputs("error: input file not found: \(inputPath)\n", stderr)
179+
exit(1)
180+
}
181+
if !(interval > 0) {
182+
fputs("error: sample interval must be greater than 0\n", stderr)
183+
exit(1)
184+
}
176185
try fm.createDirectory(at: outputDir, withIntermediateDirectories: true)
177186
let thumbsDir = outputDir.appendingPathComponent("thumbs", isDirectory: true)
178187
try fm.createDirectory(at: thumbsDir, withIntermediateDirectories: true)
179188

180189
let asset = AVURLAsset(url: URL(fileURLWithPath: inputPath))
181190
let durationSeconds = CMTimeGetSeconds(asset.duration)
191+
192+
if !durationSeconds.isFinite || durationSeconds <= 0 {
193+
fputs("error: could not read a media duration from \(inputPath); is it a valid video file?\n", stderr)
194+
exit(1)
195+
}
182196
let frameGenerator = AVAssetImageGenerator(asset: asset)
183197
frameGenerator.appliesPreferredTrackTransform = true
184198
frameGenerator.maximumSize = CGSize(width: 160, height: 90)
@@ -212,6 +226,15 @@ for i in 0..<sampleCount {
212226
}
213227

214228
let scanElapsed = Date().timeIntervalSince(scanStart)
229+
230+
if samples.count < 3 {
231+
fputs(
232+
"error: sampled only \(samples.count) frame(s); need at least 3. "
233+
+ "Audio-only inputs are not supported by the visual scanner.\n",
234+
stderr)
235+
exit(1)
236+
}
237+
215238
let diffs = samples.dropFirst().map { $0.diff }
216239
let p95 = percentile(diffs, 0.95)
217240
let p98 = percentile(diffs, 0.98)

scripts/test.sh

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,9 @@ set -euo pipefail
44
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
55
cd "$ROOT_DIR"
66

7-
bash tests/test_transcribe_video.sh
8-
bash tests/test_prepare_transcript_analysis.sh
9-
bash tests/test_scan_podcast.sh
10-
bash tests/test_cut_clips.sh
11-
bash tests/test_youtube_publish.sh
12-
bash tests/test_download_sample_media.sh
13-
bash tests/test_launcher.sh
7+
# Discover smoke tests instead of maintaining a registry; a new
8+
# tests/test_*.sh file is automatically picked up.
9+
for smoke_test in tests/test_*.sh; do
10+
echo "==> $smoke_test"
11+
bash "$smoke_test"
12+
done

tests/smoke.test.ts

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
import { describe, expect, test } from "vitest";
22
import { execFileSync } from "node:child_process";
3+
import { readdirSync } from "node:fs";
34

4-
const smokeTests = [
5-
"tests/test_transcribe_video.sh",
6-
"tests/test_prepare_transcript_analysis.sh",
7-
"tests/test_scan_podcast.sh",
8-
"tests/test_cut_clips.sh",
9-
"tests/test_youtube_publish.sh",
10-
"tests/test_download_sample_media.sh",
11-
"tests/test_launcher.sh",
12-
];
5+
// Discover smoke tests instead of maintaining a registry; a new
6+
// tests/test_*.sh file is automatically picked up.
7+
const smokeTests = readdirSync("tests")
8+
.filter((name) => name.startsWith("test_") && name.endsWith(".sh"))
9+
.sort()
10+
.map((name) => `tests/${name}`);
11+
12+
if (smokeTests.length === 0) {
13+
throw new Error("no tests/test_*.sh smoke tests found");
14+
}
1315

1416
describe("podguy smoke tests", () => {
1517
for (const script of smokeTests) {

tests/test_cut_clips_parsing.sh

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env bash
2+
set -euo pipefail
3+
4+
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
5+
cd "$ROOT_DIR"
6+
7+
# Pure parsing coverage via --dry-run: no ffmpeg or real media required.
8+
9+
TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/podguy-cut-parse-test.XXXXXX")"
10+
trap 'rm -rf "$TMP_DIR"' EXIT
11+
12+
media="$TMP_DIR/source with spaces.mp4"
13+
printf 'fake media bytes' >"$media"
14+
15+
# Markdown with mixed range separators, fractional seconds, and a duplicate.
16+
cat >"$TMP_DIR/clips.md" <<'EOF'
17+
# Clip candidates
18+
19+
## Arrow separator
20+
- 00:00:01 --> 00:00:03 — arrow range
21+
22+
## Dash separator
23+
- 00:01:12.5 - 00:02:03 — fractional start
24+
25+
## Word separator
26+
- 00:03:00 to 00:03:30 — word range
27+
28+
## Duplicate of the arrow range
29+
- 00:00:01 - 00:00:03 — should be deduped
30+
EOF
31+
32+
uv run python scripts/cut_clips.py "$media" "$TMP_DIR/clips.md" "$TMP_DIR/md" --dry-run >/dev/null
33+
34+
cat >"$TMP_DIR/clips.json" <<'EOF'
35+
{
36+
"clips": [
37+
{ "title": "JSON clip", "start": "00:00:05", "end": "00:00:09" }
38+
]
39+
}
40+
EOF
41+
42+
uv run python scripts/cut_clips.py "$media" "$TMP_DIR/clips.json" "$TMP_DIR/json" --dry-run >/dev/null
43+
44+
cat >"$TMP_DIR/clips.csv" <<'EOF'
45+
title,start,end
46+
CSV clip,00:00:10,00:00:14
47+
EOF
48+
49+
uv run python scripts/cut_clips.py "$media" "$TMP_DIR/clips.csv" "$TMP_DIR/csv" --dry-run >/dev/null
50+
51+
uv run python - "$TMP_DIR" <<'PY'
52+
import json
53+
import sys
54+
from pathlib import Path
55+
56+
tmp = Path(sys.argv[1])
57+
58+
md = json.loads((tmp / "md" / "manifest.json").read_text())
59+
clips = md["clips"]
60+
assert len(clips) == 3, f"expected 3 deduped markdown clips, got {len(clips)}"
61+
starts = [clip["start"] for clip in clips]
62+
assert starts == [1.0, 72.5, 180.0], starts
63+
assert clips[1]["end"] == 123.0, clips[1]
64+
65+
js = json.loads((tmp / "json" / "manifest.json").read_text())
66+
assert len(js["clips"]) == 1
67+
assert js["clips"][0]["start"] == 5.0
68+
assert js["clips"][0]["title"] == "JSON clip"
69+
70+
csv = json.loads((tmp / "csv" / "manifest.json").read_text())
71+
assert len(csv["clips"]) == 1
72+
assert csv["clips"][0]["start"] == 10.0
73+
assert csv["clips"][0]["end"] == 14.0
74+
75+
# The dry-run command list must keep the spaced media path as one argument.
76+
assert any("source with spaces.mp4" in part for part in md["clips"][0]["command"])
77+
78+
print("cut_clips parsing assertions passed")
79+
PY
80+
81+
echo "ok: cut_clips parsing smoke test passed"

tests/test_scan_podcast.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,28 @@ for column in ["time_seconds", "timecode", "label", "thumb_path"]:
5252
print("scan fixture assertions passed")
5353
PY
5454

55+
# Degenerate inputs must fail with a clean error, not a Swift crash.
56+
57+
if swift scripts/scan_podcast.swift "$work_dir/does-not-exist.mp4" "$work_dir/scan-missing" 0.5 >/dev/null 2>"$work_dir/missing.err"; then
58+
echo "error: expected missing input to fail" >&2
59+
exit 1
60+
fi
61+
grep -q "error: input file not found" "$work_dir/missing.err"
62+
63+
if swift scripts/scan_podcast.swift "$fixture" "$work_dir/scan-interval" 0 >/dev/null 2>"$work_dir/interval.err"; then
64+
echo "error: expected zero interval to fail" >&2
65+
exit 1
66+
fi
67+
grep -q "error: sample interval must be greater than 0" "$work_dir/interval.err"
68+
69+
if command -v ffmpeg >/dev/null 2>&1; then
70+
audio_fixture="$work_dir/audio-only.wav"
71+
ffmpeg -hide_banner -loglevel error -y -f lavfi -i "sine=frequency=440:duration=3" "$audio_fixture"
72+
if swift scripts/scan_podcast.swift "$audio_fixture" "$work_dir/scan-audio" 0.5 >/dev/null 2>"$work_dir/audio.err"; then
73+
echo "error: expected audio-only input to fail cleanly" >&2
74+
exit 1
75+
fi
76+
grep -q "error:" "$work_dir/audio.err"
77+
fi
78+
5579
echo "ok: scan smoke test passed"

0 commit comments

Comments
 (0)