Skip to content

Commit d0c2eb2

Browse files
committed
Automated testing
1 parent eea41ca commit d0c2eb2

6 files changed

Lines changed: 404 additions & 0 deletions

File tree

.github/workflows/tests.yml

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,45 @@
1+
# CI for the transcribe project: mocked unit tests on Linux plus
# network-marked smoke tests on Linux, macOS and Windows.
name: Tests

on:
  push:
    branches: ["main"]
  pull_request:

# Least privilege: both jobs only check out the repository and run pytest,
# so the workflow token needs nothing beyond read access to the contents.
permissions:
  contents: read

jobs:
  # Tests not marked `network`.
  unit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          python-version: "3.13"

      - name: Install dependencies
        run: uv sync --group dev

      - name: Run unit tests
        run: uv run pytest tests/ -m "not network" -v

  # Tests marked `network`, once per operating system in the matrix.
  smoke:
    name: smoke (${{ matrix.os }})
    runs-on: ${{ matrix.os }}
    strategy:
      # Let every OS finish so one platform's failure does not hide the others.
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]

    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          python-version: "3.13"

      - name: Install dependencies
        run: uv sync --group dev

      - name: Run smoke tests
        run: uv run pytest tests/test_smoke.py -m network -v

justfile

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,14 @@ run *args:
1414
config *args:
1515
uv run transcribe config "$@"
1616

17+
# Run unit tests (no network)
18+
test:
19+
uv run pytest tests/ -m "not network" -v
20+
21+
# Run smoke tests against real YouTube (requires network)
22+
smoke:
23+
uv run pytest tests/test_smoke.py -m network -v
24+
1725
# List available Whisper models
1826
models:
1927
@echo "tiny (~75MB) fastest, lowest accuracy"

pyproject.toml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,15 @@ dependencies = [
1515
[project.scripts]
1616
transcribe = "transcribe.cli:app"
1717

18+
[dependency-groups]
19+
dev = ["pytest>=8.0.0"]
20+
1821
[build-system]
1922
requires = ["hatchling"]
2023
build-backend = "hatchling.build"
2124

2225
[tool.hatch.build.targets.wheel]
2326
packages = ["src/transcribe"]
27+
28+
[tool.pytest.ini_options]
29+
markers = ["network: requires network access and real external services"]

tests/test_cli.py

Lines changed: 234 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,234 @@
1+
import pytest
2+
from pathlib import Path
3+
from unittest.mock import MagicMock, patch
4+
5+
from typer.testing import CliRunner
6+
7+
from transcribe.cli import _extract_video_id, sanitize_filename, unique_path, app
8+
9+
runner = CliRunner()
10+
11+
12+
class TestExtractVideoId:
    """Check ``_extract_video_id`` against each accepted and rejected input form."""

    def test_standard_watch_url(self):
        url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
        assert _extract_video_id(url) == "dQw4w9WgXcQ"

    def test_watch_url_with_extra_params(self):
        url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=42s&list=PLxxx"
        assert _extract_video_id(url) == "dQw4w9WgXcQ"

    def test_short_url(self):
        url = "https://youtu.be/dQw4w9WgXcQ"
        assert _extract_video_id(url) == "dQw4w9WgXcQ"

    def test_short_url_with_params(self):
        url = "https://youtu.be/dQw4w9WgXcQ?si=abc123"
        assert _extract_video_id(url) == "dQw4w9WgXcQ"

    def test_embed_url(self):
        url = "https://www.youtube.com/embed/dQw4w9WgXcQ"
        assert _extract_video_id(url) == "dQw4w9WgXcQ"

    def test_bare_video_id(self):
        video_id = "dQw4w9WgXcQ"
        assert _extract_video_id(video_id) == video_id

    def test_invalid_returns_none(self):
        # The ``v=`` value here is only 8 characters long.
        extracted = _extract_video_id("https://example.com/watch?v=tooshort")
        assert extracted is None

    def test_empty_string_returns_none(self):
        extracted = _extract_video_id("")
        assert extracted is None

    def test_random_text_returns_none(self):
        extracted = _extract_video_id("not a url at all")
        assert extracted is None

    def test_id_with_hyphens_and_underscores(self):
        # YouTube IDs are exactly 11 chars and can contain - and _
        video_id = "abc-def_ghi"
        assert _extract_video_id(video_id) == video_id
43+
44+
45+
class TestSanitizeFilename:
    """Check ``sanitize_filename``: illegal characters, stripping and truncation."""

    @pytest.mark.parametrize("char", ['<', '>', ':', '"', '/', '\\', '|', '?', '*'])
    def test_illegal_chars_replaced(self, char):
        cleaned = sanitize_filename(f"name{char}here")
        assert "_" in cleaned
        # Checking only for "_" would pass even if the illegal character
        # were left in place, so also require that it is gone.
        assert char not in cleaned

    def test_normal_name_unchanged(self):
        assert sanitize_filename("My Video Title") == "My Video Title"

    def test_leading_trailing_spaces_stripped(self):
        assert sanitize_filename(" hello ") == "hello"

    def test_max_length_truncated(self):
        # With no explicit max_length, a 300-char name comes back as 200 chars.
        long_name = "a" * 300
        assert len(sanitize_filename(long_name)) == 200

    def test_custom_max_length(self):
        assert len(sanitize_filename("a" * 50, max_length=10)) == 10

    def test_empty_string(self):
        assert sanitize_filename("") == ""
65+
66+
67+
class TestUniquePath:
    """Check that ``unique_path`` adds a `` (n)`` counter only when needed."""

    def test_nonexistent_path_returned_unchanged(self, tmp_path):
        candidate = tmp_path / "file.txt"
        assert unique_path(candidate) == candidate

    def test_existing_file_gets_counter(self, tmp_path):
        taken = tmp_path / "file.txt"
        taken.write_text("x")
        expected = tmp_path / "file (1).txt"
        assert unique_path(taken) == expected

    def test_counter_increments_until_free(self, tmp_path):
        taken = tmp_path / "file.txt"
        taken.write_text("x")
        # Occupy the first two counter slots as well.
        for occupied in ("file (1).txt", "file (2).txt"):
            (tmp_path / occupied).write_text("x")
        assert unique_path(taken) == tmp_path / "file (3).txt"

    def test_preserves_extension(self, tmp_path):
        taken = tmp_path / "audio.mp3"
        taken.write_text("x")
        renamed = unique_path(taken)
        assert renamed.suffix == ".mp3"

    def test_no_extension(self, tmp_path):
        taken = tmp_path / "transcript"
        taken.write_text("x")
        expected = tmp_path / "transcript (1)"
        assert unique_path(taken) == expected
95+
96+
97+
# ---------------------------------------------------------------------------
98+
# Integration tests for the `run` command (external I/O fully mocked)
99+
# ---------------------------------------------------------------------------
100+
101+
# Shared inputs and canned outputs for the `run` command tests below.
YT_ID = "jNQXAC9IVRw"
YT_URL = f"https://www.youtube.com/watch?v={YT_ID}"
CAPTION_TEXT = "Hello from captions"
WHISPER_TEXT = "Hello from Whisper"
105+
106+
107+
@pytest.fixture()
def captions_found():
    """Yield a mock replacing ``transcribe.cli.fetch_youtube_captions``.

    The mock returns the tuple ``(CAPTION_TEXT, False)`` on every call.
    """
    captions_patch = patch(
        "transcribe.cli.fetch_youtube_captions",
        return_value=(CAPTION_TEXT, False),
    )
    with captions_patch as fetch_mock:
        yield fetch_mock
112+
113+
114+
@pytest.fixture()
def no_captions():
    """Yield a mock replacing ``transcribe.cli.fetch_youtube_captions``.

    The mock returns ``None``, which these tests use to mean "no captions".
    """
    captions_patch = patch(
        "transcribe.cli.fetch_youtube_captions",
        return_value=None,
    )
    with captions_patch as fetch_mock:
        yield fetch_mock
119+
120+
121+
@pytest.fixture()
def whisper_ok():
    """Yield a mock replacing ``transcribe.cli._run_whisper``.

    The mock returns ``(WHISPER_TEXT, 1.5)``, so no model is loaded.
    """
    whisper_patch = patch(
        "transcribe.cli._run_whisper",
        return_value=(WHISPER_TEXT, 1.5),
    )
    with whisper_patch as whisper_mock:
        yield whisper_mock
126+
127+
128+
@pytest.fixture()
def fake_yt_dlp(tmp_path):
    """Patch yt_dlp and TemporaryDirectory so no audio is downloaded.

    A placeholder ``audio.webm`` is written into ``tmp_path``.
    ``yt_dlp.YoutubeDL`` is replaced by a stub and
    ``transcribe.cli.tempfile.TemporaryDirectory`` by a mock whose context
    manager yields ``str(tmp_path)``. Yields ``tmp_path``.
    """
    audio_file = tmp_path / "audio.webm"
    audio_file.write_bytes(b"fake")

    class FakeYDL:
        """Stand-in for ``yt_dlp.YoutubeDL``; usable as a context manager.

        ``opts`` and ``download`` are optional so the stub accepts the same
        call shapes as the real class, whichever one the CLI uses.
        """

        def __init__(self, opts=None):
            pass

        def __enter__(self):
            return self

        def __exit__(self, *_):
            # Returns None, so exceptions raised inside the block propagate.
            pass

        def extract_info(self, url, download=True):
            return {"title": "Fake Video Title"}

    # Make TemporaryDirectory yield our tmp_path so the glob finds the audio file
    mock_td = MagicMock()
    mock_td.return_value.__enter__ = MagicMock(return_value=str(tmp_path))
    mock_td.return_value.__exit__ = MagicMock(return_value=False)

    # yt_dlp is imported lazily inside the function, so patch the module directly
    with patch("yt_dlp.YoutubeDL", FakeYDL):
        # NOTE(review): if transcribe.cli does `import tempfile`, this swaps
        # the attribute on the shared stdlib module while the fixture is
        # active. Confirm against transcribe/cli.py.
        with patch("transcribe.cli.tempfile.TemporaryDirectory", mock_td):
            yield tmp_path
155+
156+
157+
class TestRunCommand:
    """Drive the ``run`` command through ``CliRunner`` with external I/O mocked."""

    @staticmethod
    def _run(*cli_args):
        """Invoke ``run`` with *cli_args* and return the runner result."""
        return runner.invoke(app, ["run", *cli_args])

    # --- caption happy path ---

    def test_print_with_captions(self, captions_found):
        outcome = self._run(YT_URL, "--print")
        assert outcome.exit_code == 0
        assert CAPTION_TEXT in outcome.stdout

    def test_bare_video_id_with_captions(self, captions_found):
        outcome = self._run(YT_ID, "--print")
        assert outcome.exit_code == 0
        assert CAPTION_TEXT in outcome.stdout

    def test_captions_saved_to_output(self, captions_found, tmp_path):
        target = tmp_path / "out.txt"
        outcome = self._run(YT_URL, "--output", str(target))
        assert outcome.exit_code == 0
        assert target.exists()
        assert target.read_text() == CAPTION_TEXT

    def test_captions_saved_auto_named(self, captions_found, tmp_path):
        """Without --output, file is saved under output_dir from config."""
        fake_config = {
            "defaults": {"model": "turbo", "language": "", "output_dir": str(tmp_path), "output_extension": "txt"},
            "whisper": {"device": "cpu", "compute_type": "int8", "beam_size": 5, "vad_filter": True},
        }
        title_patch = patch("transcribe.cli._fetch_title", return_value="My Video")
        config_patch = patch("transcribe.cli.cfg_module.load", return_value=fake_config)
        with title_patch, config_patch:
            outcome = self._run(YT_URL)
        assert outcome.exit_code == 0
        assert (tmp_path / "My Video.txt").exists()

    # --- Whisper fallback ---

    def test_print_whisper_fallback(self, no_captions, whisper_ok, fake_yt_dlp):
        outcome = self._run(YT_URL, "--print")
        assert outcome.exit_code == 0
        assert WHISPER_TEXT in outcome.stdout

    def test_force_whisper_skips_captions(self, captions_found, whisper_ok, fake_yt_dlp):
        outcome = self._run(YT_URL, "--force-whisper", "--print")
        assert outcome.exit_code == 0
        captions_found.assert_not_called()
        assert WHISPER_TEXT in outcome.stdout

    # --- local file ---

    def test_local_file_whisper(self, whisper_ok, tmp_path):
        clip = tmp_path / "clip.mp3"
        clip.write_bytes(b"fake audio")
        target = tmp_path / "clip.txt"
        outcome = self._run(str(clip), "--output", str(target))
        assert outcome.exit_code == 0
        assert target.read_text() == WHISPER_TEXT
        whisper_ok.assert_called_once()

    def test_local_file_print(self, whisper_ok, tmp_path):
        clip = tmp_path / "clip.mp3"
        clip.write_bytes(b"fake audio")
        outcome = self._run(str(clip), "--print")
        assert outcome.exit_code == 0
        assert WHISPER_TEXT in outcome.stdout

    # --- error cases ---

    def test_invalid_source_exits_nonzero(self):
        outcome = self._run("not-a-url-or-file")
        assert outcome.exit_code != 0

    def test_output_is_directory_exits_nonzero(self, captions_found, tmp_path):
        # tmp_path is an existing directory, not a file path.
        outcome = self._run(YT_URL, "--output", str(tmp_path))
        assert outcome.exit_code != 0

    def test_print_and_output_warns(self, captions_found, tmp_path):
        target = tmp_path / "out.txt"
        outcome = self._run(YT_URL, "--print", "--output", str(target))
        assert outcome.exit_code == 0
        assert "Warning" in outcome.output

tests/test_config.py

Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,68 @@
1+
import tomllib
2+
import pytest
3+
from pathlib import Path
4+
from unittest.mock import patch
5+
6+
from transcribe import config as cfg_module
7+
8+
9+
class TestDeepCopy:
    """Check that ``cfg_module._deep_copy`` yields an equal, independent dict."""

    def test_mutating_copy_does_not_affect_original(self):
        source = {"a": {"x": 1}, "b": 2}
        duplicate = cfg_module._deep_copy(source)
        # Mutate a nested value in the copy; the source must not change.
        duplicate["a"]["x"] = 99
        assert source["a"]["x"] == 1

    def test_top_level_scalar_copied(self):
        source = {"a": {"x": 1}, "b": 2}
        duplicate = cfg_module._deep_copy(source)
        assert duplicate == source
20+
21+
22+
class TestMerge:
    """Check how ``cfg_module._merge`` combines a base dict with overrides."""

    def test_override_replaces_leaf(self):
        base = {"defaults": {"model": "turbo", "language": ""}}
        override = {"defaults": {"model": "large-v3"}}
        merged = cfg_module._merge(base, override)
        assert merged["defaults"]["model"] == "large-v3"
        assert merged["defaults"]["language"] == ""  # untouched

    def test_unknown_key_in_override_is_added(self):
        base = {"defaults": {"model": "turbo"}}
        override = {"new_section": {"foo": "bar"}}
        merged = cfg_module._merge(base, override)
        assert merged["new_section"] == {"foo": "bar"}

    def test_nested_dicts_merged_not_replaced(self):
        base = {"whisper": {"device": "cpu", "beam_size": 5}}
        override = {"whisper": {"device": "cuda"}}
        merged = cfg_module._merge(base, override)
        assert merged["whisper"]["beam_size"] == 5  # preserved
        assert merged["whisper"]["device"] == "cuda"

    def test_scalar_override_replaces_dict(self):
        # Non-dict override should win even if base has a dict
        base = {"key": {"nested": 1}}
        override = {"key": "flat"}
        merged = cfg_module._merge(base, override)
        assert merged["key"] == "flat"
45+
46+
47+
class TestLoad:
    """Check ``cfg_module.load`` with ``CONFIG_PATH`` pointed at temp files."""

    def test_missing_config_returns_defaults(self, tmp_path):
        absent = tmp_path / "nonexistent.toml"
        with patch.object(cfg_module, "CONFIG_PATH", absent):
            loaded = cfg_module.load()
        assert loaded["defaults"]["model"] == "turbo"
        assert loaded["whisper"]["device"] == "cpu"

    def test_partial_toml_merged_with_defaults(self, tmp_path):
        partial = tmp_path / "config.toml"
        partial.write_text('[defaults]\nmodel = "large-v3"\n')
        with patch.object(cfg_module, "CONFIG_PATH", partial):
            loaded = cfg_module.load()
        assert loaded["defaults"]["model"] == "large-v3"
        assert loaded["defaults"]["output_extension"] == "txt"  # from defaults

    def test_invalid_toml_raises_system_exit(self, tmp_path):
        broken = tmp_path / "config.toml"
        broken.write_text("this is not valid toml ][")
        path_patch = patch.object(cfg_module, "CONFIG_PATH", broken)
        expect_exit = pytest.raises(SystemExit, match="not valid TOML")
        with path_patch, expect_exit:
            cfg_module.load()

0 commit comments

Comments
 (0)