|
2 | 2 | # requires-python = ">=3.11" |
3 | 3 | # dependencies = [ |
4 | 4 | # "pydub", |
| 5 | +# "faster-whisper", |
5 | 6 | # ] |
6 | 7 | # /// |
| 8 | + |
7 | 9 | import json |
8 | 10 | import sys |
9 | 11 | from pathlib import Path |
10 | 12 | from pydub import AudioSegment |
| 13 | +from faster_whisper import WhisperModel |
| 14 | +import os |
| 15 | + |
# Shared Whisper model, constructed once when the module is loaded and used
# by transcribe(). "base" is the model size; tiny, small, medium and large
# are the other sizes that can be chosen here.
model = WhisperModel(
    "base",
    compute_type="auto",
)
| 18 | + |
def transcribe(filepath):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        filepath: Audio input handed unchanged to ``model.transcribe``.

    Returns:
        A list with one dict per segment produced by the model, in the
        order the model yields them. Each dict has the keys ``"start"``,
        ``"end"`` and ``"text"``, copied from the segment attributes of the
        same names (start/end are presumably offsets in seconds -- confirm
        against the consumer of this list).
    """
    # The second element of the returned pair is not needed here.
    segments, _info = model.transcribe(filepath)
    return [
        {"start": seg.start, "end": seg.end, "text": seg.text}
        for seg in segments
    ]
11 | 31 |
|
12 | 32 | def format_timestamp(seconds: float) -> str: |
13 | 33 | """Convert seconds to SRT timestamp format: HH:MM:SS,mmm""" |
@@ -43,17 +63,13 @@ def generate_srt(transcript, audio_file, output_file): |
43 | 63 | print(f"SRT file created: {output_file}") |
44 | 64 |
|
if __name__ == "__main__":
    # Command-line entry point: transcribe the audio file given as the first
    # argument and write the subtitles to the path given as the second.
    if len(sys.argv) != 3:
        # Take the script name from argv so the usage text stays correct if
        # the file is renamed. sys.exit() with a string writes it to stderr
        # and exits with status 1.
        script_name = Path(sys.argv[0]).name
        sys.exit(f"Usage: uv run {script_name} input.mp3 output.srt")

    audio_file = sys.argv[1]
    output_file = sys.argv[2]

    # The transcript is now produced directly from the audio instead of
    # being read from a separate JSON file.
    transcript = transcribe(audio_file)

    generate_srt(transcript, audio_file, output_file)
0 commit comments