|
1 | 1 | from moviepy import VideoFileClip, TextClip, CompositeVideoClip |
2 | 2 | from faster_whisper import WhisperModel |
| 3 | +from tqdm import tqdm |
3 | 4 |
|
4 | 5 | # Load video and extract audio |
5 | 6 | video = VideoFileClip("./sales_training_awareness_src.mp4") |
6 | | -video.audio.write_audiofile("sales_training_awareness.wav") # type: ignore |
| 7 | +video.audio.write_audiofile("auto_edit.wav") # type: ignore |
7 | 8 |
|
8 | 9 | # Transcribe with faster-whisper |
9 | 10 | model = WhisperModel("small") # You can use 'small' or 'medium' if needed |
10 | | -segments, _ = model.transcribe("sales_training_awareness.wav") |
11 | | -# print(f"Segments are: {segments}") |
| 11 | +# model_v2 = WhisperModel("large-v2") # when using tamil |
| 12 | +segments, _ = model.transcribe("auto_edit.wav") |
| 13 | +# When transcribing Tamil audio, use the larger model instead:
| 14 | +# segments, _ = model_v2.transcribe("/content/training_tamil.wav", language="ta") |
| 15 | + |
| 16 | +# `segments` is a lazy generator, so the line below is where transcription actually runs and it can take a long time.
| 17 | +# Avoid audio with background music, poor microphones, or mixed languages.
| 18 | +segments = list(tqdm(segments)) # Shows a progress bar |
12 | 19 |
|
13 | 20 | # Build typewriter subtitles for all segments |
14 | 21 | subtitle_clips = [] |
15 | 22 | with open("subtitle.srt", 'w') as fsrt: |
16 | 23 | for idx, segment in enumerate(segments): |
17 | 24 | fsrt.write(f"{segment.text} : {segment.start}, {segment.end - segment.start}") |
18 | 25 | subtitle_clips.append([segment.text, segment.start, segment.end - segment.start]) |
19 | | - if idx % 5 == 0: |
20 | | - print(f"At idx: {idx}") |
21 | | - break |
22 | 26 |
|
23 | 27 |
|
24 | 28 | print("Writing Extracted Subtitle:") |
|
0 commit comments