Skip to content

Commit 6a54a41

Browse files
committed
updated get transcript
1 parent 4b2e530 commit 6a54a41

1 file changed

Lines changed: 10 additions & 6 deletions

File tree

movie_py_explored/get_script.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,28 @@
11
from moviepy import VideoFileClip, TextClip, CompositeVideoClip
22
from faster_whisper import WhisperModel
3+
from tqdm import tqdm
34

45
# Load video and extract audio
56
video = VideoFileClip("./sales_training_awareness_src.mp4")
6-
video.audio.write_audiofile("sales_training_awareness.wav") # type: ignore
7+
video.audio.write_audiofile("auto_edit.wav") # type: ignore
78

89
# Transcribe with faster-whisper
910
model = WhisperModel("small") # You can use 'small' or 'medium' if needed
10-
segments, _ = model.transcribe("sales_training_awareness.wav")
11-
# print(f"Segments are: {segments}")
11+
# model_v2 = WhisperModel("large-v2") # use this model when transcribing Tamil
12+
segments, _ = model.transcribe("auto_edit.wav")
13+
# When transcribing Tamil audio:
14+
# segments, _ = model_v2.transcribe("/content/training_tamil.wav", language="ta")
15+
16+
# `segments` is a lazy generator, so the line below, which consumes it, can take a long time
17+
# Avoid background music, poor microphones, or mixed languages
18+
segments = list(tqdm(segments)) # Shows a progress bar
1219

1320
# Build typewriter subtitles for all segments
1421
subtitle_clips = []
1522
with open("subtitle.srt", 'w') as fsrt:
1623
for idx, segment in enumerate(segments):
1724
# Write one entry per line: text, start time, and duration. write() does not append a newline, so add it explicitly; otherwise all entries run together.
fsrt.write(f"{segment.text} : {segment.start}, {segment.end - segment.start}\n")
1825
subtitle_clips.append([segment.text, segment.start, segment.end - segment.start])
19-
if idx % 5 == 0:
20-
print(f"At idx: {idx}")
21-
break
2226

2327

2428
print("Writing Extracted Subtitle:")

0 commit comments

Comments
 (0)