1515
# Whisper models used by the transcription helpers below.
# Available sizes for the stock model: tiny, base, small, medium, large.
# NOTE(review): both models are constructed at import time even though the
# entry point only uses one of them per run -- consider loading lazily.
model = WhisperModel("base", compute_type="auto")

# Tamil transcription uses a separate large-v3 checkpoint.
tamil_model = WhisperModel(
    "flyingleafe/faster-whisper-large-v3",
    device="cpu",  # switch to "cuda" when a GPU is available
    compute_type="int8",  # chosen for CPU efficiency
)
1822
1923def transcribe (filepath ):
2024 segments , _ = model .transcribe (filepath )
@@ -27,8 +31,21 @@ def transcribe(filepath):
2731 "text" : segment .text
2832 })
2933
30- return result
34+ return result
35+
def transcribe_tamil(filepath):
    """Transcribe an audio file with the Tamil model.

    Calls ``tamil_model.transcribe`` with ``language="ta"`` and a beam size
    of 5, then collects every segment it yields.

    Args:
        filepath: Audio file handed straight to ``tamil_model.transcribe``.

    Returns:
        A list of dicts, one per segment, with the keys ``"start"``,
        ``"end"`` and ``"text"`` copied from the segment's attributes.
    """
    tamil_segments, _info = tamil_model.transcribe(
        filepath, beam_size=5, language="ta"
    )
    return [
        {"start": seg.start, "end": seg.end, "text": seg.text}
        for seg in tamil_segments
    ]
48+
3249def format_timestamp (seconds : float ) -> str :
3350 """Convert seconds to SRT timestamp format: HH:MM:SS,mmm"""
3451 millis = int (seconds * 1000 )
@@ -63,13 +80,19 @@ def generate_srt(transcript, audio_file, output_file):
6380 print (f"SRT file created: { output_file } " )
6481
if __name__ == "__main__":
    # Expected invocation: <script> <lang> <input audio> <output srt>
    if len(sys.argv) != 4:
        # Passing a string to sys.exit writes it to stderr and exits with
        # status 1, so the usage error does not pollute stdout.
        sys.exit(
            "Usage: uv run mp3_to_srt.py <lang> input.mp3 output.srt\n"
            "  <lang>: 'ta' selects the Tamil model; "
            "any other value uses the default model"
        )

    lang, audio_file, output_file = sys.argv[1:]

    # Only "ta" has a dedicated model; every other value falls back to the
    # default transcriber.
    if lang == "ta":
        print("Selected Tamil Language.")
        transcript = transcribe_tamil(audio_file)
    else:
        print("Default is English Language")
        transcript = transcribe(audio_file)

    generate_srt(transcript, audio_file, output_file)
0 commit comments