insightbuilder
diff --git a/‎tts_n_stt/README.md‎
Lines changed: 7 additions & 5 deletions b/‎tts_n_stt/README.md‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎tts_n_stt/apple_description.mp3‎
-31.3 KB b/‎tts_n_stt/apple_description.mp3‎
-31.3 KB
diff --git a/‎tts_n_stt/json_to_srt.py‎ ‎tts_n_stt/mp3_to_srt.py‎tts_n_stt/json_to_srt.py renamed to tts_n_stt/mp3_to_srt.py
Lines changed: 25 additions & 9 deletions b/‎tts_n_stt/json_to_srt.py‎ ‎tts_n_stt/mp3_to_srt.py‎tts_n_stt/json_to_srt.py renamed to tts_n_stt/mp3_to_srt.py
Lines changed: 25 additions & 9 deletions
diff --git a/‎tts_n_stt/subtitles.json‎
Lines changed: 0 additions & 29 deletions b/‎tts_n_stt/subtitles.json‎
Lines changed: 0 additions & 29 deletions
@@ -30,17 +30,19 @@ Run it with below command
 
 uv run stt_app.py
 
-text to 
-
 ## text to speech with simpler pydub
 
 - text_to_mp3.py gives the speech file 
 
-- Use the above to get synthetic speech, which is then used for speech to text in json format using stt_app.py
+- Use the above to get synthetic speech, which is then used for speech-to-text in JSON format using stt_app.py (use mp3_to_srt.py directly)
 
 - subtitles.srt is received in JSON format. Renamed it.
 
-## Use the JSON to SRT converter:
+## Convert an mp3 to SRT using mp3_to_srt.py:
+
+Ideally you already have your own mp3 file and want to transcribe it.
+
+- uv run mp3_to_srt.py your_voice.mp3 description_subs.srt
 
-- 
+The above command transcribes the mp3 file and writes the subtitles to the given .srt file.
 
@@ -2,12 +2,32 @@
 # requires-python = ">=3.11"
 # dependencies = [
 #     "pydub",
+#     "faster-whisper",
 # ]
 # ///
+
 import json
 import sys
 from pathlib import Path
 from pydub import AudioSegment
+from faster_whisper import WhisperModel
+import os
+
+# Load the Whisper model once at module level so transcribe() can reuse it (sizes: tiny, base, small, medium, large)
+model = WhisperModel("base", compute_type="auto")
+
+def transcribe(filepath):
+    """Transcribe *filepath* with the module-level Whisper ``model``.
+
+    Returns a list with one dict per segment, holding that segment's
+    "start", "end" and "text" attributes under keys of the same name.
+    """
+
+    segments, _info = model.transcribe(filepath)
+    return [
+        {"start": seg.start, "end": seg.end, "text": seg.text}
+        for seg in segments
+    ]
 
 def format_timestamp(seconds: float) -> str:
     """Convert seconds to SRT timestamp format: HH:MM:SS,mmm"""
@@ -43,17 +63,13 @@ def generate_srt(transcript, audio_file, output_file):
     print(f"SRT file created: {output_file}")
 
 if __name__ == "__main__":
-    if len(sys.argv) != 4:
-        print("Usage: uv run generate_srt.py transcript.json input.mp3 output.srt")
+    if len(sys.argv) != 3:  # script name + input mp3 + output srt
+        print("Usage: uv run mp3_to_srt.py input.mp3 output.srt")
         sys.exit(1)
 
-    transcript_file = sys.argv[1]
-    audio_file = sys.argv[2]
-    output_file = sys.argv[3]
-
-    with open(transcript_file, "r", encoding="utf-8") as f:
-        data = json.load(f)
-        transcript = data["transcription"]
+    audio_file = sys.argv[1]
+    output_file = sys.argv[2]
 
+    transcript = transcribe(audio_file)  # transcribe in-process; no pre-made JSON file is read
 
     generate_srt(transcript, audio_file, output_file)