tts and stt servers with readme updated

Kamalabot · Kamalabot · commit 214a497ce2df · 2025-06-08T16:49:53.000+05:30
diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,7 @@ media/
 slides/
 # Model stuff
 *.onnx
+*.bin
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/tts_n_stt/README.md b/tts_n_stt/README.md
@@ -0,0 +1,31 @@
+This folder contains the Python servers for both
+Text to Speech and Speech to Text conversion. The
+scripts use the uv package manager for dependencies.
+You can refer to this YouTube video for more
+details: https://youtu.be/LZXps8KE4XM
+
+Text to Speech with Kokoro Model:
+
+The Kokoro TTS model files have to be downloaded
+for app.py to work.
+
+wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx
+
+wget https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin
+
+The app.py file is the Text to Speech Gradio
+server. Run it with the command below:
+
+uv run app.py
+
+The onnx and bin files are not committed to the
+repo, so you have to download them yourself.
+
+Speech to Text with Whisper Model:
+
+The stt_app.py file is the Speech to Text Flask
+server. Run it with the command below:
+
+uv run stt_app.py
diff --git a/tts_n_stt/app.py b/tts_n_stt/app.py
diff --git a/tts_n_stt/stt_app.py b/tts_n_stt/stt_app.py
@@ -0,0 +1,50 @@
+# /// script
+# requires-python = ">=3.11"
+# dependencies = [
+#     "faster-whisper",
+#     "flask",
+# ]
+# ///
+from flask import Flask, request, jsonify
+from faster_whisper import WhisperModel
+import os
+
+app = Flask(__name__)
+
+# Load the Whisper model once at import time; the /transcribe handler below
+# reuses this single module-level instance for every request.
+# The first argument selects the model size: tiny, base, small, medium, large.
+# NOTE(review): compute_type="auto" presumably lets faster-whisper choose the
+# numeric precision for the available hardware - confirm against its docs.
+model = WhisperModel("base", compute_type="auto")
+
+@app.route("/")
+def index():
+    return "Whisper Transcription API is running."
+
+@app.route("/transcribe", methods=["POST"])
+def transcribe():
+    if 'file' not in request.files:
+        return jsonify({"error": "No file uploaded"}), 400
+
+    file = request.files['file']
+    if file.filename == '':
+        return jsonify({"error": "Empty filename"}), 400
+
+    # Save file temporarily
+    filepath = os.path.join("/tmp", file.filename)
+    file.save(filepath)
+
+    segments, _ = model.transcribe(filepath)
+
+    result = []
+    for segment in segments:
+        result.append({
+            "start": segment.start,
+            "end": segment.end,
+            "text": segment.text
+        })
+
+    os.remove(filepath)  # clean up
+
+    return jsonify({"transcription": result})
+
+if __name__ == "__main__":
+    app.run(debug=True, port=8000)
+