@@ -131,42 +131,49 @@ cleanup_old_audio() {
131131original_live_transcription () {
132132 local model_file=$1
133133 local language=$2
134- local recording_file=" $AUDIO_DOWNLOAD_DIR /${TIMESTAMP} _live_recording.wav"
134+ local recording_wav=" $AUDIO_DOWNLOAD_DIR /${TIMESTAMP} _live_recording.wav"
135+ local recording_raw=" /tmp/live_recording_${TIMESTAMP} .raw"
135136 local transcript_file=" $TRANSCRIPT_DIR /${TIMESTAMP} _live_transcript"
136-
137+
137138 print_color " $CYAN " " 🎙️ ORIGINAL Live Transcription Setup"
138139 echo " " >&2
139- print_color " $YELLOW " " Recording will be saved to: $recording_file " >&2
140+ print_color " $YELLOW " " Recording will be saved to: $recording_wav " >&2
140141 print_color " $YELLOW " " Audio file will be kept for 7 days" >&2
141142 print_color " $YELLOW " " Live transcript will appear below every ~10 seconds:" >&2
142- print_color " $YELLOW " " Final transcript will be saved to: $transcript_file " >&2
143+ print_color " $YELLOW " " Final transcript will be saved to: ${ transcript_file} .txt " >&2
143144 echo " " >&2
144145 print_color " $BLUE " " Press Ctrl+C to stop recording and save transcript" >&2
145146 echo " " >&2
146-
147- # Start recording in background
147+
148+ # Record as raw PCM (no WAV header) so we can reliably read during recording.
149+ # Format: 16kHz, mono, 16-bit signed integer, little-endian = 32000 bytes/sec
148150 print_color " $GREEN " " 🔴 Recording started... (Press Ctrl+C to stop)" >&2
149- rec -r 16000 -c 1 " $recording_file " > /dev/null 2>&1 &
151+ rec -t raw - r 16000 -c 1 -b 16 -e signed-integer " $recording_raw " > /dev/null 2>&1 &
150152 local rec_pid=$!
151-
153+
152154 # Show live transcription area
153- print_color " $CYAN " " 📝 LIVE TRANSCRIPTION (appears in real-time) :" >&2
155+ print_color " $CYAN " " 📝 LIVE TRANSCRIPTION:" >&2
154156 print_color " $CYAN " " ════════════════════════════════════════════════════" >&2
155-
157+
156158 # Function to handle cleanup on exit
157159 cleanup () {
158160 print_color " $YELLOW " " \n🛑 Stopping recording..." >&2
159161 kill $rec_pid 2> /dev/null
160162 wait $rec_pid 2> /dev/null
161-
162- if [ -f " $recording_file " ] && [ -s " $recording_file " ]; then
163- print_color " $GREEN " " ✅ Recording saved: $recording_file " >&2
163+
164+ if [ -f " $recording_raw " ] && [ -s " $recording_raw " ]; then
165+ # Convert raw PCM to WAV for permanent storage and final transcription
166+ sox -t raw -r 16000 -c 1 -b 16 -e signed-integer -L \
167+ " $recording_raw " " $recording_wav " 2> /dev/null
168+ rm -f " $recording_raw "
169+
170+ print_color " $GREEN " " ✅ Recording saved: $recording_wav " >&2
164171 print_color " $YELLOW " " 📁 Audio file will be kept for 7 days" >&2
165-
172+
166173 # Final transcription of the complete recording
167174 print_color " $BLUE " " 🎯 Performing final transcription..." >&2
168- whisper-cli -m " $model_file " -f " $recording_file " -l " $language " -otxt -of " $transcript_file " -pp -nt >&2
169-
175+ whisper-cli -m " $model_file " -f " $recording_wav " -l " $language " -otxt -of " $transcript_file " -pp -nt >&2
176+
170177 if [ $? -eq 0 ] && [ -f " ${transcript_file} .txt" ]; then
171178 print_color " $GREEN " " ✅ Final transcript saved: ${transcript_file} .txt" >&2
172179 print_color " $YELLOW " " 📝 Final transcript preview:" >&2
@@ -179,22 +186,21 @@ original_live_transcription() {
179186 else
180187 print_color " $RED " " ❌ No recording was made!" >&2
181188 fi
182-
183- # Clean up old audio files
189+
190+ # Clean up
191+ rm -f " $recording_raw " 2> /dev/null
184192 cleanup_old_audio
185-
193+
186194 exit 0
187195 }
188-
196+
189197 # Set up signal handler for cleanup
190198 trap cleanup SIGINT SIGTERM
191-
199+
192200 # Incremental chunk-based live transcription
193- # rec writes 16kHz mono 16-bit PCM = 32000 bytes/sec.
194- # The WAV header is NOT finalized until rec stops, so we:
195- # 1. Estimate duration from file size
196- # 2. Tell sox to read the file as raw PCM (-t raw), bypassing the broken header
197- # 3. Use sox trim to extract time ranges directly
201+ # Recording is raw PCM: 16kHz, mono, 16-bit = 32000 bytes/sec, no header.
202+ # File size directly equals audio bytes, so duration = size / 32000.
203+ # sox reads the raw file and trims out time ranges into proper WAV chunks.
198204 local BYTES_PER_SEC=32000
199205 local MIN_CHUNK_SECS=10
200206 local OVERLAP_SECS=2
@@ -204,9 +210,9 @@ original_live_transcription() {
204210 touch " $running_transcript "
205211
206212 while kill -0 $rec_pid 2> /dev/null; do
207- if [ -f " $recording_file " ]; then
213+ if [ -f " $recording_raw " ]; then
208214 local current_size
209- current_size=$( stat -f%z " $recording_file " 2> /dev/null || echo " 0" )
215+ current_size=$( stat -f%z " $recording_raw " 2> /dev/null || echo " 0" )
210216 local current_secs=$(( current_size / BYTES_PER_SEC))
211217 local new_secs=$(( current_secs - last_secs))
212218
@@ -222,24 +228,22 @@ original_live_transcription() {
222228 fi
223229 local trim_duration=$(( current_secs - trim_start))
224230
225- # Read recording as raw PCM (bypasses unfinalised WAV header)
226- # and extract only the time range we need
231+ # Extract time range from raw PCM → proper WAV for whisper-cli
227232 sox -t raw -r 16000 -c 1 -b 16 -e signed-integer -L \
228- " $recording_file " " $chunk_file " \
233+ " $recording_raw " " $chunk_file " \
229234 trim " $trim_start " " $trim_duration " 2> /dev/null
230235
231236 if [ -f " $chunk_file " ] && [ -s " $chunk_file " ]; then
232237 local chunk_transcript=" /tmp/chunk_transcript_${TIMESTAMP} _${chunk_count} "
233238
234- # Transcribe the chunk
235239 whisper-cli -m " $model_file " -f " $chunk_file " -l " $language " -otxt -of " $chunk_transcript " -pp -nt > /dev/null 2>&1
236240
237241 if [ -f " ${chunk_transcript} .txt" ] && [ -s " ${chunk_transcript} .txt" ]; then
238242 local new_text
239243 new_text=$( cat " ${chunk_transcript} .txt" | sed ' /^$/d' )
240244 # Filter out Whisper hallucinations on silence/noise
241245 local filtered_text
242- filtered_text=$( echo " $new_text " | grep -viE ' ^\s*[\[\(\ *].*[\]\)\*]\s *$|^[[:space:]]*$' || true)
246+ filtered_text=$( echo " $new_text " | grep -viE ' ^[[:space:]]*[][( *].*[])*][[:space:]] *$|^[[:space:]]*$' || true)
243247 if [ -n " $filtered_text " ]; then
244248 echo " $filtered_text " >> " $running_transcript "
245249 print_color " $GREEN " " $filtered_text " >&2
@@ -257,7 +261,7 @@ original_live_transcription() {
257261
258262 # Clean up running transcript temp file
259263 rm -f " $running_transcript " 2> /dev/null
260-
264+
261265 # Wait for recording to complete
262266 wait $rec_pid
263267}
0 commit comments