@@ -132,7 +132,7 @@ original_live_transcription() {
132132 local model_file=$1
133133 local language=$2
134134 local recording_file=" $AUDIO_DOWNLOAD_DIR /${TIMESTAMP} _live_recording.wav"
135- local transcript_file=" $TRANSCRIPT_DIR /${TIMESTAMP} _live_transcript.txt "
135+ local transcript_file=" $TRANSCRIPT_DIR /${TIMESTAMP} _live_transcript"
136136
137137 print_color " $CYAN " " 🎙️ ORIGINAL Live Transcription Setup"
138138 echo " " >&2
@@ -190,30 +190,38 @@ original_live_transcription() {
190190 trap cleanup SIGINT SIGTERM
191191
192192 # Incremental chunk-based live transcription
193- local last_transcribed=0
194- local chunk_count=0
193+ # rec writes 16kHz mono 16-bit PCM = 32000 bytes/sec
194+ # WAV header is not finalized until rec stops, so we calculate
195+ # duration from file size and extract raw PCM bytes directly.
196+ local BYTES_PER_SEC=32000
197+ local WAV_HEADER_SIZE=44
195198 local MIN_CHUNK_SECS=5
199+ local last_byte_offset=$WAV_HEADER_SIZE
200+ local chunk_count=0
196201 local running_transcript=" /tmp/running_transcript_${TIMESTAMP} .txt"
197202 touch " $running_transcript "
198203
199204 while kill -0 $rec_pid 2> /dev/null; do
200- if [ -f " $recording_file " ] && [ -s " $recording_file " ]; then
201- # Get current recording duration in seconds
202- local current_duration
203- current_duration=$( soxi -D " $recording_file " 2> /dev/null || echo " 0" )
204- # Convert to integer for comparison
205- local current_int=${current_duration% .* }
206- local last_int=${last_transcribed% .* }
207- current_int=${current_int:- 0}
208- last_int=${last_int:- 0}
209- local new_audio=$(( current_int - last_int))
205+ if [ -f " $recording_file " ]; then
206+ local current_size
207+ current_size=$( stat -f%z " $recording_file " 2> /dev/null || echo " 0" )
208+ local new_bytes=$(( current_size - last_byte_offset))
209+ local new_secs=$(( new_bytes / BYTES_PER_SEC))
210210
211- if [ " $new_audio " -ge " $MIN_CHUNK_SECS " ]; then
211+ if [ " $new_secs " -ge " $MIN_CHUNK_SECS " ]; then
212212 chunk_count=$(( chunk_count + 1 ))
213213
214- # Extract only the new audio chunk
214+ # Align to full seconds of audio
215+ local chunk_bytes=$(( new_secs * BYTES_PER_SEC))
216+ local raw_file=" /tmp/chunk_raw_${TIMESTAMP} _${chunk_count} .pcm"
215217 local chunk_file=" /tmp/chunk_${TIMESTAMP} _${chunk_count} .wav"
216- sox " $recording_file " " $chunk_file " trim " $last_transcribed " 2> /dev/null
218+
219+ # Extract raw PCM bytes (bypass incomplete WAV header)
220+ dd if=" $recording_file " of=" $raw_file " bs=1 skip=" $last_byte_offset " count=" $chunk_bytes " 2> /dev/null
221+
222+ # Convert raw PCM to valid WAV for whisper-cli
223+ sox -t raw -r 16000 -c 1 -b 16 -e signed-integer -L " $raw_file " " $chunk_file " 2> /dev/null
224+ rm -f " $raw_file "
217225
218226 if [ -f " $chunk_file " ] && [ -s " $chunk_file " ]; then
219227 local chunk_transcript=" /tmp/chunk_transcript_${TIMESTAMP} _${chunk_count} "
@@ -225,18 +233,15 @@ original_live_transcription() {
225233 local new_text
226234 new_text=$( cat " ${chunk_transcript} .txt" | sed ' /^$/d' )
227235 if [ -n " $new_text " ] && [ " $new_text " != " " ]; then
228- # Append to running transcript and display
229236 echo " $new_text " >> " $running_transcript "
230237 print_color " $GREEN " " $new_text " >&2
231238 fi
232239 fi
233240
234- # Clean up chunk files
235241 rm -f " $chunk_file " " ${chunk_transcript} .txt" 2> /dev/null
236242 fi
237243
238- # Update position to current duration
239- last_transcribed=$current_duration
244+ last_byte_offset=$(( last_byte_offset + chunk_bytes))
240245 fi
241246 fi
242247 sleep 2
0 commit comments