Skip to content

Commit 1ad9c4e

Browse files
maxitoon and claude committed
Fix live transcription: use raw PCM extraction instead of soxi
soxi and sox trim rely on the WAV header, which rec does not finalize until recording stops. As a result, the duration check always returned 0, so no chunks were ever extracted during live recording.

Fix: calculate the duration from the file size (16 kHz × 1 channel × 16-bit samples = 32000 bytes/s), extract the raw PCM bytes with dd, and convert them to WAV with sox for whisper-cli.

Also fix the double .txt extension in the saved transcript filename.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent f44caad commit 1ad9c4e

1 file changed

Lines changed: 25 additions & 20 deletions

File tree

whisper-transcribe-with-download.sh

Lines changed: 25 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -132,7 +132,7 @@ original_live_transcription() {
132132
local model_file=$1
133133
local language=$2
134134
local recording_file="$AUDIO_DOWNLOAD_DIR/${TIMESTAMP}_live_recording.wav"
135-
local transcript_file="$TRANSCRIPT_DIR/${TIMESTAMP}_live_transcript.txt"
135+
local transcript_file="$TRANSCRIPT_DIR/${TIMESTAMP}_live_transcript"
136136

137137
print_color "$CYAN" "🎙️ ORIGINAL Live Transcription Setup"
138138
echo "" >&2
@@ -190,30 +190,38 @@ original_live_transcription() {
190190
trap cleanup SIGINT SIGTERM
191191

192192
# Incremental chunk-based live transcription
193-
local last_transcribed=0
194-
local chunk_count=0
193+
# rec writes 16kHz mono 16-bit PCM = 32000 bytes/sec
194+
# WAV header is not finalized until rec stops, so we calculate
195+
# duration from file size and extract raw PCM bytes directly.
196+
local BYTES_PER_SEC=32000
197+
local WAV_HEADER_SIZE=44
195198
local MIN_CHUNK_SECS=5
199+
local last_byte_offset=$WAV_HEADER_SIZE
200+
local chunk_count=0
196201
local running_transcript="/tmp/running_transcript_${TIMESTAMP}.txt"
197202
touch "$running_transcript"
198203

199204
while kill -0 $rec_pid 2>/dev/null; do
200-
if [ -f "$recording_file" ] && [ -s "$recording_file" ]; then
201-
# Get current recording duration in seconds
202-
local current_duration
203-
current_duration=$(soxi -D "$recording_file" 2>/dev/null || echo "0")
204-
# Convert to integer for comparison
205-
local current_int=${current_duration%.*}
206-
local last_int=${last_transcribed%.*}
207-
current_int=${current_int:-0}
208-
last_int=${last_int:-0}
209-
local new_audio=$((current_int - last_int))
205+
if [ -f "$recording_file" ]; then
206+
local current_size
207+
current_size=$(stat -f%z "$recording_file" 2>/dev/null || echo "0")
208+
local new_bytes=$((current_size - last_byte_offset))
209+
local new_secs=$((new_bytes / BYTES_PER_SEC))
210210

211-
if [ "$new_audio" -ge "$MIN_CHUNK_SECS" ]; then
211+
if [ "$new_secs" -ge "$MIN_CHUNK_SECS" ]; then
212212
chunk_count=$((chunk_count + 1))
213213

214-
# Extract only the new audio chunk
214+
# Align to full seconds of audio
215+
local chunk_bytes=$((new_secs * BYTES_PER_SEC))
216+
local raw_file="/tmp/chunk_raw_${TIMESTAMP}_${chunk_count}.pcm"
215217
local chunk_file="/tmp/chunk_${TIMESTAMP}_${chunk_count}.wav"
216-
sox "$recording_file" "$chunk_file" trim "$last_transcribed" 2>/dev/null
218+
219+
# Extract raw PCM bytes (bypass incomplete WAV header)
220+
dd if="$recording_file" of="$raw_file" bs=1 skip="$last_byte_offset" count="$chunk_bytes" 2>/dev/null
221+
222+
# Convert raw PCM to valid WAV for whisper-cli
223+
sox -t raw -r 16000 -c 1 -b 16 -e signed-integer -L "$raw_file" "$chunk_file" 2>/dev/null
224+
rm -f "$raw_file"
217225

218226
if [ -f "$chunk_file" ] && [ -s "$chunk_file" ]; then
219227
local chunk_transcript="/tmp/chunk_transcript_${TIMESTAMP}_${chunk_count}"
@@ -225,18 +233,15 @@ original_live_transcription() {
225233
local new_text
226234
new_text=$(cat "${chunk_transcript}.txt" | sed '/^$/d')
227235
if [ -n "$new_text" ] && [ "$new_text" != " " ]; then
228-
# Append to running transcript and display
229236
echo "$new_text" >> "$running_transcript"
230237
print_color "$GREEN" "$new_text" >&2
231238
fi
232239
fi
233240

234-
# Clean up chunk files
235241
rm -f "$chunk_file" "${chunk_transcript}.txt" 2>/dev/null
236242
fi
237243

238-
# Update position to current duration
239-
last_transcribed=$current_duration
244+
last_byte_offset=$((last_byte_offset + chunk_bytes))
240245
fi
241246
fi
242247
sleep 2

0 commit comments

Comments
 (0)