ggml-org · rmorse · Feb 5, 2026 · Feb 5, 2026 · Feb 6, 2026 · Feb 6, 2026
diff --git a/README.md b/README.md
@@ -509,6 +509,14 @@ cmake --build build -j --config Release
 
 https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
 
+If you want to stream audio from another application without SDL or a microphone device, use
+[stream-pcm](examples/stream-pcm). It reads raw PCM from stdin, a named pipe, or a file:
+
+```bash
+ffmpeg -i samples/jfk.wav -f s16le -ac 1 -ar 16000 - | \
+  ./build/bin/whisper-stream-pcm -m ./models/ggml-base.en.bin --format s16 --sample-rate 16000 --step 500 --length 5000
+```
+
 ## Confidence color-coding
 
 Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
@@ -842,6 +850,7 @@ Some of the examples are even ported to run in the browser using WebAssembly. Ch
 | [whisper-cli](examples/cli)                         | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper                                                                       |
 | [whisper-bench](examples/bench)                     | [bench.wasm](examples/bench.wasm)     | Benchmark the performance of Whisper on your machine                                                                            |
 | [whisper-stream](examples/stream)                   | [stream.wasm](examples/stream.wasm)   | Real-time transcription of raw microphone capture                                                                               |
+| [whisper-stream-pcm](examples/stream-pcm)           |                                       | Real-time transcription of raw PCM via stdin/pipe (no SDL dependency)                                                           |
 | [whisper-command](examples/command)                 | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic                                                         |
 | [whisper-server](examples/server)                   |                                       | HTTP transcription server with OAI-like API                                                                                     |
 | [whisper-talk-llama](examples/talk-llama)           |                                       | Talk with a LLaMA bot                                                                                                           |

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -107,6 +107,7 @@ else()
     add_subdirectory(server)
     add_subdirectory(quantize)
     add_subdirectory(vad-speech-segments)
+    add_subdirectory(stream-pcm)
     if (WHISPER_SDL2)
         add_subdirectory(stream)
         add_subdirectory(command)

diff --git a/examples/stream-pcm/CMakeLists.txt b/examples/stream-pcm/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(TARGET whisper-stream-pcm) # name of the example executable built from stream-pcm.cpp
+add_executable(${TARGET} stream-pcm.cpp)
+# Shared per-target settings; presumably the included module reads ${TARGET} - TODO confirm.
+include(DefaultTargetOptions)
+# Link privately against the `common` and `whisper` targets; CMAKE_THREAD_LIBS_INIT is assumed to be set by find_package(Threads) in a parent scope - verify.
+target_link_libraries(${TARGET} PRIVATE common whisper ${CMAKE_THREAD_LIBS_INIT})
+# Install only the executable (RUNTIME artifact); no explicit DESTINATION is given here.
+install(TARGETS ${TARGET} RUNTIME)
diff --git a/examples/stream-pcm/README.md b/examples/stream-pcm/README.md
@@ -0,0 +1,55 @@
+# whisper.cpp/examples/stream-pcm
+
+This example performs real-time inference on raw PCM audio streamed via stdin, a pipe, or a file.
+It treats its input as a one-pass PCM stream (the data is consumed once) and does not require SDL or a microphone device.
+
+## Usage
+
+Stream raw PCM (16 kHz, mono) into the tool on stdin, using fixed-step (non-VAD) mode:
+
+```bash
+./build/bin/whisper-stream-pcm -m ./models/ggml-base.en.bin --format s16 --sample-rate 16000 --step 1000 --length 10000 --keep 500
+```
+
+Optionally, enable VAD-based segmentation (recommended when speech arrives in bursts):
+
+```bash
+./build/bin/whisper-stream-pcm -m ./models/ggml-base.en.bin --format s16 --sample-rate 16000 --vad --vad-probe-ms 200 --vad-silence-ms 800 --vad-pre-roll-ms 300 --length 8000
+```
+
+You can also read from a named pipe (FIFO); a separate process must then write the raw PCM data into that pipe:
+
+```bash
+mkfifo /tmp/whisper.pcm
+./build/bin/whisper-stream-pcm -m ./models/ggml-base.en.bin --input /tmp/whisper.pcm --format s16 --sample-rate 16000 --step 1000 --length 10000 --keep 500
+```
+
+Example of piping a WAV file through ffmpeg (the optional `-re` flag paces the input at real-time speed):
+
+```bash
+ffmpeg -re -i samples/jfk.wav -f s16le -ac 1 -ar 16000 - | \
+  ./build/bin/whisper-stream-pcm -m ./models/ggml-base.en.bin --format s16 --sample-rate 16000 --step 1000 --length 10000 --keep 500
+```
+
+Windows (PowerShell + `cmd /c`) pipe example:
+
+```powershell
+cmd /c "ffmpeg -re -hide_banner -loglevel error -i samples\jfk.wav -f s16le -ac 1 -ar 16000 - | build-cpu\bin\Release\whisper-stream-pcm.exe -m models\ggml-base.en.bin --format s16 --sample-rate 16000 --step 1000 --length 10000 --keep 500"
+```
+
+## Notes
+
+- Input must be raw PCM, mono, 16 kHz. The tool does not resample.
+- Supported formats: `f32` or `s16` (little-endian).
+- Use `--input -` (default) for stdin.
+- `--step` must be > 0 unless `--vad` is enabled.
+- For VAD, `--vad-probe-ms` should be at least 200 ms; with very short probe windows, speech detection can fail to trigger.
+
+## Building
+
+`whisper-stream-pcm` does not depend on SDL and builds with the default examples:
+
+```bash
+cmake -B build
+cmake --build build --config Release
+```