tetherto
diff --git a/tts-cpp/CMakeLists.txt b/tts-cpp/CMakeLists.txt
Lines changed: 25 additions & 0 deletions
diff --git a/tts-cpp/README.md b/tts-cpp/README.md
Lines changed: 13 additions & 1 deletion
diff --git a/tts-cpp/include/tts-cpp/chatterbox/engine.h b/tts-cpp/include/tts-cpp/chatterbox/engine.h
Lines changed: 14 additions & 0 deletions
diff --git a/tts-cpp/include/tts-cpp/supertonic/engine.h b/tts-cpp/include/tts-cpp/supertonic/engine.h
Lines changed: 9 additions & 0 deletions
@@ -981,6 +981,31 @@ if (TTS_CPP_BUILD_TESTS)
         ARGS     "${_tcb_t3_turbo_gguf}" "${_tcb_s3gen_gguf}"
         REQUIRES "${_tcb_t3_turbo_gguf}" "${_tcb_s3gen_gguf}")
 
+    # QVAC-21483 — output-frequency selection on the chatterbox::Engine API
+    # (batch + streaming + validation).  Uses the multilingual fixtures so it
+    # runs anywhere the mtl-synth tests do; auto-disabled when they're absent.
+    add_executable(test-output-sample-rate test/test_output_sample_rate.cpp)
+    target_link_libraries(test-output-sample-rate PRIVATE tts-cpp ggml tts-cpp-backend-defs)
+    target_include_directories(test-output-sample-rate PRIVATE ggml/include src include)
+    tts_cpp_apply_ccache(test-output-sample-rate)
+    tts_cpp_register_test(test-output-sample-rate
+        LABEL    "fixture"
+        ARGS     "${_tcb_t3_mtl_gguf}" "${_tcb_s3gen_mtl_gguf}"
+        REQUIRES "${_tcb_t3_mtl_gguf}" "${_tcb_s3gen_mtl_gguf}")
+
+    # QVAC-21483 — Supertonic sibling of the above: native rate, 16 kHz batch
+    # ratio, construction-time rejection, the streaming
+    # `result.pcm == concat(chunks)` invariant, and the batch-exact property
+    # (streamed output equals a single whole-buffer resample).  Gated on the
+    # Supertonic GGUF fixture; auto-disabled when it's absent.
+    add_executable(test-output-sample-rate-supertonic test/test_output_sample_rate_supertonic.cpp)
+    target_link_libraries(test-output-sample-rate-supertonic PRIVATE tts-cpp ggml tts-cpp-backend-defs)
+    target_include_directories(test-output-sample-rate-supertonic PRIVATE ggml/include src include)
+    tts_cpp_apply_ccache(test-output-sample-rate-supertonic)
+    tts_cpp_register_test(test-output-sample-rate-supertonic
+        LABEL    "fixture"
+        ARGS     "${_tcb_super_gguf}"
+        REQUIRES "${_tcb_super_gguf}")
+
     # CPU-side persistent-cache validation.
     # Exercises the time_mlp / time_emb / cfm_estimator / weight_mirror
     # caches that amortise per-synth overhead on the multilingual CPU
 
@@ -403,7 +403,8 @@ harnesses:
 | `build/supertonic-cli`        | Supertonic-only end-to-end CLI (text → wav) — the same engine `tts-cli` invokes when it sees a Supertonic GGUF, exposed standalone for scripting and parity work |
 | `build/supertonic-bench`      | Per-stage Supertonic benchmark harness (`--text` / `--out` / `--runs`); machine-readable RTF + per-stage timings |
 | `build/test-s3gen`            | Staged numerical validation of S3Gen encoder + CFM vs Python dumps |
-| `build/test-resample`         | Round-trip SNR of the C++ Kaiser-windowed sinc resampler |
+| `build/test-resample`         | Round-trip SNR of the C++ Kaiser-windowed sinc resampler + output-frequency helpers (validate / passthrough / ratio) |
+| `build/test-output-sample-rate` | `--output-sample-rate` on `chatterbox::Engine`: native/16 kHz batch, out-of-range rejection, streaming `pcm == concat(chunks)` invariant (needs the MTL GGUFs) |
 | `build/test-voice-features`   | 24 kHz 80-ch mel parity (prompt_feat) |
 | `build/test-fbank`            | 16 kHz 80-ch Kaldi fbank parity |
 | `build/test-voice-encoder`    | VoiceEncoder 256-d speaker embedding parity |
@@ -634,6 +635,17 @@ N=6 is too aggressive (cosine 0.990 right at the threshold, PCM cosine
 drops to 0.88).  Streaming chunks ignore this flag and use
 `--stream-cfm-steps` instead.
 
+`--output-sample-rate HZ` (QVAC-21483) selects the output sample rate.  The
+pipeline natively emits 24 kHz for Chatterbox and the model's metadata rate
+for Supertonic; pass a rate in `8000..192000` Hz to resample the final PCM
+with the in-tree Kaiser-windowed sinc resampler before it is written or
+streamed (e.g. `--output-sample-rate 16000` for a 16 kHz wav).  `0` (the
+default) keeps the native rate, so existing behaviour is unchanged.  The flag
+works on the single-shot, auto-split, and streaming paths and on both
+engines.  Library callers get the same knob through the
+`tts_cpp::*::EngineOptions::output_sample_rate` field, and
+`SynthesisResult::sample_rate` always reports the actual rate.
+
 Everything is self-contained in the two `.gguf` files:
 
 - `chatterbox-t3-turbo.gguf` embeds the BPE tokenizer (vocab + merges +
 
@@ -193,6 +193,20 @@ struct EngineOptions {
     // S3Gen side.  0 = library default (2-step meanflow).
     int cfm_steps = 0;
 
+    // QVAC-21483 — desired output sample rate in Hz.  The Chatterbox pipeline
+    // natively emits 24 kHz mono float32; when this is a positive rate other
+    // than 24000, the engine resamples the final PCM (Kaiser-windowed sinc, the
+    // same primitive used for reference-audio preprocessing) to the requested
+    // rate and reports that rate on SynthesisResult::sample_rate.  In streaming
+    // mode every chunk is fed through one utterance-spanning resampler that
+    // emits an output sample only once its sinc window is fully covered by the
+    // audio seen so far, so the delivered chunks concatenate to exactly the
+    // same PCM as resampling the whole utterance once — no per-chunk seam
+    // artifacts — and the documented `result.pcm == concat(chunks)` invariant
+    // still holds.  0 keeps the native 24 kHz (default; zero behaviour change).
+    // Validated at construction: must be 0 or within [8000, 192000] Hz.
+    int output_sample_rate = 0;
+
     // ---------------- Streaming synthesis ----------------------------
     //
     // When `stream_chunk_tokens > 0` AND the caller passes a non-empty
 
@@ -116,6 +116,15 @@ struct EngineOptions {
     int   n_threads     = 0;
     int   n_gpu_layers  = 0;
 
+    // QVAC-21483 — desired output sample rate in Hz.  Supertonic natively
+    // emits at the model's metadata rate (typically 44.1 kHz); when this is a
+    // positive rate other than the native one, the engine resamples the final
+    // PCM (Kaiser-windowed sinc) and reports the requested rate on
+    // SynthesisResult::sample_rate.  Honoured on both the batch and streaming
+    // paths.  0 keeps the native rate (default; zero behaviour change).
+    // Validated at construction: must be 0 or within [8000, 192000] Hz.
+    int   output_sample_rate = 0;
+
     // Compute precision for matmul weights — see Precision enum above.
     // Default F32 is the current behaviour (load q8_0 GGUF, expand to f32).
     // F16 / Q8_0 are non-default GPU paths (Metal-validated).