forked from ggml-org/whisper.cpp
-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathCMakeLists.txt
More file actions
1646 lines (1535 loc) · 86.2 KB
/
Copy pathCMakeLists.txt
File metadata and controls
1646 lines (1535 loc) · 86.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Project preamble: minimum CMake version, project identity, and the
# C++ dialect used to initialise targets created after this point.
cmake_minimum_required(VERSION 3.20)
project(tts-cpp
    VERSION   0.1.0
    LANGUAGES C CXX
)

# C++17 is mandatory: do not let the compiler fall back to an older
# dialect when 17 is unavailable.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD          17)

# GNUInstallDirs supplies the CMAKE_INSTALL_<dir> variables;
# CheckIncludeFileCXX supplies check_include_file_cxx().
include(CheckIncludeFileCXX)
include(GNUInstallDirs)
# TTS_CPP_STANDALONE_DEFAULT is ON only when this listfile is the root
# of the whole build (not pulled in through add_subdirectory). It seeds
# the defaults of the executable and test options.
set(TTS_CPP_STANDALONE_DEFAULT OFF)
if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
    set(TTS_CPP_STANDALONE_DEFAULT ON)
endif()
# User-facing build switches. Executables and tests default to ON only
# for a standalone (top-level) configure; see TTS_CPP_STANDALONE_DEFAULT.
option(TTS_CPP_BUILD_LIBRARY
    "tts-cpp: build the tts-cpp library"
    ON)
option(TTS_CPP_BUILD_SHARED
    "tts-cpp: build tts-cpp as a SHARED library (otherwise STATIC). Defaults to OFF; the test harnesses (test-mtl-tokenizer, test-supertonic-*) link against tts-cpp directly and use detail-namespaced symbols outside the TTS_CPP_API public surface, so SHARED hides them and disables those targets. Decoupled from BUILD_SHARED_LIBS because ggml's own CMake declares its own option(BUILD_SHARED_LIBS) which pollutes the cache with a platform-dependent default once any configure has run; using a project-namespaced option keeps the two independent and prevents a second configure from silently flipping linkage."
    OFF)
option(TTS_CPP_BUILD_EXECUTABLES
    "tts-cpp: build the tts-cli executable + mel2wav demo"
    ${TTS_CPP_STANDALONE_DEFAULT})
option(TTS_CPP_BUILD_TESTS
    "tts-cpp: build per-stage numerical validation harnesses"
    ${TTS_CPP_STANDALONE_DEFAULT})
option(TTS_CPP_INSTALL
    "tts-cpp: generate install rules"
    ON)
# Selects where ggml comes from: ON -> find_package(ggml CONFIG),
# OFF -> add_subdirectory(ggml). The help text describes what the
# configure logic in this file actually does: OFF is accepted when
# tts-cpp/ggml/CMakeLists.txt exists and is a FATAL_ERROR otherwise.
option(TTS_CPP_USE_SYSTEM_GGML
    "tts-cpp: use system-installed GGML library (find_package(ggml) instead of add_subdirectory(ggml)). Defaults to ON in this in-tree subtree of qvac-ext-lib-whisper.cpp: the QVAC speech-stack vcpkg port `ggml-speech` ships a pre-patched ggml from the qvac-ext-ggml/speech branch and is the canonical source-of-truth for builds out of this tree. Flipping OFF requires tts-cpp/ggml/ to be a checkout of tetherto/qvac-ext-ggml@speech (run `bash tts-cpp/scripts/setup-ggml.sh` first); configure fails with a FATAL_ERROR when that checkout is missing."
    ON)
# Note: the standalone chatterbox.cpp repo carries a TTS_CPP_GGML_LIB_PREFIX
# option here that controls renaming of bundled libggml-*.so to
# libspeech-ggml-*.so when add_subdirectory(ggml) builds it locally. In
# this in-tree subtree the default is find_package(ggml) (see
# TTS_CPP_USE_SYSTEM_GGML above); the bundled add_subdirectory(ggml)
# path only configures when a tts-cpp/ggml/ checkout is present, and no
# rename is applied to it here. The option, the helper function, and
# the foreach prefix loop that used to live here were therefore dropped
# entirely; the `ggml-speech` vcpkg port emits the renamed filenames
# itself, so consumers picking up tts-cpp through find_package never
# see the standalone bundled-rename path.
# ccache support for tts-cpp's own targets. Enabled by default; pass
# -DTTS_CPP_CCACHE=OFF for clean reproducibility runs. The launcher is
# attached per target by tts_cpp_apply_ccache(); ggml is left to its own
# GGML_CCACHE option so the two launchers never stack on ggml sources.
# (The original note quotes roughly 30 % faster iterative rebuilds on
# developer machines.)
option(TTS_CPP_CCACHE "tts-cpp: use ccache as compiler launcher if available" ON)
if (TTS_CPP_CCACHE)
    # Result is cached in TTS_CPP_CCACHE_PROGRAM (a -NOTFOUND value when
    # ccache is absent).
    find_program(TTS_CPP_CCACHE_PROGRAM NAMES ccache)
    if (TTS_CPP_CCACHE_PROGRAM)
        message(STATUS "tts-cpp: using ccache at ${TTS_CPP_CCACHE_PROGRAM} (tts-cpp targets only; ggml uses GGML_CCACHE)")
    endif()
endif()
# tts_cpp_apply_ccache(<target>)
#
# Sets ccache as the C and C++ compiler launcher on <target>.
# No-op unless BOTH hold:
#   * TTS_CPP_CCACHE is ON, and
#   * TTS_CPP_CCACHE_PROGRAM holds a found ccache path.
#
# The TTS_CPP_CCACHE check matters because find_program() stores its
# result in the cache: after one configure with ccache enabled, a later
# re-configure with -DTTS_CPP_CCACHE=OFF would otherwise still see the
# cached program path and keep launching through ccache.
function(tts_cpp_apply_ccache target)
    if (NOT TTS_CPP_CCACHE OR NOT TTS_CPP_CCACHE_PROGRAM)
        return()
    endif()
    set_target_properties(${target} PROPERTIES
        C_COMPILER_LAUNCHER   "${TTS_CPP_CCACHE_PROGRAM}"
        CXX_COMPILER_LAUNCHER "${TTS_CPP_CCACHE_PROGRAM}"
    )
endfunction()
# When cross-compiling, pin GGML_NATIVE to OFF unless it is already
# defined with a false value. The FORCE is deliberate: it also overrides
# an explicit GGML_NATIVE=ON, for the reason given in the status message.
if (CMAKE_CROSSCOMPILING AND (NOT DEFINED GGML_NATIVE OR GGML_NATIVE))
    message(STATUS
        "tts-cpp: cross-compiling to ${CMAKE_SYSTEM_NAME} -- "
        "forcing GGML_NATIVE=OFF (ggml's ARM feature probes use try_run() "
        "which CMake refuses in cross-compile mode).")
    set(GGML_NATIVE OFF CACHE BOOL "" FORCE)
endif()
# iOS and Android: force the ggml BLAS and Accelerate switches to OFF
# whenever they are undefined or currently true.
if (CMAKE_SYSTEM_NAME STREQUAL "iOS" OR CMAKE_SYSTEM_NAME STREQUAL "Android")
    foreach (_tts_cpp_mobile_off IN ITEMS GGML_BLAS GGML_ACCELERATE)
        # ${_tts_cpp_mobile_off} expands to the switch name, which if()
        # then dereferences as a variable.
        if (NOT DEFINED ${_tts_cpp_mobile_off} OR ${_tts_cpp_mobile_off})
            set(${_tts_cpp_mobile_off} OFF CACHE BOOL "" FORCE)
        endif()
    endforeach()
endif()
# Android default backend stack: dynamic loading of Vulkan + OpenCL +
# per-arch CPU variants. Mirrors parakeet-cpp's same-repo sibling and
# the qvac llm-llamacpp Android config (see
# qvac-registry-vcpkg/ports/llama-cpp/portfile.cmake) so the tts-cpp
# Android prebuilds drop into the same `qvac__tts-ggml/` folder shape
# as the parakeet / llamacpp ones: a `.bare` module + sibling
# `lib<prefix>ggml-{vulkan,opencl,cpu-android_armv*_*}.so` files that
# `ggml_backend_load_all_from_path()` discovers at runtime.
#
# Selection at runtime is centralised in
# `tts_cpp::detail::init_gpu_backend()` (src/backend_selection.cpp):
# OpenCL when an Adreno 700+ device is present, Vulkan for every
# other GPU (non-Adreno, Adreno < 700, Mali, Xclipse, ...). No
# static GPU backend entry points are linked anywhere in libtts-cpp;
# the registry walk reaches the right backend in both
# GGML_BACKEND_DL=ON (Android prebuild) and GGML_BACKEND_DL=OFF
# (desktop dev) modes.
#
# Callers that have specific reasons to deviate (e.g. a desktop
# bring-up build that wants Vulkan only) can still override any of
# these at the cmake command line; we only set defaults that haven't
# already been provided.
# Android-only ggml defaults. Each switch below is set to ON in the
# cache ONLY when the caller has not provided it in any form:
#   * as a cache entry (-DGGML_X=... on the command line), or
#   * as a normal variable (set(GGML_X ...) in a parent project or a
#     toolchain file).
# Checking the normal variable as well mirrors the GGML_OPENMP guard
# used for the Windows workaround in this file, and keeps a parent
# project's set(GGML_X OFF) from being replaced by the cache default.
# FORCE is unnecessary because the guard already guarantees the cache
# entry does not exist.
#
# GGML_VULKAN_DISABLE_COOPMAT / _COOPMAT2: ggml-vulkan's coopmat /
# coopmat2 shader compile pulls in extensions that most Android Vulkan
# drivers don't expose; the upstream llama Android build disables both
# for the same reason.
if (CMAKE_SYSTEM_NAME STREQUAL "Android")
    foreach (_tts_cpp_android_default IN ITEMS
            GGML_BACKEND_DL
            GGML_CPU_ALL_VARIANTS
            GGML_CPU_REPACK
            GGML_VULKAN
            GGML_OPENCL
            GGML_VULKAN_DISABLE_COOPMAT
            GGML_VULKAN_DISABLE_COOPMAT2)
        if (NOT DEFINED ${_tts_cpp_android_default}
                AND NOT DEFINED CACHE{${_tts_cpp_android_default}})
            set(${_tts_cpp_android_default} ON CACHE BOOL "")
        endif()
    endforeach()
endif()
# Two related workarounds for clang-cl / MSVC builds on Windows. Both
# come from msys2 sneaking GCC-flavoured libraries onto CMake's search
# paths and being mismatched against MSVC-compiled translation units.
# Both are no-ops on MinGW (excluded) and on non-Windows.
#
# (1) libm. ggml/src/CMakeLists.txt unconditionally links libm on Windows
# unless ONEAPI_ROOT is set, which fails when find_library picks up a
# stray libm.dll.a from msys2 (lld-link errors with `could not open
# 'm.lib'`). Set ONEAPI_ROOT in the configure-time environment so
# ggml's existing escape hatch fires. set(ENV{...}) only affects
# the running CMake configure process — the same scope ggml's check
# runs in.
#
# (2) OpenMP. CMake's FindOpenMP picks `-fopenmp=libomp` (LLVM-style) for
# MSVC, but resolves OpenMP_*_LIBRARIES to msys2's libgomp.dll.a if
# msys2 is on PATH. The resulting object files reference LLVM
# `__kmpc_*` symbols that GOMP doesn't provide and lld-link errors
# out with `undefined symbol: __kmpc_global_thread_num`. Disable
# OpenMP for this toolchain; the perf hit on the CPU encoder is
# bounded and only affects this specific build environment.
# Windows, excluding MinGW: apply the OpenMP and libm workarounds.
if (WIN32 AND NOT MINGW)
    # OpenMP: default GGML_OPENMP to OFF unless the caller already
    # provided it as a normal or a cache variable. A normal (non-cache)
    # variable is used on purpose so the value stays scoped to this
    # listfile and the add_subdirectory(ggml) it performs, and does not
    # end up in a parent project's CMakeCache when tts-cpp is consumed
    # via add_subdirectory. ggml's `option(GGML_OPENMP ... ON)` respects
    # an existing normal variable under CMP0077 (CMake 3.13+).
    if (NOT (DEFINED GGML_OPENMP OR DEFINED CACHE{GGML_OPENMP}))
        set(GGML_OPENMP OFF)
    endif()

    # libm: make ONEAPI_ROOT visible in the environment of this
    # configure run when it is not already set.
    if (NOT DEFINED ENV{ONEAPI_ROOT})
        set(ENV{ONEAPI_ROOT} "1")
    endif()
endif()
# Provide a target named `ggml` unless an enclosing project already
# defined one.
#
#   TTS_CPP_USE_SYSTEM_GGML=ON  -> find_package(ggml CONFIG) and alias
#                                  the imported ggml::ggml as `ggml`.
#   TTS_CPP_USE_SYSTEM_GGML=OFF -> add_subdirectory(ggml), which needs a
#                                  checkout at tts-cpp/ggml/.
if (NOT TARGET ggml)
    if (TTS_CPP_USE_SYSTEM_GGML)
        # Deliberately NOT `REQUIRED`: with REQUIRED, CMake aborts inside
        # find_package() and the project-specific hint below could never
        # be printed. Without it, CMake still emits its own "could not
        # find package" warning, followed by our actionable error.
        find_package(ggml CONFIG)
        if (NOT ggml_FOUND)
            message(FATAL_ERROR
                "tts-cpp: system-installed GGML library not found. Install the "
                "qvac speech-stack `ggml-speech` vcpkg port (qvac-ext-ggml/speech "
                "branch) and re-run cmake.")
        endif()
        # Skip the alias if the package config itself already defined a
        # plain `ggml` target; add_library() would fail on the duplicate.
        if (NOT TARGET ggml)
            add_library(ggml ALIAS ggml::ggml)
        endif()
    else()
        # Bundled-ggml dev build path (TTS_CPP_USE_SYSTEM_GGML=OFF).
        # Expects `tts-cpp/ggml/` to be a checkout of the
        # tetherto/qvac-ext-ggml repo on the `speech` branch — the QVAC
        # fork carrying every infrastructure patch + the Supertonic 2
        # fused custom op family as commits (not as a patches/ overlay).
        #
        # Run `bash tts-cpp/scripts/setup-ggml.sh` first to clone +
        # check out the pinned commit. No patches/ directory is
        # consulted: the speech branch is already pre-patched at the
        # commit level.
        if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ggml/CMakeLists.txt")
            message(FATAL_ERROR
                "tts-cpp: bundled-ggml build requires tts-cpp/ggml/ to be "
                "a checkout of tetherto/qvac-ext-ggml@speech. Run "
                "`bash tts-cpp/scripts/setup-ggml.sh` first, or pass "
                "-DTTS_CPP_USE_SYSTEM_GGML=ON to consume the QVAC "
                "speech-stack `ggml-speech` vcpkg port.")
        endif()
        add_subdirectory(ggml)
    endif()
endif()
# The bundled-ggml filename-prefix block (helper function +
# tts_cpp_apply_ggml_prefix loop + GGML_BACKEND_DL_PROJECT_PREFIX
# define) used to live here in the standalone chatterbox.cpp source.
# Removed in this in-tree subtree: the bundled add_subdirectory(ggml)
# path above applies no rename (and fails at configure time unless
# tts-cpp/ggml/ has been checked out), and find_package
# (TTS_CPP_USE_SYSTEM_GGML=ON, the default here) consumes a ggml
# whose filenames + macro are already set by the ggml-speech vcpkg
# port at its build time. See the standalone chatterbox.cpp repo if
# you need the local rename flow.
# OpenMP for the tts-cpp target (default ON).
#
# On Windows non-MinGW toolchains the option is forced OFF unless the
# caller opts back in with a TRUE TTS_CPP_OPENMP_USER_OVERRIDE. The
# override is tested for truthiness, not mere definedness, so
# -DTTS_CPP_OPENMP_USER_OVERRIDE=OFF no longer counts as an override;
# this matches the instructions printed in the status message. The
# forced cache write repeats the option's help string so it is not
# blanked in cmake-gui / ccmake.
option(TTS_CPP_OPENMP "tts-cpp: enable OpenMP for the tts-cpp target" ON)
if (WIN32 AND NOT MINGW AND TTS_CPP_OPENMP AND NOT TTS_CPP_OPENMP_USER_OVERRIDE)
    set(TTS_CPP_OPENMP OFF CACHE BOOL "tts-cpp: enable OpenMP for the tts-cpp target" FORCE)
    message(STATUS "tts-cpp: OpenMP disabled on Windows non-MinGW (clang-cl / MSVC + msys2 libgomp mismatch, plus consumer-toolchain probe leaks via static-archive transitive link interface). Set -DTTS_CPP_OPENMP_USER_OVERRIDE=ON and -DTTS_CPP_OPENMP=ON to force-enable.")
endif()
# Not REQUIRED: targets link OpenMP::OpenMP_CXX only when
# OpenMP_CXX_FOUND is set.
if (TTS_CPP_OPENMP)
    find_package(OpenMP)
endif()
# MSVC-only preprocessor definitions, added at directory scope.
if (MSVC)
    add_compile_definitions(
        _USE_MATH_DEFINES
        _CRT_SECURE_NO_WARNINGS
        NOMINMAX
    )
endif()

# tts-cpp-backend-defs: an intentionally empty INTERFACE library, kept
# only for export-set compatibility (it is part of
# `install(EXPORT tts-cppTargets)` and downstream `find_package(tts-cpp)`
# consumers list it as a link dependency).
#
# It carries nothing because tts-cpp performs every backend selection
# and capability query through the ggml-backend registry
# (`init_gpu_backend()` / `init_cpu_backend()` / `init_blas_backend()`
# in src/backend_selection.cpp) and never calls a
# `ggml_backend_<backend>_init` / `ggml_backend_is_<backend>` /
# `ggml_backend_vk_*` entry point directly. The registry walk together
# with `ggml_backend_get_device` / `ggml_backend_dev_*` reaches the
# right backend in both `GGML_BACKEND_DL=ON` (Android / Linux .so
# prebuild) and `GGML_BACKEND_DL=OFF` (static-link desktop) modes.
# Mirrors parakeet-cpp's `parakeet-backend-defs`.
add_library(tts-cpp-backend-defs INTERFACE)
# Translation units compiled into the tts-cpp library. Listed
# explicitly (no globbing); the order is the original one.
set(TTS_CPP_LIB_SOURCES
    src/backend_selection.cpp
    src/main.cpp
    src/chatterbox_cli.cpp
    src/gpt2_bpe.cpp
    src/chatterbox_tts.cpp
    src/chatterbox_engine.cpp
    src/log.cpp
    src/voice_features.cpp
    src/mel_extract_stft.cpp
    src/voice_encoder.cpp
    src/campplus.cpp
    src/s3tokenizer.cpp
    src/t3_mtl.cpp
    src/t3_stop_controller.cpp
    src/t3_alignment_analyzer.cpp
)

# supertonic_* sources.
list(APPEND TTS_CPP_LIB_SOURCES
    src/supertonic_gguf.cpp
    src/supertonic_preprocess.cpp
    src/supertonic_vocoder.cpp
    src/supertonic_duration.cpp
    src/supertonic_text_encoder.cpp
    src/supertonic_vector_estimator.cpp
    src/supertonic_engine.cpp
    src/supertonic_voice_json.cpp
    src/supertonic_chunker.cpp
)

# Tokenizer and text preprocessing.
list(APPEND TTS_CPP_LIB_SOURCES
    src/mtl_tokenizer.cpp
    src/text_preprocess.cpp
)

# LavaSR neural speech enhancement (QVAC-16579) — CPU/GGML post-process.
list(APPEND TTS_CPP_LIB_SOURCES
    src/lavasr/dsp/resampler.cpp
    src/lavasr/dsp/stft_processor.cpp
    src/lavasr/dsp/mel_filterbank.cpp
    src/lavasr/dsp/fastlr_merge.cpp
    src/lavasr/enhancer_core.cpp
    src/lavasr/enhancer.cpp
    src/lavasr/enhancer_gguf.cpp
    src/lavasr/enhancer_api.cpp
)
# The tts-cpp library target: linkage selection, symbol visibility,
# include paths, optional MeCab, export defines, versioning,
# optimisation flags and OpenMP.
if (TTS_CPP_BUILD_LIBRARY)
    # Linkage is decoupled from BUILD_SHARED_LIBS by design.
    # parakeet.cpp's same-repo sibling re-couples
    # (`if (DEFINED BUILD_SHARED_LIBS)`), but that pattern silently
    # breaks on the second configure: ggml's own add_subdirectory(ggml)
    # declares option(BUILD_SHARED_LIBS ...) which populates the cache
    # with a platform-dependent default (Windows non-MinGW: ON), and any
    # subsequent DEFINED check returns TRUE regardless of whether the
    # user explicitly asked for shared linkage. Empirically:
    # cmake -B build (1st run, no -D) -> STATIC; cmake -B build (2nd
    # run, no -D) -> SHARED. A project-namespaced option keeps the two
    # strictly independent. See PR review item #15.
    #
    # SHARED hides the detail-namespaced symbols that the test harnesses
    # link against directly (test-mtl-tokenizer, test-supertonic-*); the
    # CXX_VISIBILITY_PRESET=hidden setting below makes those symbols
    # invisible at SHARED-link time. Disable the tests
    # (TTS_CPP_BUILD_TESTS=OFF) when building shared.
    if (TTS_CPP_BUILD_SHARED)
        add_library(tts-cpp SHARED ${TTS_CPP_LIB_SOURCES})
    else()
        add_library(tts-cpp STATIC ${TTS_CPP_LIB_SOURCES})
    endif()
    add_library(tts-cpp::tts-cpp ALIAS tts-cpp)
    tts_cpp_apply_ccache(tts-cpp)

    # Hide every symbol in the .so/.dll by default; only those tagged
    # with TTS_CPP_API (see include/tts-cpp/export.h) end up exported.
    # Mirrors parakeet.cpp's pattern and shrinks the runtime
    # dynamic-symbol table to just the public API surface, which is also
    # what lets a host that vendors libtts-cpp.a inside its own .so see
    # only one tts_cpp::* surface instead of every detail-namespaced
    # helper.
    set_target_properties(tts-cpp PROPERTIES
        CXX_VISIBILITY_PRESET     hidden
        VISIBILITY_INLINES_HIDDEN ON
    )

    # Public headers live under include/; src/ and the bundled ggml
    # directories are private to the library's own translation units.
    target_include_directories(tts-cpp
        PUBLIC
            $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
            $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
        PRIVATE
            ${CMAKE_CURRENT_SOURCE_DIR}/src
            ${CMAKE_CURRENT_SOURCE_DIR}/ggml/include
            ${CMAKE_CURRENT_SOURCE_DIR}/ggml/src
    )
    target_compile_features(tts-cpp PUBLIC cxx_std_17)
    target_link_libraries(tts-cpp PUBLIC ggml)

    # ------------------------------------------------------------------
    # Optional native preprocessors for the MTL non-Latin tier:
    #   * `ja` -- libmecab (dictionary path passed at runtime)
    #   * `zh` -- Cangjie5_TC mapping (TSV path passed at runtime)
    #
    # libmecab is autodetected via find_package / find_library /
    # find_path. The dictionary path is no longer baked in at compile
    # time; callers pass it at runtime via --mecab-dict (CLI) or
    # EngineOptions::mecab_dict_path (API). Use
    # scripts/build_mecab_dict.py to materialize the IPAdic dictionary
    # from the `ipadic` PyPI package.
    #
    # Cangjie no longer requires a compile-time TSV path; callers pass
    # it at runtime via --cangjie-tsv (CLI) or
    # EngineOptions::cangjie_tsv_path (API).
    # ------------------------------------------------------------------
    find_package(mecab CONFIG QUIET)
    find_library(MECAB_LIBRARY     NAMES mecab)
    find_path   (MECAB_INCLUDE_DIR NAMES mecab.h PATH_SUFFIXES mecab)
    if (MECAB_LIBRARY AND MECAB_INCLUDE_DIR)
        message(STATUS "MeCab (Japanese preprocess): ENABLED")
        message(STATUS " library : ${MECAB_LIBRARY}")
        message(STATUS " include : ${MECAB_INCLUDE_DIR}")
        # MeCab is an implementation detail of src/text_preprocess.cpp;
        # no public header under include/ exposes MeCab types or
        # symbols. Keep the define, include dir and link all PRIVATE so
        # consumers doing find_package(tts-cpp) don't inherit a libmecab
        # dependency (and so the MinGW mecab.h headers never leak into a
        # consumer's TU).
        target_compile_definitions(tts-cpp PRIVATE TTS_CPP_HAS_MECAB)
        target_include_directories(tts-cpp PRIVATE ${MECAB_INCLUDE_DIR})
        if (TARGET mecab::mecab)
            # The vcpkg mecab target carries the Windows static-library
            # compile definitions needed to avoid dllimport references.
            target_link_libraries(tts-cpp PRIVATE mecab::mecab)
        else()
            target_link_libraries(tts-cpp PRIVATE ${MECAB_LIBRARY})
        endif()
    else()
        message(STATUS "MeCab (Japanese preprocess): DISABLED (libmecab / mecab.h not found)")
    endif()

    # Symbol-export contract for shared builds. Standard llama-style
    # PAIR: PUBLIC define so consumers see the import side of
    # TTS_CPP_API (declspec(dllimport) on Windows /
    # visibility("default") elsewhere), PRIVATE define inside the
    # library's TUs to flip Windows from dllimport to dllexport. Neither
    # is added in static builds (the default), so static consumers see
    # exactly the same surface.
    get_target_property(_tts_cpp_type tts-cpp TYPE)
    if (_tts_cpp_type STREQUAL "SHARED_LIBRARY")
        target_compile_definitions(tts-cpp PUBLIC  TTS_CPP_SHARED)
        target_compile_definitions(tts-cpp PRIVATE TTS_CPP_BUILD)
    endif()

    # Versioning: VERSION drives the SONAME suffix on Linux/macOS shared
    # builds; SOVERSION 0 keeps the load-time name (libtts-cpp.so.0)
    # stable across patch releases so consumers don't relink on every
    # bump. No effect on static / Windows builds. Mirrors parakeet.cpp.
    set_target_properties(tts-cpp PROPERTIES
        VERSION   ${PROJECT_VERSION}
        SOVERSION 0
    )

    # Release-mode optimisation flags. GCC/clang use -O3 in CMake's
    # default Release config already, but ffast-math + funroll-loops
    # buy ~5-15% on the T3-dominated CPU path (~80% of wall time on
    # M3 CPU per README's benchmarks). Conditioned on non-MSVC; the
    # MSVC default /O2 is good enough and these are GCC-syntax flags.
    #
    # The configuration is tested with generator expressions rather than
    # a configure-time CMAKE_BUILD_TYPE comparison. CMAKE_BUILD_TYPE is
    # always empty under multi-config generators (Xcode, Ninja
    # Multi-Config), so the previous `NOT CMAKE_BUILD_TYPE` branch
    # applied -O3 / -ffast-math to every configuration there, Debug
    # included. The expression below is true for:
    #   * the Release configuration (matched case-insensitively), and
    #   * an empty configuration, i.e. a single-config generator run
    #     without CMAKE_BUILD_TYPE (same as the previous behaviour).
    if (NOT MSVC)
        set(_tts_cpp_opt_config "$<OR:$<CONFIG:Release>,$<STREQUAL:$<CONFIG>,>>")
        target_compile_options(tts-cpp PRIVATE
            "$<$<AND:$<COMPILE_LANGUAGE:C,CXX>,${_tts_cpp_opt_config}>:-O3;-ffast-math;-funroll-loops>"
        )
        unset(_tts_cpp_opt_config)
    endif()

    if (OpenMP_CXX_FOUND)
        target_link_libraries(tts-cpp PRIVATE OpenMP::OpenMP_CXX)
    endif()
    target_link_libraries(tts-cpp PRIVATE tts-cpp-backend-defs)

    set_target_properties(tts-cpp PROPERTIES
        POSITION_INDEPENDENT_CODE ON
        EXPORT_NAME               tts-cpp
    )
endif()
# Command-line programs. tts-cli and supertonic-cli link the tts-cpp
# library, so the library must be enabled; mel2wav links ggml only.
if (TTS_CPP_BUILD_EXECUTABLES AND NOT TTS_CPP_BUILD_LIBRARY)
    message(FATAL_ERROR "TTS_CPP_BUILD_EXECUTABLES requires TTS_CPP_BUILD_LIBRARY=ON")
endif()

if (TTS_CPP_BUILD_EXECUTABLES)
    # tts-cli
    add_executable(tts-cli src/cli_main.cpp)
    tts_cpp_apply_ccache(tts-cli)
    target_link_libraries(tts-cli PRIVATE tts-cpp)

    # mel2wav: built directly against ggml, with the ggml and src
    # headers on its include path.
    add_executable(mel2wav src/mel2wav.cpp)
    tts_cpp_apply_ccache(mel2wav)
    target_include_directories(mel2wav PRIVATE ggml/include src)
    target_link_libraries(mel2wav PRIVATE ggml)

    # supertonic-cli
    add_executable(supertonic-cli src/supertonic_cli.cpp)
    tts_cpp_apply_ccache(supertonic-cli)
    target_link_libraries(supertonic-cli PRIVATE tts-cpp)
endif()
if (TTS_CPP_BUILD_TESTS)
# Enable test registration for this directory and pull in the CTest
# module.
enable_testing()
include(CTest)

# Fixture roots used by the test registrations that follow. All three
# are CACHE PATH entries so CI / package builds can redirect them to
# pre-staged mirrors (e.g. a tarball of `.gguf`s + reference `.npy`s)
# without editing this listfile.
#
#   TTS_CPP_TEST_MODEL_DIR  GGUF checkpoints (produced by
#                           scripts/convert-*-to-gguf.py or
#                           `bash scripts/setup-supertonic2.sh`).
#   TTS_CPP_TEST_AUDIO_DIR  reference WAV fixtures
#                           (test/reference-audio/jfk.wav, ...).
#   TTS_CPP_TEST_REF_DIR    PyTorch / ONNX reference dumps (`.npy`) from
#                           scripts/dump-{s3gen,supertonic,t3-mtl}-reference.py;
#                           expected layout is
#                           <dir>/{s3gen-ref,supertonic-ref-quick,t3-mtl-ref}/.
#
# A test whose reference dump is absent is auto-disabled, so a fresh
# checkout still gets a green ctest run for the harnesses whose fixtures
# it does have.
set(TTS_CPP_TEST_MODEL_DIR
    "${CMAKE_CURRENT_SOURCE_DIR}/models"
    CACHE PATH "tts-cpp: directory containing .gguf fixtures used by the test suite")
set(TTS_CPP_TEST_AUDIO_DIR
    "${CMAKE_CURRENT_SOURCE_DIR}/test/reference-audio"
    CACHE PATH "tts-cpp: directory containing .wav fixtures used by the test suite")
set(TTS_CPP_TEST_REF_DIR
    "${CMAKE_CURRENT_SOURCE_DIR}/artifacts"
    CACHE PATH "tts-cpp: directory containing PyTorch / ONNX .npy reference dumps")
# tts_cpp_register_test(<name>
#     [LABEL <label>] [EXE <target>]
#     [ARGS <arg>...] [REQUIRES <path>...])
#
# Registers a built test executable with CTest so every harness is
# reachable through `ctest` in the build dir.
#
#   <name>    CTest test name; also the executable target unless EXE is
#             given.
#   LABEL     CTest label used for filtering (e.g. `ctest -L cpu` to
#             skip GPU-only tests on CI fleets without the matching
#             backend). Defaults to "cpu".
#   EXE       Executable target to run. Pass it when several tests share
#             one binary and differ only in CLI args / fixtures (e.g.
#             test-cpu-caches-{turbo,mtl} all invoke test-cpu-caches).
#   ARGS      Command-line arguments for the executable.
#   REQUIRES  Paths that must exist at configure time. If any is
#             missing the test is still registered (so it shows up in
#             `ctest -N`) but marked DISABLED, so a missing reference
#             dump shows as "Not Run" rather than a hard failure.
function(tts_cpp_register_test target)
    cmake_parse_arguments(TCRT "" "LABEL;EXE" "ARGS;REQUIRES" ${ARGN})
    if (NOT DEFINED TCRT_LABEL)
        set(TCRT_LABEL "cpu")
    endif()
    if (NOT DEFINED TCRT_EXE)
        set(TCRT_EXE ${target})
    endif()

    add_test(NAME ${target}
        COMMAND $<TARGET_FILE:${TCRT_EXE}> ${TCRT_ARGS})
    set_property(TEST ${target} PROPERTY LABELS ${TCRT_LABEL})

    if (WIN32)
        # When ggml builds as shared (the MSVC default), test exes
        # link against import libs and need to find speech-ggml-*.dll
        # at runtime. Windows has no rpath, so we prepend the
        # build-tree DLL output dirs to PATH. Both `bin/` (Ninja /
        # single-config) and `bin/<Config>/` (Visual Studio /
        # multi-config) are listed because the generator type isn't
        # known at configure time on a fresh checkout; the
        # generator-expression $<CONFIG> resolves to the active
        # config at test time. Without this, ctest sees
        # STATUS_DLL_NOT_FOUND (0xc0000135) before main() runs.
        #
        # ENVIRONMENT is a ;-separated list of NAME=VALUE entries, and
        # the Windows PATH itself is ;-separated. Every `;` inside the
        # inherited PATH must therefore be escaped as `\;`; otherwise
        # the list is split at the first one and the test runs with a
        # PATH truncated to its first inherited entry.
        string(REPLACE ";" "\\;" _tcrt_inherited_path "$ENV{PATH}")
        set_property(TEST ${target} PROPERTY ENVIRONMENT
            "PATH=${CMAKE_BINARY_DIR}/bin/$<CONFIG>\;${CMAKE_BINARY_DIR}/bin\;${_tcrt_inherited_path}")
    endif()

    # Collect the REQUIRES paths that do not exist right now.
    set(_tcrt_missing "")
    foreach (_tcrt_req IN LISTS TCRT_REQUIRES)
        if (NOT EXISTS "${_tcrt_req}")
            list(APPEND _tcrt_missing "${_tcrt_req}")
        endif()
    endforeach()
    if (_tcrt_missing)
        set_property(TEST ${target} PROPERTY DISABLED TRUE)
        list(JOIN _tcrt_missing "\n " _tcrt_missing_pretty)
        message(STATUS "tts-cpp: test ${target} disabled (missing fixture(s):\n ${_tcrt_missing_pretty})")
    endif()
endfunction()
# Short names for the fixture paths used by the test registrations.
# They are computed at configure time; a path that does not exist turns
# the owning test into a DISABLED one through
# tts_cpp_register_test(REQUIRES ...).

# GGUF checkpoints under TTS_CPP_TEST_MODEL_DIR.
set(_tcb_t3_turbo_gguf  "${TTS_CPP_TEST_MODEL_DIR}/chatterbox-t3-turbo.gguf")
set(_tcb_t3_mtl_gguf    "${TTS_CPP_TEST_MODEL_DIR}/chatterbox-t3-mtl.gguf")
set(_tcb_t3_mtl_q4_gguf "${TTS_CPP_TEST_MODEL_DIR}/chatterbox-t3-mtl-q4_0.gguf")
set(_tcb_s3gen_gguf     "${TTS_CPP_TEST_MODEL_DIR}/chatterbox-s3gen.gguf")
set(_tcb_s3gen_mtl_gguf "${TTS_CPP_TEST_MODEL_DIR}/chatterbox-s3gen-mtl.gguf")
set(_tcb_super_gguf     "${TTS_CPP_TEST_MODEL_DIR}/supertonic.gguf")

# Reference-dump directories (and probe files) under TTS_CPP_TEST_REF_DIR.
set(_tcb_super_ref        "${TTS_CPP_TEST_REF_DIR}/supertonic-ref-quick")
set(_tcb_super_ref_noise  "${_tcb_super_ref}/noise.npy")
set(_tcb_s3gen_ref        "${TTS_CPP_TEST_REF_DIR}/s3gen-ref")
set(_tcb_s3gen_ref_logits "${_tcb_s3gen_ref}/logits.npy")
set(_tcb_t3_mtl_ref       "${TTS_CPP_TEST_REF_DIR}/t3-mtl-ref")
set(_tcb_streaming_ref    "${TTS_CPP_TEST_REF_DIR}/streaming-ref")

# Reference audio under TTS_CPP_TEST_AUDIO_DIR.
set(_tcb_jfk_wav "${TTS_CPP_TEST_AUDIO_DIR}/jfk.wav")
# test-s3gen: test/test_s3gen.cpp linked against ggml only.
# Runs as: test-s3gen <s3gen gguf> <s3gen-ref dir> ALL
# Disabled when the GGUF or s3gen-ref/logits.npy is missing.
add_executable(test-s3gen test/test_s3gen.cpp)
target_include_directories(test-s3gen PRIVATE ggml/include src)
target_link_libraries(test-s3gen PRIVATE ggml)
tts_cpp_apply_ccache(test-s3gen)
tts_cpp_register_test(test-s3gen
    LABEL    "fixture"
    ARGS     "${_tcb_s3gen_gguf}" "${_tcb_s3gen_ref}" "ALL"
    REQUIRES "${_tcb_s3gen_gguf}" "${_tcb_s3gen_ref_logits}")
# test-voice-features: compiles the voice-feature sources directly into
# the test binary (no tts-cpp link). Its three command-line arguments
# are also the fixtures it requires.
set(_tcb_voice_features_fixtures
    "${_tcb_s3gen_gguf}"
    "${_tcb_jfk_wav}"
    "${_tcb_s3gen_ref}/prompt_feat.npy")
add_executable(test-voice-features
    test/test_voice_features.cpp
    src/voice_features.cpp
    src/mel_extract_stft.cpp
    src/backend_selection.cpp)
target_include_directories(test-voice-features PRIVATE ggml/include src)
target_link_libraries(test-voice-features PRIVATE ggml)
tts_cpp_apply_ccache(test-voice-features)
tts_cpp_register_test(test-voice-features
    LABEL    "fixture"
    ARGS     ${_tcb_voice_features_fixtures}
    REQUIRES ${_tcb_voice_features_fixtures})
# test-resample: unit test with no fixtures and no arguments; builds the
# voice-feature sources into the test binary and links ggml.
add_executable(test-resample
    test/test_resample.cpp
    src/voice_features.cpp
    src/mel_extract_stft.cpp
    src/backend_selection.cpp)
target_include_directories(test-resample PRIVATE src)
target_link_libraries(test-resample PRIVATE ggml)
tts_cpp_apply_ccache(test-resample)
tts_cpp_register_test(test-resample
    LABEL "unit")
# test-voice-encoder: voice encoder plus the voice-feature sources,
# compiled into the test binary. Its arguments (T3 turbo GGUF, jfk.wav,
# speaker_emb.npy) are also its required fixtures.
set(_tcb_voice_encoder_fixtures
    "${_tcb_t3_turbo_gguf}"
    "${_tcb_jfk_wav}"
    "${_tcb_s3gen_ref}/speaker_emb.npy")
add_executable(test-voice-encoder
    test/test_voice_encoder.cpp
    src/voice_encoder.cpp
    src/voice_features.cpp
    src/mel_extract_stft.cpp
    src/backend_selection.cpp)
target_include_directories(test-voice-encoder PRIVATE ggml/include src)
target_link_libraries(test-voice-encoder PRIVATE ggml)
tts_cpp_apply_ccache(test-voice-encoder)
tts_cpp_register_test(test-voice-encoder
    LABEL    "fixture"
    ARGS     ${_tcb_voice_encoder_fixtures}
    REQUIRES ${_tcb_voice_encoder_fixtures})
# test-campplus: src/campplus.cpp compiled into the test binary; links
# ggml, plus OpenMP when it was found. Its arguments (s3gen GGUF,
# fbank.npy, embedding.npy) are also its required fixtures.
set(_tcb_campplus_fixtures
    "${_tcb_s3gen_gguf}"
    "${_tcb_s3gen_ref}/fbank.npy"
    "${_tcb_s3gen_ref}/embedding.npy")
add_executable(test-campplus
    test/test_campplus.cpp
    src/campplus.cpp)
target_include_directories(test-campplus PRIVATE ggml/include src)
target_link_libraries(test-campplus PRIVATE ggml)
if (OpenMP_CXX_FOUND)
    target_link_libraries(test-campplus PRIVATE OpenMP::OpenMP_CXX)
endif()
tts_cpp_apply_ccache(test-campplus)
tts_cpp_register_test(test-campplus
    LABEL    "fixture"
    ARGS     ${_tcb_campplus_fixtures}
    REQUIRES ${_tcb_campplus_fixtures})
# test-fbank: voice-feature sources compiled into the test binary. Its
# arguments (s3gen GGUF, wav_16k.npy, fbank.npy) are also its required
# fixtures.
set(_tcb_fbank_fixtures
    "${_tcb_s3gen_gguf}"
    "${_tcb_s3gen_ref}/wav_16k.npy"
    "${_tcb_s3gen_ref}/fbank.npy")
add_executable(test-fbank
    test/test_fbank.cpp
    src/voice_features.cpp
    src/mel_extract_stft.cpp
    src/backend_selection.cpp)
target_include_directories(test-fbank PRIVATE ggml/include src)
target_link_libraries(test-fbank PRIVATE ggml)
tts_cpp_apply_ccache(test-fbank)
tts_cpp_register_test(test-fbank
    LABEL    "fixture"
    ARGS     ${_tcb_fbank_fixtures}
    REQUIRES ${_tcb_fbank_fixtures})
# test-voice-embedding: campplus plus the voice-feature sources compiled
# into the test binary; links ggml, plus OpenMP when it was found. Its
# arguments (s3gen GGUF, wav_16k.npy, embedding.npy) are also its
# required fixtures.
set(_tcb_voice_embedding_fixtures
    "${_tcb_s3gen_gguf}"
    "${_tcb_s3gen_ref}/wav_16k.npy"
    "${_tcb_s3gen_ref}/embedding.npy")
add_executable(test-voice-embedding
    test/test_voice_embedding.cpp
    src/campplus.cpp
    src/voice_features.cpp
    src/mel_extract_stft.cpp
    src/backend_selection.cpp)
target_include_directories(test-voice-embedding PRIVATE ggml/include src)
target_link_libraries(test-voice-embedding PRIVATE ggml)
if (OpenMP_CXX_FOUND)
    target_link_libraries(test-voice-embedding PRIVATE OpenMP::OpenMP_CXX)
endif()
tts_cpp_apply_ccache(test-voice-embedding)
tts_cpp_register_test(test-voice-embedding
    LABEL    "fixture"
    ARGS     ${_tcb_voice_embedding_fixtures}
    REQUIRES ${_tcb_voice_embedding_fixtures})
# test-s3tokenizer: s3tokenizer + backend_selection compiled into the
# test binary. Its arguments (s3gen GGUF, wav_16k.npy, log_mel.npy,
# speech_tokens.npy) are also its required fixtures.
set(_tcb_s3tokenizer_fixtures
    "${_tcb_s3gen_gguf}"
    "${_tcb_s3gen_ref}/wav_16k.npy"
    "${_tcb_s3gen_ref}/log_mel.npy"
    "${_tcb_s3gen_ref}/speech_tokens.npy")
add_executable(test-s3tokenizer
    test/test_s3tokenizer.cpp
    src/s3tokenizer.cpp
    src/backend_selection.cpp)
target_include_directories(test-s3tokenizer PRIVATE ggml/include src)
target_link_libraries(test-s3tokenizer PRIVATE ggml)
tts_cpp_apply_ccache(test-s3tokenizer)
tts_cpp_register_test(test-s3tokenizer
    LABEL    "fixture"
    ARGS     ${_tcb_s3tokenizer_fixtures}
    REQUIRES ${_tcb_s3tokenizer_fixtures})
# test-t3-stop-controller: the attention-free end-of-speech stop
# controller. It is pure host logic with no model / ggml dependency, so
# the binary is built from the test file plus the controller source
# only, links nothing else, and always runs (no fixture REQUIRES).
add_executable(test-t3-stop-controller
    test/test_t3_stop_controller.cpp
    src/t3_stop_controller.cpp)
tts_cpp_apply_ccache(test-t3-stop-controller)
target_include_directories(test-t3-stop-controller PRIVATE src)
tts_cpp_register_test(test-t3-stop-controller
    LABEL "cpu")
# test-t3-alignment-analyzer (phase 2): the alignment analyzer state
# machine. It is pure host logic with no model / ggml dependency, so it
# is built from the test file plus the analyzer source only and always
# runs.
add_executable(test-t3-alignment-analyzer
    test/test_t3_alignment_analyzer.cpp
    src/t3_alignment_analyzer.cpp)
tts_cpp_apply_ccache(test-t3-alignment-analyzer)
target_include_directories(test-t3-alignment-analyzer PRIVATE src)
tts_cpp_register_test(test-t3-alignment-analyzer
    LABEL "cpu")
# test-mtl-sampler: EOS-suppression in the MTL sampler. The test itself is
# pure logit math, but it has to link the tts-cpp library because
# sample_next_token_mtl lives in the t3_mtl translation unit. No model is
# required (registered without REQUIRES).
add_executable(test-mtl-sampler
    test/test_mtl_sampler.cpp
)
target_include_directories(test-mtl-sampler PRIVATE ggml/include src)
target_link_libraries(test-mtl-sampler PRIVATE tts-cpp ggml tts-cpp-backend-defs)
tts_cpp_apply_ccache(test-mtl-sampler)
tts_cpp_register_test(test-mtl-sampler
    LABEL "cpu"
)
# test-mtl-tokenizer: links the tts-cpp library and is driven by two JSON
# files under ${TTS_CPP_TEST_REF_DIR}/mtl-tokenizer (tokenizer.json and
# golden.json); REQUIRES names both.
add_executable(test-mtl-tokenizer
    test/test_mtl_tokenizer.cpp
)
target_include_directories(test-mtl-tokenizer PRIVATE src)
target_link_libraries(test-mtl-tokenizer PRIVATE tts-cpp)
tts_cpp_apply_ccache(test-mtl-tokenizer)
tts_cpp_register_test(test-mtl-tokenizer
    LABEL "fixture"
    ARGS
        "${TTS_CPP_TEST_REF_DIR}/mtl-tokenizer/tokenizer.json"
        "${TTS_CPP_TEST_REF_DIR}/mtl-tokenizer/golden.json"
    REQUIRES
        "${TTS_CPP_TEST_REF_DIR}/mtl-tokenizer/tokenizer.json"
        "${TTS_CPP_TEST_REF_DIR}/mtl-tokenizer/golden.json"
)
# test-t3-mtl: links tts-cpp + ggml + tts-cpp-backend-defs; argv is the
# multilingual T3 GGUF followed by its reference location, and REQUIRES names
# the same two paths.
add_executable(test-t3-mtl
    test/test_t3_mtl.cpp
)
target_include_directories(test-t3-mtl PRIVATE ggml/include src include)
target_link_libraries(test-t3-mtl PRIVATE tts-cpp ggml tts-cpp-backend-defs)
tts_cpp_apply_ccache(test-t3-mtl)
tts_cpp_register_test(test-t3-mtl
    LABEL "fixture"
    ARGS
        "${_tcb_t3_mtl_gguf}"
        "${_tcb_t3_mtl_ref}"
    REQUIRES
        "${_tcb_t3_mtl_gguf}"
        "${_tcb_t3_mtl_ref}"
)
# test-t3-mtl-stages: same libraries and fixture paths as test-t3-mtl, with a
# third literal argument "ALL" appended to argv (not part of REQUIRES).
add_executable(test-t3-mtl-stages
    test/test_t3_mtl_stages.cpp
)
target_include_directories(test-t3-mtl-stages PRIVATE ggml/include src include)
target_link_libraries(test-t3-mtl-stages PRIVATE tts-cpp ggml tts-cpp-backend-defs)
tts_cpp_apply_ccache(test-t3-mtl-stages)
tts_cpp_register_test(test-t3-mtl-stages
    LABEL "fixture"
    ARGS
        "${_tcb_t3_mtl_gguf}"
        "${_tcb_t3_mtl_ref}"
        "ALL"
    REQUIRES
        "${_tcb_t3_mtl_gguf}"
        "${_tcb_t3_mtl_ref}"
)
# ------------------------------------------------------------------
# Multilingual integration tests (23 languages; zh is currently left out of
# the _mtl_phrases list, so 22 phrases are registered).
# ------------------------------------------------------------------
# Synthesis driver shared by every mtl-synth-<lang> test.
add_executable(test-multilingual-synth
    test/test_multilingual_synth.cpp
)
target_include_directories(test-multilingual-synth PRIVATE ggml/include src include)
target_link_libraries(test-multilingual-synth PRIVATE tts-cpp ggml tts-cpp-backend-defs)
tts_cpp_apply_ccache(test-multilingual-synth)
# Phrase table for the multilingual tests. Every entry has the form
#   "<lang>|<text>|<tier>"
# and is split on '|' by the registration loop, so neither '|' nor ';' may
# appear inside a text. <tier> ends up in the ctest label "mtl-<tier>".
set(_mtl_phrases
    "en|Hello, this is a multilingual text-to-speech test.|tier1"
    "es|Hola, esta es una prueba multilingüe de síntesis de voz.|tier1"
    "fr|Bonjour, ceci est un test multilingue de synthèse vocale.|tier1"
    "de|Hallo, dies ist ein mehrsprachiger Sprachsynthese-Test.|tier1"
    "it|Ciao, questo è un test multilingue di sintesi vocale.|tier1"
    "pt|Olá, este é um teste multilíngue de síntese de voz.|tier1"
    "nl|Hallo, dit is een meertalige spraaksynthese-test.|tier1"
    "pl|Cześć, to jest wielojęzyczny test syntezy mowy.|tier1"
    "tr|Merhaba, bu çok dilli bir konuşma sentezi testidir.|tier1"
    "sv|Hej, detta är ett flerspråkigt talsyntestest.|tier1"
    "da|Hej, dette er en flersproget talesyntese-test.|tier1"
    "fi|Hei, tämä on monikielinen puhesynteesitesti.|tier1"
    "no|Hei, dette er en flerspråklig talesyntese-test.|tier1"
    "el|Γεια σας, αυτή είναι μια πολύγλωσση δοκιμή σύνθεσης ομιλίας.|tier1"
    "ms|Helo, ini ialah ujian sintesis pertuturan berbilang bahasa.|tier1"
    "sw|Habari, huu ni mtihani wa usanisi wa sauti wa lugha nyingi.|tier1"
    "ar|مرحبًا، هذا اختبار متعدد اللغات لتحويل النص إلى كلام.|tier1"
    "ko|안녕하세요, 이것은 다국어 음성 합성 테스트입니다.|tier1"
    "ja|こんにちは、これは多言語の音声合成テストです。|tier2"
    "he|שלום, זוהי בדיקת סינתזת דיבור רב-לשונית.|tier2"
    "ru|Привет, это многоязычный тест синтеза речи.|tier2"
    # zh is deliberately absent: its Cangjie preprocessing yields ~97% CER, and
    # it stays out of mtl_tokenizer::supported_languages() until reworked.
    "hi|नमस्ते, यह एक बहुभाषी वाक् संश्लेषण परीक्षण है।|tier2"
)
# Output directory for the multilingual tests (synthesized WAVs and ASR text
# prefixes point here); created at configure time.
set(_mtl_out_dir "${CMAKE_BINARY_DIR}/test-multilingual-out")
file(MAKE_DIRECTORY "${_mtl_out_dir}")

# Optional L3 (ASR) verification via whisper-cli.
#   WHISPER_CLI   - looked up with find_program (cache entry).
#   WHISPER_MODEL - cache FILEPATH to a multilingual Whisper ggml model.
# _mtl_asr_enabled is ON only when the program was found AND the model file
# exists; only then is the test-multilingual-asr executable defined.
find_program(WHISPER_CLI NAMES whisper-cli)
set(WHISPER_MODEL
    "${CMAKE_SOURCE_DIR}/models/ggml-large-v3-turbo-q5_0.bin"
    CACHE FILEPATH
    "Path to a multilingual Whisper ggml model used by mtl-asr-* tests"
)
if (NOT WHISPER_CLI OR NOT EXISTS "${WHISPER_MODEL}")
    # At least one prerequisite is missing: report every missing piece.
    set(_mtl_asr_enabled OFF)
    message(STATUS "Multilingual L3 (ASR) tests: SKIPPED")
    if (NOT WHISPER_CLI)
        message(STATUS " reason: whisper-cli not found on PATH")
    endif()
    if (NOT EXISTS "${WHISPER_MODEL}")
        message(STATUS " reason: model missing at ${WHISPER_MODEL}")
    endif()
else()
    set(_mtl_asr_enabled ON)
    message(STATUS "Multilingual L3 (ASR) tests: ENABLED")
    message(STATUS " whisper-cli : ${WHISPER_CLI}")
    message(STATUS " model : ${WHISPER_MODEL}")
    # No target_link_libraries here: the driver is built from its single
    # source with only include paths added.
    add_executable(test-multilingual-asr
        test/test_multilingual_asr.cpp
    )
    target_include_directories(test-multilingual-asr PRIVATE src include)
    tts_cpp_apply_ccache(test-multilingual-asr)
endif()
# Register the per-language multilingual tests from _mtl_phrases.
#
# Each entry is "<lang>|<text>|<tier>". For every entry:
#   mtl-synth-<lang>  runs test-multilingual-synth and writes
#                     ${_mtl_out_dir}/<lang>.wav (labels: multilingual,
#                     mtl-<tier>; timeout 180 s).
#   mtl-asr-<lang>    (only when _mtl_asr_enabled) runs test-multilingual-asr
#                     on that WAV and compares against <text> (labels:
#                     multilingual, asr, mtl-<tier>-asr; timeout 120 s).
#
# The WAV hand-off is modelled as a ctest fixture "mtl-wav-<lang>": the synth
# test is its FIXTURES_SETUP and the ASR test FIXTURES_REQUIRED it. A bare
# DEPENDS only orders tests that happen to be selected together; with the
# fixture, ctest additionally (a) schedules mtl-synth-<lang> automatically when
# only mtl-asr-<lang> is selected (e.g. `ctest -L asr`), and (b) marks the ASR
# test "Not Run" when synthesis failed instead of transcribing a missing or
# stale WAV. Pass `ctest -FA .` to opt out of the automatic setup tests.
foreach(_entry IN LISTS _mtl_phrases)
    # Turn "a|b|c" into the CMake list "a;b;c" so list(GET) can pick fields.
    string(REPLACE "|" ";" _parts "${_entry}")
    list(GET _parts 0 _lang)
    list(GET _parts 1 _text)
    list(GET _parts 2 _tier)

    add_test(
        NAME mtl-synth-${_lang}
        COMMAND $<TARGET_FILE:test-multilingual-synth>
            --lang "${_lang}"
            --text "${_text}"
            --out "${_mtl_out_dir}/${_lang}.wav"
    )
    set_tests_properties(mtl-synth-${_lang} PROPERTIES
        LABELS "multilingual;mtl-${_tier}"
        FIXTURES_SETUP "mtl-wav-${_lang}"
        TIMEOUT 180
    )

    if (_mtl_asr_enabled)
        add_test(
            NAME mtl-asr-${_lang}
            COMMAND $<TARGET_FILE:test-multilingual-asr>
                --whisper-cli "${WHISPER_CLI}"
                --model "${WHISPER_MODEL}"
                --lang "${_lang}"
                --wav "${_mtl_out_dir}/${_lang}.wav"
                --expected "${_text}"
                --txt-prefix "${_mtl_out_dir}/${_lang}.asr"
        )
        set_tests_properties(mtl-asr-${_lang} PROPERTIES
            LABELS "multilingual;asr;mtl-${_tier}-asr"
            # DEPENDS is kept for explicit ordering; FIXTURES_REQUIRED adds the
            # "must have succeeded" semantics DEPENDS does not provide.
            DEPENDS mtl-synth-${_lang}
            FIXTURES_REQUIRED "mtl-wav-${_lang}"
            TIMEOUT 120
        )
    endif()
endforeach()
# test-eos-roundtrip: end-to-end EOS round-trip regression. The test drives
# tts-cli to synthesize a set of English phrases, transcribes them with
# whisper-cli, and asserts that the transcription is close to the input (CER
# guard -> catches clipping) and not much longer than it (ramble guard ->
# catches the trailing-token bug re-appearing). It is pure orchestration: the
# executable is always built, but it is only registered when ASR is enabled
# and a tts-cli target exists, and REQUIRES additionally names the MTL GGUFs.
add_executable(test-eos-roundtrip
    test/test_eos_roundtrip.cpp
)
target_include_directories(test-eos-roundtrip PRIVATE src)
tts_cpp_apply_ccache(test-eos-roundtrip)
# NOTE(review): both registrations below pass the same --tmp directory, which
# is also the mtl-synth output directory -- confirm the test's scratch file
# names cannot collide under `ctest -j`.
if (NOT _mtl_asr_enabled OR NOT TARGET tts-cli)
    message(STATUS "tts-cpp: test-eos-roundtrip built but not registered (needs whisper-cli + model + tts-cli)")
else()
    tts_cpp_register_test(test-eos-roundtrip
        LABEL "fixture;asr"
        ARGS
            --tts-cli "$<TARGET_FILE:tts-cli>"
            --t3 "${_tcb_t3_mtl_gguf}"
            --s3gen "${_tcb_s3gen_mtl_gguf}"
            --whisper-cli "${WHISPER_CLI}"
            --whisper-model "${WHISPER_MODEL}"
            --lang "en"
            --tmp "${_mtl_out_dir}"
        REQUIRES
            "${_tcb_t3_mtl_gguf}"
            "${_tcb_s3gen_mtl_gguf}"
    )
    # Second registration of the same executable against the quantized (Q4_0)
    # T3 that the mobile app ships, to confirm the aligned heads still track
    # under quantization. It keeps the f16 S3Gen (the S3Gen Q4_0 path is a
    # separate converter limitation) and REQUIRES the Q4_0 T3 GGUF.
    tts_cpp_register_test(test-eos-roundtrip-q4_0
        EXE test-eos-roundtrip
        LABEL "fixture;asr"
        ARGS
            --tts-cli "$<TARGET_FILE:tts-cli>"
            --t3 "${_tcb_t3_mtl_q4_gguf}"
            --s3gen "${_tcb_s3gen_mtl_gguf}"
            --whisper-cli "${WHISPER_CLI}"
            --whisper-model "${WHISPER_MODEL}"
            --lang "en"
            --tmp "${_mtl_out_dir}"
        REQUIRES
            "${_tcb_t3_mtl_q4_gguf}"
            "${_tcb_s3gen_mtl_gguf}"
    )
endif()
# test-streaming-loudness (QVAC-21118): chunk-streaming loudness regression.
# It exercises the Engine API directly (the path the qvac tts-ggml addon
# uses), comparing batch against streaming with a low stream_cfm_steps and
# asserting that the streaming output matches batch loudness without
# clipping. Before the fix this test FAILS (streaming ~full scale, RMS 4-9x
# batch). The S3Gen built-in voice is used (no voice dir), so only the MTL
# GGUFs are listed in REQUIRES.
add_executable(test-streaming-loudness
    test/test_streaming_loudness.cpp
)
target_include_directories(test-streaming-loudness PRIVATE include ggml/include src)
target_link_libraries(test-streaming-loudness PRIVATE tts-cpp ggml tts-cpp-backend-defs)
tts_cpp_apply_ccache(test-streaming-loudness)
tts_cpp_register_test(test-streaming-loudness
    LABEL "fixture"
    ARGS
        --t3 "${_tcb_t3_mtl_gguf}"
        --s3gen "${_tcb_s3gen_mtl_gguf}"
        --cfm-steps 2
    REQUIRES
        "${_tcb_t3_mtl_gguf}"
        "${_tcb_s3gen_mtl_gguf}"
)
# test-streaming: compiles chatterbox_tts.cpp and backend_selection.cpp
# straight into the test binary (it does not link the tts-cpp library) and
# links ggml + tts-cpp-backend-defs. argv / REQUIRES: the S3Gen GGUF and the
# streaming reference location.
add_executable(test-streaming
    test/test_streaming.cpp
    src/chatterbox_tts.cpp
    src/backend_selection.cpp
)
target_include_directories(test-streaming PRIVATE ggml/include src include)
target_link_libraries(test-streaming PRIVATE ggml tts-cpp-backend-defs)
tts_cpp_apply_ccache(test-streaming)
tts_cpp_register_test(test-streaming
    LABEL "fixture"
    ARGS
        "${_tcb_s3gen_gguf}"
        "${_tcb_streaming_ref}"
    REQUIRES
        "${_tcb_s3gen_gguf}"
        "${_tcb_streaming_ref}"
)
# test-text-split: sentence-splitter unit test with no model or fixture
# (registered without REQUIRES). It pins split_text_for_tts after that
# function was lifted into the shared text_preprocess unit, which both the CLI
# and the Engine use.
add_executable(test-text-split
    test/test_text_split.cpp
)
target_include_directories(test-text-split PRIVATE src)
target_link_libraries(test-text-split PRIVATE tts-cpp)
tts_cpp_apply_ccache(test-text-split)
tts_cpp_register_test(test-text-split
    LABEL "cpu"
)
# test-lavasr-dsp (QVAC-16579): LavaSR DSP primitives -- resampler,
# STFT/ISTFT, Slaney mel, FastLR crossover merge. The binary is compiled
# directly from the lavasr/dsp sources with no library link (no model, no
# ggml) and is registered without REQUIRES, so it can always run in CI.
add_executable(test-lavasr-dsp
    test/test_lavasr_dsp.cpp
    src/lavasr/dsp/resampler.cpp
    src/lavasr/dsp/stft_processor.cpp
    src/lavasr/dsp/mel_filterbank.cpp
    src/lavasr/dsp/fastlr_merge.cpp
)
target_include_directories(test-lavasr-dsp PRIVATE src)
tts_cpp_apply_ccache(test-lavasr-dsp)
tts_cpp_register_test(test-lavasr-dsp
    LABEL "unit"
)
# test-lavasr-enhancer-core (QVAC-16579): scalar-core parity test for the
# LavaSR enhancer. It checks numerical parity of the neural core
# (mel -> backbone -> spec head -> real/imag) against an onnxruntime golden
# and adds a finite/length smoke test of the full enhance() pipeline. The
# binary is pure host math (no ggml link); its fixtures are produced by
# scripts/dump-lavasr-enhancer-fixtures.py. The end-to-end DSP comparison
# (resampler/mel/ISTFT/FastLR vs the numpy reference) is covered by
# test-lavasr-enhancer-gguf. The test stays DISABLED until the fixtures
# exist: REQUIRES names the golden real.npy.
add_executable(test-lavasr-enhancer-core
    test/test_lavasr_enhancer_core.cpp
    src/lavasr/enhancer_core.cpp
    src/lavasr/enhancer.cpp
    src/lavasr/dsp/resampler.cpp
    src/lavasr/dsp/stft_processor.cpp
    src/lavasr/dsp/mel_filterbank.cpp
    src/lavasr/dsp/fastlr_merge.cpp
)
target_include_directories(test-lavasr-enhancer-core PRIVATE src)
tts_cpp_apply_ccache(test-lavasr-enhancer-core)
tts_cpp_register_test(test-lavasr-enhancer-core
    LABEL "fixture"
    ARGS
        "${TTS_CPP_TEST_REF_DIR}/lavasr-enhancer"
    REQUIRES
        "${TTS_CPP_TEST_REF_DIR}/lavasr-enhancer/real.npy"
)
# test-lavasr-enhancer-gguf (QVAC-16579): GGUF round-trip for the LavaSR
# enhancer. It loads the converted GGUF through load_enhancer_gguf() and the
# public tts_cpp::lavasr::Enhancer API, compares the neural core to the
# onnxruntime golden, AND compares the full enhance(pcm_in) output
# (resampler + mel + ISTFT + FastLR + neural) against the numpy end-to-end
# golden (enhanced_48k.npy). ggml is linked for GGUF I/O (mirrors
# test-gguf-stream). The test stays DISABLED until the GGUF and fixtures exist
# (convert-lavasr-enhancer-to-gguf.py + dump-lavasr-enhancer-fixtures.py).
# NOTE(review): REQUIRES names only the GGUF and real.npy, not
# enhanced_48k.npy -- confirm both goldens are always dumped together.
add_executable(test-lavasr-enhancer-gguf
    test/test_lavasr_enhancer_gguf.cpp
)
target_include_directories(test-lavasr-enhancer-gguf PRIVATE include src ggml/include)
target_link_libraries(test-lavasr-enhancer-gguf PRIVATE tts-cpp ggml)
tts_cpp_apply_ccache(test-lavasr-enhancer-gguf)
tts_cpp_register_test(test-lavasr-enhancer-gguf
    LABEL "fixture"
    ARGS
        "${TTS_CPP_TEST_MODEL_DIR}/lavasr-enhancer.gguf"
        "${TTS_CPP_TEST_REF_DIR}/lavasr-enhancer"
    REQUIRES
        "${TTS_CPP_TEST_MODEL_DIR}/lavasr-enhancer.gguf"
        "${TTS_CPP_TEST_REF_DIR}/lavasr-enhancer/real.npy"
)
# QVAC-20979 -- voice-clone test harness.
#
# Two model-free harnesses that every later cloning task builds on:
#   * test-voiceclone-gradcheck: a finite-difference gradient checker, used to
#     validate future Supertonic analytic backward passes;
#   * test-voiceclone-metrics: a speaker-similarity / WER metrics module.
# Both are registered as "unit" with NO REQUIRES so that they ALWAYS run on a
# fresh checkout (the no-skip test policy): no model is needed, and the
# metrics fixtures are committed in-tree under test/fixtures/voiceclone/v1/.
add_executable(test-voiceclone-gradcheck
    test/test_voiceclone_gradcheck.cpp
    src/voiceclone_gradcheck.cpp
)
target_include_directories(test-voiceclone-gradcheck PRIVATE src)
tts_cpp_apply_ccache(test-voiceclone-gradcheck)
tts_cpp_register_test(test-voiceclone-gradcheck
    LABEL "unit"
)
# test-voiceclone-metrics: built from the test driver and
# voiceclone_metrics.cpp only (no library link).
add_executable(test-voiceclone-metrics
    test/test_voiceclone_metrics.cpp
    src/voiceclone_metrics.cpp
)
target_include_directories(test-voiceclone-metrics PRIVATE src)
tts_cpp_apply_ccache(test-voiceclone-metrics)
# argv[1] is the committed fixture directory, so the reproduction layer always
# runs; the closed-form self-tests run whatever the argument is. There is no
# REQUIRES entry.
tts_cpp_register_test(test-voiceclone-metrics
    LABEL "unit"
    ARGS
        "${CMAKE_CURRENT_SOURCE_DIR}/test/fixtures/voiceclone/v1"
)
# test-supertonic-text-encoder-backward (Ticket 4): analytic backward of the
# style-dependent text-encoder tail (speech-prompted attention + final layer
# norm). The test is model-free -- every analytic gradient is gradchecked
# against finite differences -- so it ALWAYS runs on a fresh checkout
# (no-skip policy; no model or fixtures, hence no REQUIRES).
add_executable(test-supertonic-text-encoder-backward
    test/test_supertonic_text_encoder_backward.cpp
    src/supertonic_text_encoder_backward.cpp
    src/voiceclone_gradcheck.cpp
)
target_include_directories(test-supertonic-text-encoder-backward PRIVATE src)
tts_cpp_apply_ccache(test-supertonic-text-encoder-backward)
tts_cpp_register_test(test-supertonic-text-encoder-backward
    LABEL "unit"
)
# test-supertonic-vector-estimator-backward (QVAC-20982): analytic backward of
# the vector estimator / CFM step (convnext, depthwise conv, layer norm, gelu,
# rope + style cross-attention, full field and per-step d_in/d_style). The
# test is model-free -- every analytic gradient is gradchecked against finite
# differences via the Task 2 harness -- so it ALWAYS runs on a fresh checkout
# (no-skip policy; no model or fixtures, hence no REQUIRES).
add_executable(test-supertonic-vector-estimator-backward
    test/test_supertonic_vector_estimator_backward.cpp
    src/supertonic_vector_estimator_backward.cpp
    src/voiceclone_gradcheck.cpp
)
target_include_directories(test-supertonic-vector-estimator-backward PRIVATE src)
tts_cpp_apply_ccache(test-supertonic-vector-estimator-backward)
tts_cpp_register_test(test-supertonic-vector-estimator-backward
    LABEL "unit"
)
# test-campplus-backward (QVAC-20984): analytic backward of the CAMPPlus
# speaker encoder (FCM Conv2d head + residual blocks, TDNN, CAM dense-TDNN
# blocks with context-attention gating and dense concat, statistics pooling,
# dense head). The test is model-free -- every analytic input-gradient is
# gradchecked against finite differences via the Task 2 harness -- so it
# ALWAYS runs on a fresh checkout (no-skip policy; no model or fixtures,
# hence no REQUIRES).
add_executable(test-campplus-backward
    test/test_campplus_backward.cpp
    src/campplus_backward.cpp
    src/voiceclone_gradcheck.cpp
)
target_include_directories(test-campplus-backward PRIVATE src)
tts_cpp_apply_ccache(test-campplus-backward)
tts_cpp_register_test(test-campplus-backward
    LABEL "unit"
)
# test-campplus-backward-parity: forward-parity check. The analytic double
# forward must match the production scalar CAMPPlus forward
# (campplus_embed_cpu) on synthetic weights, which anchors the gradcheck to
# the real model. Compiling campplus.cpp into the test is what pulls in the
# ggml link (and OpenMP when OpenMP_CXX_FOUND is set).
add_executable(test-campplus-backward-parity
    test/test_campplus_backward_parity.cpp
    src/campplus_backward.cpp
    src/campplus.cpp
)
target_include_directories(test-campplus-backward-parity PRIVATE ggml/include src)
target_link_libraries(test-campplus-backward-parity PRIVATE ggml)
if (OpenMP_CXX_FOUND)
    target_link_libraries(test-campplus-backward-parity PRIVATE OpenMP::OpenMP_CXX)
endif()
tts_cpp_apply_ccache(test-campplus-backward-parity)
tts_cpp_register_test(test-campplus-backward-parity
    LABEL "unit"
)
# test-chatterbox-engine-stream: Engine-level streaming-callback contract for
# the per-sentence segmentation path (Fix #2). Checked properties: monotonic
# global chunk_index, a single final is_last, result.pcm == concat(callbacks),
# and accumulated stats. Gated (via REQUIRES) on the chatterbox Turbo T3 and
# S3Gen GGUFs.
add_executable(test-chatterbox-engine-stream
    test/test_chatterbox_engine_stream.cpp
)
target_include_directories(test-chatterbox-engine-stream PRIVATE ggml/include src include)
target_link_libraries(test-chatterbox-engine-stream PRIVATE tts-cpp ggml tts-cpp-backend-defs)
tts_cpp_apply_ccache(test-chatterbox-engine-stream)
tts_cpp_register_test(test-chatterbox-engine-stream
    LABEL "fixture"
    ARGS
        "${_tcb_t3_turbo_gguf}"
        "${_tcb_s3gen_gguf}"
    REQUIRES
        "${_tcb_t3_turbo_gguf}"
        "${_tcb_s3gen_gguf}"
)
# test-output-sample-rate (QVAC-21483): output-frequency selection on the
# chatterbox::Engine API (batch + streaming + validation). It takes the
# multilingual T3 / S3Gen GGUFs as argv and lists them in REQUIRES, so it is
# auto-disabled when they are absent.
add_executable(test-output-sample-rate
    test/test_output_sample_rate.cpp
)
target_include_directories(test-output-sample-rate PRIVATE ggml/include src include)
target_link_libraries(test-output-sample-rate PRIVATE tts-cpp ggml tts-cpp-backend-defs)
tts_cpp_apply_ccache(test-output-sample-rate)
tts_cpp_register_test(test-output-sample-rate
    LABEL "fixture"
    ARGS
        "${_tcb_t3_mtl_gguf}"
        "${_tcb_s3gen_mtl_gguf}"
    REQUIRES
        "${_tcb_t3_mtl_gguf}"
        "${_tcb_s3gen_mtl_gguf}"
)
# QVAC-21483 — Supertonic sibling of the above: native rate, 16 kHz batch
# ratio, construction rejection, streaming result.pcm == concat, and the
# streaming-equals-whole-buffer-resample batch-exact property. Gated on the
# Supertonic GGUF fixture; auto-disabled when it's absent.
add_executable(test-output-sample-rate-supertonic test/test_output_sample_rate_supertonic.cpp)