Skip to content

Commit ad6274f

Browse files
committed
parakeet : add support for NVIDIA Parakeet
1 parent 95ea8f9 commit ad6274f

19 files changed

Lines changed: 6038 additions & 1 deletion

CMakeLists.txt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,12 +179,20 @@ set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location
179179
get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
180180

181181
set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
182+
182183
install(TARGETS whisper LIBRARY PUBLIC_HEADER)
183184

184185
target_compile_definitions(whisper PRIVATE
185186
WHISPER_VERSION="${PROJECT_VERSION}"
186187
)
187188

189+
# Declare include/parakeet.h as the public header of the parakeet target.
set_target_properties(parakeet PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/parakeet.h)
190+
# Install rules for the parakeet target: its LIBRARY artifact and its PUBLIC_HEADER.
install(TARGETS parakeet LIBRARY PUBLIC_HEADER)
191+
192+
# Pass the project version to the parakeet sources as the PARAKEET_VERSION
# preprocessor definition (PRIVATE: not propagated to consumers of the target).
target_compile_definitions(parakeet PRIVATE
193+
PARAKEET_VERSION="${PROJECT_VERSION}"
194+
)
195+
188196
configure_package_config_file(
189197
${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
190198
${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
@@ -210,6 +218,35 @@ configure_file(cmake/whisper.pc.in
210218
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
211219
DESTINATION lib/pkgconfig)
212220

221+
# Install locations exported to the parakeet package config. They are cache
# entries, so they can be overridden on the command line.
set(PARAKEET_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
222+
set(PARAKEET_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
223+
set(PARAKEET_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
224+
225+
# Generate parakeet-config.cmake from its template. Each PATH_VARS entry is made
# available to the template as @PACKAGE_<name>@.
configure_package_config_file(
226+
${CMAKE_CURRENT_SOURCE_DIR}/cmake/parakeet-config.cmake.in
227+
${CMAKE_CURRENT_BINARY_DIR}/parakeet-config.cmake
228+
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/parakeet
229+
PATH_VARS
230+
PARAKEET_INCLUDE_INSTALL_DIR
231+
PARAKEET_LIB_INSTALL_DIR
232+
PARAKEET_BIN_INSTALL_DIR)
233+
234+
# The version file must be named <config-file>-version.cmake, i.e.
# parakeet-config-version.cmake. find_package() never looks for
# "parakeet-version.cmake", and without a version file any version-constrained
# find_package(parakeet <ver>) call treats the package as incompatible.
# The version itself is taken from WHISPER_INSTALL_VERSION.
write_basic_package_version_file(
235+
${CMAKE_CURRENT_BINARY_DIR}/parakeet-config-version.cmake
236+
VERSION ${WHISPER_INSTALL_VERSION}
237+
COMPATIBILITY SameMajorVersion)
238+
239+
# Install the config file and the version file side by side.
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/parakeet-config.cmake
240+
${CMAKE_CURRENT_BINARY_DIR}/parakeet-config-version.cmake
241+
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/parakeet)
242+
243+
# Generate the pkg-config file; @ONLY restricts substitution to @VAR@ references
# so the ${...} pkg-config variables in the template are left untouched.
configure_file(cmake/parakeet.pc.in
244+
"${CMAKE_CURRENT_BINARY_DIR}/parakeet.pc"
245+
@ONLY)
246+
247+
# Install the pkg-config file under the configured library directory (the same
# root used for the CMake package files) instead of a hard-coded "lib", so that
# lib64-style layouts place it where pkg-config searches by default.
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/parakeet.pc"
248+
DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
249+
213250
#
214251
# programs, examples and tests
215252
#

bindings/ruby/ext/extconf.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@
1818
#{libs}: cmake-targets
1919
cmake-targets:
2020
#{"\t"}#{cmake} -S sources -B build -D BUILD_SHARED_LIBS=OFF -D CMAKE_ARCHIVE_OUTPUT_DIRECTORY=#{__dir__} -D CMAKE_POSITION_INDEPENDENT_CODE=ON #{options}
21-
#{"\t"}#{cmake} --build build --config Release --target common whisper
21+
#{"\t"}#{cmake} --build build --config Release --target common whisper parakeet
2222
EOF
2323
end

cmake/parakeet-config.cmake.in

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Package metadata, substituted when this template is configured. The version,
# commit and build number are taken from the WHISPER_* variables.
set(PARAKEET_VERSION @WHISPER_INSTALL_VERSION@)
2+
set(PARAKEET_BUILD_COMMIT @WHISPER_BUILD_COMMIT@)
3+
set(PARAKEET_BUILD_NUMBER @WHISPER_BUILD_NUMBER@)
4+
set(PARAKEET_SHARED_LIB @BUILD_SHARED_LIBS@)
5+
6+
# Expanded by configure_package_config_file(); supplies the set_and_check() and
# check_required_components() helpers used below.
@PACKAGE_INIT@
7+
8+
# Install directories passed in through PATH_VARS; set_and_check() errors out if
# a path does not exist.
set_and_check(PARAKEET_INCLUDE_DIR "@PACKAGE_PARAKEET_INCLUDE_INSTALL_DIR@")
9+
set_and_check(PARAKEET_LIB_DIR "@PACKAGE_PARAKEET_LIB_INSTALL_DIR@")
10+
set_and_check(PARAKEET_BIN_DIR "@PACKAGE_PARAKEET_BIN_INSTALL_DIR@")
11+
12+
# ggml is a hard dependency; hint the search at the cmake/ directory under the
# parakeet library directory.
find_package(ggml REQUIRED HINTS ${PARAKEET_LIB_DIR}/cmake)
13+
14+
# Locate the parakeet library itself. NO_CMAKE_FIND_ROOT_PATH keeps the search
# from being re-rooted under CMAKE_FIND_ROOT_PATH.
find_library(parakeet_LIBRARY parakeet
15+
REQUIRED
16+
HINTS ${PARAKEET_LIB_DIR}
17+
NO_CMAKE_FIND_ROOT_PATH
18+
)
19+
20+
# Imported target that consumers link against. UNKNOWN lets the same config
# serve both static and shared builds of the library.
add_library(parakeet UNKNOWN IMPORTED)
21+
set_target_properties(parakeet
22+
PROPERTIES
23+
INTERFACE_INCLUDE_DIRECTORIES "${PARAKEET_INCLUDE_DIR}"
24+
INTERFACE_LINK_LIBRARIES "ggml::ggml;ggml::ggml-base;"
25+
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
26+
IMPORTED_LOCATION "${parakeet_LIBRARY}"
27+
INTERFACE_COMPILE_FEATURES cxx_std_11
28+
POSITION_INDEPENDENT_CODE ON)
29+
30+
# Fail find_package() if the caller requested a component this package does not provide.
check_required_components(parakeet)

cmake/parakeet.pc.in

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# pkg-config template for parakeet; @VAR@ references are substituted by CMake
# (configure_file ... @ONLY), ${var} references are resolved by pkg-config.
prefix=@CMAKE_INSTALL_PREFIX@
2+
exec_prefix=${prefix}
3+
# Follow the configured install directories instead of hard-coding "lib" and
# "include", so lib64-style layouts resolve to where the files are installed.
# NOTE(review): assumes CMAKE_INSTALL_LIBDIR/INCLUDEDIR are relative paths — confirm.
libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@
4+
includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
5+
6+
Name: parakeet
7+
Description: Port of NVIDIA's Parakeet model in C/C++
8+
Version: @PROJECT_VERSION@
9+
# parakeet is listed before the ggml libraries it depends on, which is the order
# static linking requires with order-sensitive linkers.
Libs: -L${libdir} -lparakeet -lggml -lggml-base
10+
Cflags: -I${includedir}

examples/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ else()
107107
add_subdirectory(server)
108108
add_subdirectory(quantize)
109109
add_subdirectory(vad-speech-segments)
110+
add_subdirectory(parakeet-cli)
110111
if (WHISPER_SDL2)
111112
add_subdirectory(stream)
112113
add_subdirectory(command)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Build script for the parakeet-cli example executable.
set(TARGET parakeet-cli)
2+
add_executable(${TARGET} parakeet-cli.cpp)
3+
4+
# Project module defined outside this file; presumably applies the shared
# per-target defaults to ${TARGET} — confirm against cmake/DefaultTargetOptions.
include(DefaultTargetOptions)
5+
6+
# Link against the common helper target and the parakeet library, plus whatever
# FFMPEG_LIBRARIES and CMAKE_THREAD_LIBS_INIT expand to (either may be empty).
target_link_libraries(${TARGET} PRIVATE common parakeet ${FFMPEG_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
7+
8+
# Install the executable (RUNTIME artifact) to the default runtime destination.
install(TARGETS ${TARGET} RUNTIME)

examples/parakeet-cli/README.md

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# whisper.cpp/examples/parakeet-cli
2+
3+
This is an example of using the [Parakeet] model in whisper.cpp.
4+
5+
### Download converted model
6+
```console
7+
$ hf download danbev/parakeet parakeet-tdt-0.6b-v3.bin --local-dir models
8+
```
9+
10+
### Building
11+
```console
12+
$ cmake -B build -S .
13+
$ cmake --build build --target parakeet-cli -j 12
14+
```
15+
16+
### Usage
17+
```console
18+
$ ./build/bin/parakeet-cli --help
19+
20+
usage: ./build/bin/parakeet-cli [options] file0 file1 ...
21+
supported audio formats: flac, mp3, ogg, wav
22+
23+
options:
24+
-h, --help [default] show this help message and exit
25+
-t N, --threads N [4 ] number of threads to use during computation
26+
-cl N, --chunk-length N [10000 ] chunk length in milliseconds
27+
-lc N, --left-context N [10000 ] left context in milliseconds
28+
-rc N, --right-context N [4960 ] right context in milliseconds
29+
-m, --model FILE [models/ggml-parakeet-tdt-0.6b-v3.bin] model path
30+
-f, --file FILE [ ] input audio file
31+
-ng, --no-gpu [false ] disable GPU
32+
-dev N, --device N [0 ] GPU device to use
33+
-fa, --flash-attn [true ] enable flash attention
34+
-nfa, --no-flash-attn [false ] disable flash attention
35+
-ps, --print-segments [false ] print segment information
36+
```
37+
38+
### Example
39+
```console
40+
$ ./build/bin/parakeet-cli -m models/parakeet-tdt-0.6b-v3.bin -f samples/jfk.wav
41+
Processing audio (176000 samples, 11.00 seconds)
42+
Processing audio: total_frames=1101, chunk_size=1101
43+
parakeet_decode: starting decode with n_frames=138
44+
And so, my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
45+
```
46+
47+
To print segment information:
48+
```console
49+
$ ./build/bin/parakeet-cli -m models/parakeet-tdt-0.6b-v3.bin -f samples/jfk.wav --print-segments
50+
Processing audio (176000 samples, 11.00 seconds)
51+
Processing audio: total_frames=1101, chunk_size=1101
52+
parakeet_decode: starting decode with n_frames=138
53+
And so, my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
54+
55+
Segments (1):
56+
Segment 0: [0 -> 1101] "And so, my fellow Americans, ask not what your country can do for you, ask what you can do for your country."
57+
Tokens [38]:
58+
[ 0] id= 1976 frame= 3 dur_idx= 4 dur_val= 4 p=0.9996 plog=-15.6206 t0= 24 t1= 56 word_start=true "▁And"
59+
[ 1] id= 547 frame= 7 dur_idx= 4 dur_val= 4 p=0.9999 plog=-18.7922 t0= 56 t1= 88 word_start=true "▁so"
60+
[ 2] id= 7877 frame= 11 dur_idx= 2 dur_val= 2 p=0.8451 plog=-14.5929 t0= 88 t1= 88 word_start=false ","
61+
[ 3] id= 1103 frame= 13 dur_idx= 3 dur_val= 3 p=0.9996 plog=-15.6127 t0= 104 t1= 128 word_start=true "▁my"
62+
[ 4] id= 309 frame= 16 dur_idx= 1 dur_val= 1 p=0.9912 plog=-11.9635 t0= 128 t1= 136 word_start=true "▁f"
63+
[ 5] id= 530 frame= 17 dur_idx= 2 dur_val= 2 p=1.0000 plog=-13.5239 t0= 136 t1= 152 word_start=false "ell"
64+
[ 6] id= 596 frame= 19 dur_idx= 3 dur_val= 3 p=1.0000 plog=-16.3120 t0= 152 t1= 176 word_start=false "ow"
65+
[ 7] id= 3213 frame= 22 dur_idx= 4 dur_val= 4 p=0.9999 plog=-10.1462 t0= 176 t1= 208 word_start=true "▁Amer"
66+
[ 8] id= 404 frame= 26 dur_idx= 4 dur_val= 4 p=1.0000 plog=-25.0910 t0= 208 t1= 240 word_start=false "ic"
67+
[ 9] id= 667 frame= 30 dur_idx= 4 dur_val= 4 p=1.0000 plog=-27.1707 t0= 240 t1= 272 word_start=false "ans"
68+
[10] id= 7877 frame= 37 dur_idx= 4 dur_val= 4 p=0.9094 plog=-16.3405 t0= 272 t1= 272 word_start=false ","
69+
[11] id= 279 frame= 41 dur_idx= 4 dur_val= 4 p=0.9980 plog=-19.7244 t0= 328 t1= 360 word_start=true "▁a"
70+
[12] id= 583 frame= 45 dur_idx= 4 dur_val= 4 p=1.0000 plog=-24.5312 t0= 360 t1= 392 word_start=false "sk"
71+
[13] id= 1491 frame= 53 dur_idx= 4 dur_val= 4 p=1.0000 plog=-23.2991 t0= 424 t1= 456 word_start=true "▁not"
72+
[14] id= 3470 frame= 65 dur_idx= 4 dur_val= 4 p=0.9995 plog=-16.7306 t0= 520 t1= 552 word_start=true "▁what"
73+
[15] id= 3629 frame= 69 dur_idx= 2 dur_val= 2 p=0.8139 plog=-11.6486 t0= 552 t1= 568 word_start=true "▁your"
74+
[16] id= 867 frame= 75 dur_idx= 1 dur_val= 1 p=0.9980 plog=-12.5265 t0= 600 t1= 608 word_start=true "▁co"
75+
[17] id= 331 frame= 76 dur_idx= 2 dur_val= 2 p=1.0000 plog=-11.6697 t0= 608 t1= 624 word_start=false "un"
76+
[18] id= 958 frame= 78 dur_idx= 2 dur_val= 2 p=1.0000 plog=-11.3621 t0= 624 t1= 640 word_start=false "tr"
77+
[19] id= 7893 frame= 80 dur_idx= 2 dur_val= 2 p=1.0000 plog=-14.3245 t0= 640 t1= 656 word_start=false "y"
78+
[20] id= 2059 frame= 82 dur_idx= 3 dur_val= 3 p=1.0000 plog=-17.7694 t0= 656 t1= 680 word_start=true "▁can"
79+
[21] id= 458 frame= 85 dur_idx= 4 dur_val= 4 p=1.0000 plog=-23.2510 t0= 680 t1= 712 word_start=true "▁do"
80+
[22] id= 509 frame= 89 dur_idx= 4 dur_val= 4 p=1.0000 plog=-23.0688 t0= 712 t1= 744 word_start=true "▁for"
81+
[23] id= 1180 frame= 93 dur_idx= 4 dur_val= 4 p=0.9999 plog=-25.0567 t0= 744 t1= 776 word_start=true "▁you"
82+
[24] id= 7877 frame= 98 dur_idx= 4 dur_val= 4 p=0.8820 plog=-14.2549 t0= 776 t1= 776 word_start=false ","
83+
[25] id= 279 frame=102 dur_idx= 3 dur_val= 3 p=0.9992 plog=-16.8176 t0= 816 t1= 840 word_start=true "▁a"
84+
[26] id= 583 frame=105 dur_idx= 4 dur_val= 4 p=1.0000 plog=-21.0352 t0= 840 t1= 872 word_start=false "sk"
85+
[27] id= 3470 frame=109 dur_idx= 3 dur_val= 3 p=0.9999 plog=-15.4659 t0= 872 t1= 896 word_start=true "▁what"
86+
[28] id= 1180 frame=112 dur_idx= 4 dur_val= 4 p=0.9997 plog=-17.6392 t0= 896 t1= 928 word_start=true "▁you"
87+
[29] id= 2059 frame=116 dur_idx= 3 dur_val= 3 p=0.9999 plog=-15.5484 t0= 928 t1= 952 word_start=true "▁can"
88+
[30] id= 458 frame=119 dur_idx= 2 dur_val= 2 p=1.0000 plog=-15.9953 t0= 952 t1= 968 word_start=true "▁do"
89+
[31] id= 509 frame=121 dur_idx= 3 dur_val= 3 p=1.0000 plog=-15.9605 t0= 968 t1= 992 word_start=true "▁for"
90+
[32] id= 3629 frame=124 dur_idx= 2 dur_val= 2 p=0.9994 plog=-12.2083 t0= 992 t1=1008 word_start=true "▁your"
91+
[33] id= 867 frame=126 dur_idx= 2 dur_val= 2 p=0.9969 plog=-9.1252 t0=1008 t1=1024 word_start=true "▁co"
92+
[34] id= 331 frame=128 dur_idx= 1 dur_val= 1 p=0.9999 plog=-12.6911 t0=1024 t1=1032 word_start=false "un"
93+
[35] id= 958 frame=129 dur_idx= 1 dur_val= 1 p=1.0000 plog=-8.8885 t0=1032 t1=1040 word_start=false "tr"
94+
[36] id= 7893 frame=130 dur_idx= 2 dur_val= 2 p=1.0000 plog=-14.1441 t0=1040 t1=1056 word_start=false "y"
95+
[37] id= 7883 frame=132 dur_idx= 4 dur_val= 4 p=0.9567 plog=-11.5227 t0=1056 t1=1056 word_start=false "."
96+
```
97+
98+
### Model conversion
99+
Clone the original model from Hugging Face:
100+
```console
101+
$ git clone https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3
102+
```
103+
Convert the model:
104+
```console
105+
(venv) $ python models/convert-parakeet-to-ggml.py \
106+
--model <path to cloned model> \
107+
--use-f32 \
108+
--out-dir models \
109+
--out-name ggml-parakeet-tdt-0.6b-v3.bin
110+
```
111+
112+
[Parakeet]: https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3

0 commit comments

Comments
 (0)