Skip to content

Commit d85b89e

Browse files
michalkulakowski and Michal Kulakowski authored
Speech to text streaming support (#4140)
### 🛠 Summary

CVS-181778 CVS-185048

- Streaming support for the speech2text endpoint
- Disconnection support for both speech2text and text2speech
- Bug fixes

### 🧪 Checklist

- [x] Unit tests added.
- [ ] The documentation updated.
- [x] Change follows security best practices.

---------

Co-authored-by: Michal Kulakowski <michalkulakowski@intel.com>
1 parent 3e0f5c7 commit d85b89e

33 files changed

Lines changed: 1529 additions & 265 deletions

demos/common/export_models/export_model.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,14 @@ def add_common_arguments(parser):
134134
name: "S2tExecutor"
135135
input_side_packet: "STT_NODE_RESOURCES:s2t_servable"
136136
calculator: "S2tCalculator"
137+
input_stream: "LOOPBACK:loopback"
137138
input_stream: "HTTP_REQUEST_PAYLOAD:input"
139+
output_stream: "LOOPBACK:loopback"
138140
output_stream: "HTTP_RESPONSE_PAYLOAD:output"
141+
input_stream_info: {
142+
tag_index: 'LOOPBACK:0',
143+
back_edge: true
144+
}
139145
node_options: {
140146
[type.googleapis.com / mediapipe.S2tCalculatorOptions]: {
141147
models_path: "{{model_path}}",
@@ -144,6 +150,16 @@ def add_common_arguments(parser):
144150
enable_word_timestamps: {% if not enable_word_timestamps %}false{% else %}true{% endif%},
145151
}
146152
}
153+
input_stream_handler {
154+
input_stream_handler: "SyncSetInputStreamHandler",
155+
options {
156+
[mediapipe.SyncSetInputStreamHandlerOptions.ext] {
157+
sync_set {
158+
tag_index: "LOOPBACK:0"
159+
}
160+
}
161+
}
162+
}
147163
}
148164
"""
149165

src/BUILD

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,16 @@ cc_library(
5252
copts = COMMON_STATIC_LIBS_COPTS,
5353
)
5454

55+
ovms_cc_library(
56+
name = "executor_base",
57+
hdrs = ["executor_base.hpp"],
58+
deps = [
59+
"//src:libovmslogging",
60+
],
61+
visibility = ["//visibility:public"],
62+
alwayslink = 1,
63+
)
64+
5565
cc_shared_library(
5666
name = "ovms_shared",
5767
dynamic_deps = [],
@@ -1354,6 +1364,12 @@ ovms_cc_library(
13541364
)
13551365

13561366

1367+
ovms_cc_library(
1368+
name = "sse_utils",
1369+
hdrs = ["sse_utils.hpp"],
1370+
visibility = ["//visibility:public"],
1371+
)
1372+
13571373
ovms_cc_library(
13581374
name = "libovmsstatus",
13591375
hdrs = ["status.hpp",],
@@ -2367,6 +2383,7 @@ cc_test(
23672383
"test/llm/visual_language_model/initialization_test.cpp",
23682384
"test/audio/text2speech_test.cpp",
23692385
"test/audio/speech2text_test.cpp",
2386+
"test/audio/s2t_streaming_test.cpp",
23702387
],
23712388
"//:disable_mediapipe" : [
23722389
"test/disabled_mediapipe_test.cpp",

src/audio/speech_to_text/BUILD

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,50 @@ load("//:common_settings.bzl", "ovms_cc_library")
1919

2020
ovms_cc_library(
2121
name = "s2t_servable",
22-
hdrs = ["s2t_servable.hpp"],
22+
srcs = ["s2t_servable.cpp"],
23+
hdrs = [
24+
"s2t_executor.hpp",
25+
"s2t_servable.hpp",
26+
],
27+
deps = [
28+
"//src:executor_base",
29+
"//src:httppayload",
30+
"//src:libovmslogging",
31+
"//src:libmodelconfigjsonparser",
32+
"//src:libovmsstring_utils",
33+
"s2t_calculator_cc_proto",
34+
"@com_google_absl//absl/status",
35+
"//third_party:genai",
36+
],
37+
visibility = ["//visibility:public"],
38+
alwayslink = 1,
39+
)
40+
41+
ovms_cc_library(
42+
name = "s2t_streaming_handler",
43+
srcs = [
44+
"s2t_streaming_handler.cpp",
45+
"streaming_text_queue.cpp",
46+
],
47+
hdrs = [
48+
"s2t_streaming_handler.hpp",
49+
"streaming_text_queue.hpp",
50+
],
51+
deps = [
52+
"@mediapipe//mediapipe/framework:calculator_framework",
53+
"//src:httppayload",
54+
"//src:libovmsclient_connection",
55+
"//src:libovmslogging",
56+
"//src:libovmsstring_utils",
57+
"//src:libovmsstatus",
58+
"//src:libmodelconfigjsonparser",
59+
"//src:sse_utils",
60+
"//src/port:rapidjson_stringbuffer",
61+
"//src/port:rapidjson_writer",
62+
":s2t_servable",
63+
"s2t_calculator_cc_proto",
64+
"//third_party:genai",
65+
],
2366
visibility = ["//visibility:public"],
2467
alwayslink = 1,
2568
)
@@ -37,11 +80,11 @@ ovms_cc_library(
3780
"//src/port:rapidjson_stringbuffer",
3881
"//src/port:rapidjson_writer",
3982
":s2t_servable",
83+
":s2t_streaming_handler",
4084
"//third_party:genai",
4185
"//src/audio:audio_utils",
4286
"//src:libmodelconfigjsonparser",
4387
"//src/mediapipe_internal:node_initializer",
44-
"//src:libovmsstring_utils",
4588
],
4689
visibility = ["//visibility:public"],
4790
alwayslink = 1,

0 commit comments

Comments
 (0)