Skip to content

Commit e4d8190

Browse files
committed
Audio: MFCC: Add Voice Activity Detection based on Mel spectrum
This patch adds a new mfcc_vad module that implements VAD operating on the Mel log spectrum values produced by the MFCC component. The VAD is very simple and is not very selective for voice vs. other signals. However, the continuously updated background noise estimate prevents stationary noises from triggering the VAD. The algorithm tracks a per-bin noise floor (instant-down, slow-rise) and computes an A-weighted energy delta. The weighting emphasizes speech frequencies. Speech is declared when the delta exceeds a threshold (0.30 in Q9.23), with a 20-frame hangover to prevent rapid toggling. The VAD flag is inserted into the output stream as the first value after the magic header word in all format paths (S16, S24, S32). A new Kconfig option CONFIG_COMP_MFCC_VAD (depends on COMP_MFCC, default n) gates compilation of the VAD code and the stream format change. Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
1 parent e35a7ef commit e4d8190

8 files changed

Lines changed: 426 additions & 0 deletions

File tree

src/arch/host/configs/library_defconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ CONFIG_COMP_IIR=y
1111
CONFIG_COMP_IGO_NR=y
1212
CONFIG_COMP_LEVEL_MULTIPLIER=y
1313
CONFIG_COMP_MFCC=y
14+
CONFIG_COMP_MFCC_VAD=y
1415
CONFIG_COMP_MODULE_ADAPTER=y
1516
CONFIG_COMP_MULTIBAND_DRC=y
1617
CONFIG_COMP_MUX=y

src/audio/mfcc/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,7 @@ if(CONFIG_COMP_MFCC STREQUAL "m" AND DEFINED CONFIG_LLEXT)
55
add_dependencies(app mfcc)
66
else()
77
add_local_sources(sof mfcc.c mfcc_setup.c mfcc_common.c mfcc_generic.c mfcc_hifi4.c mfcc_hifi3.c)
8+
if(CONFIG_COMP_MFCC_VAD)
9+
add_local_sources(sof mfcc_vad.c)
10+
endif()
811
endif()

src/audio/mfcc/Kconfig

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,14 @@ config COMP_MFCC
2424
The characteristic of the audio features are defined in the binary
2525
control blob. Directory tools/tune/mfcc contains a tool to create
2626
the configurations.
27+
28+
config COMP_MFCC_VAD
29+
bool "MFCC Voice Activity Detection"
30+
depends on COMP_MFCC
31+
default n
32+
help
33+
This option enables a Voice Activity Detector (VAD) that operates
34+
on the Mel spectrum values produced by the MFCC component. The VAD
35+
flag is inserted into the output stream as the first int32_t value
36+
after the magic header word. The VAD tracks a per-bin noise floor
37+
and detects speech using a weighted energy delta with hangover.

src/audio/mfcc/mfcc_common.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
#include <stddef.h>
2222
#include <stdint.h>
2323

24+
#ifdef CONFIG_COMP_MFCC_VAD
25+
#include <sof/audio/mfcc/mfcc_vad.h>
26+
#endif
27+
2428
LOG_MODULE_REGISTER(mfcc_common, CONFIG_SOF_LOG_LEVEL);
2529

2630
/*
@@ -144,6 +148,10 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_comp_data *
144148
sat_int32(Q_MULTSR_32X32(s, config->mel_scale, 23, 12, 23));
145149
}
146150

151+
#ifdef CONFIG_COMP_MFCC_VAD
152+
/* Run VAD on the mel log spectrum before further processing */
153+
state->vad_flag = mfcc_vad_update(&cd->vad, state->mel_log_32);
154+
#endif
147155
/* Store Q9.7 version in mel_spectra for s16 output mode */
148156
for (j = 0; j < state->dct.num_in; j++)
149157
state->mel_spectra->data[j] =
@@ -289,6 +297,9 @@ void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer
289297

290298
state->out_remain = num_ceps;
291299
state->magic_pending = true;
300+
#ifdef CONFIG_COMP_MFCC_VAD
301+
state->vad_pending = true;
302+
#endif
292303
}
293304

294305
/* Write to sink, limited by period size */
@@ -301,6 +312,15 @@ void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer
301312
state->magic_pending = false;
302313
}
303314

315+
#ifdef CONFIG_COMP_MFCC_VAD
316+
/* Write VAD flag as first value after magic (as two int16_t = one int32_t) */
317+
if (state->vad_pending && sink_samples >= 2) {
318+
w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, 2, (int16_t *)&state->vad_flag);
319+
sink_samples -= 2;
320+
state->vad_pending = false;
321+
}
322+
#endif
323+
304324
/* Write cepstral/mel data from scratch buffer */
305325
to_copy = MIN(state->out_remain, sink_samples);
306326
if (to_copy > 0) {
@@ -392,6 +412,9 @@ void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer
392412

393413
state->out_remain = num_ceps;
394414
state->magic_pending = true;
415+
#ifdef CONFIG_COMP_MFCC_VAD
416+
state->vad_pending = true;
417+
#endif
395418
}
396419

397420
/* Write to sink, limited by period size */
@@ -404,6 +427,15 @@ void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer
404427
state->magic_pending = false;
405428
}
406429

430+
#ifdef CONFIG_COMP_MFCC_VAD
431+
/* Write VAD flag as first value after magic */
432+
if (state->vad_pending && sink_samples >= 1) {
433+
w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, 1, &state->vad_flag);
434+
sink_samples -= 1;
435+
state->vad_pending = false;
436+
}
437+
#endif
438+
407439
if (state->mel_only) {
408440
/* Write 32-bit mel data Q9.15, one value per int32_t */
409441
to_copy = MIN(state->out_remain, sink_samples);
@@ -467,6 +499,9 @@ void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer
467499

468500
state->out_remain = num_ceps;
469501
state->magic_pending = true;
502+
#ifdef CONFIG_COMP_MFCC_VAD
503+
state->vad_pending = true;
504+
#endif
470505
}
471506

472507
/* Write to sink, limited by period size */
@@ -479,6 +514,15 @@ void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer
479514
state->magic_pending = false;
480515
}
481516

517+
#ifdef CONFIG_COMP_MFCC_VAD
518+
/* Write VAD flag as first value after magic */
519+
if (state->vad_pending && sink_samples >= 1) {
520+
w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, 1, &state->vad_flag);
521+
sink_samples -= 1;
522+
state->vad_pending = false;
523+
}
524+
#endif
525+
482526
if (state->mel_only) {
483527
/* Write 32-bit mel data Q9.23, one value per int32_t */
484528
to_copy = MIN(state->out_remain, sink_samples);

src/audio/mfcc/mfcc_setup.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
#include <stddef.h>
1919
#include <stdint.h>
2020

21+
#ifdef CONFIG_COMP_MFCC_VAD
22+
#include <sof/audio/mfcc/mfcc_vad.h>
23+
#endif
24+
2125
/* Definitions for cepstral lifter */
2226
#define PI_Q23 Q_CONVERT_FLOAT(3.1415926536, 23)
2327
#define TWO_PI_Q23 Q_CONVERT_FLOAT(6.2831853072, 23)
@@ -346,10 +350,24 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
346350
state->waiting_fill = true;
347351
state->prev_samples_valid = false;
348352
state->magic_pending = false;
353+
#ifdef CONFIG_COMP_MFCC_VAD
354+
state->vad_pending = false;
355+
state->vad_flag = 0;
356+
#endif
349357
state->out_data_ptr = NULL;
350358
state->out_data_ptr_32 = NULL;
351359
state->out_remain = 0;
352360

361+
#ifdef CONFIG_COMP_MFCC_VAD
362+
ret = mfcc_vad_init(&cd->vad, config->num_mel_bins, sample_rate);
363+
if (ret < 0) {
364+
comp_err(dev, "Failed VAD init");
365+
goto free_lifter;
366+
}
367+
368+
comp_info(dev, "VAD enabled, num_mel_bins = %d", config->num_mel_bins);
369+
#endif
370+
353371
comp_dbg(dev, "done");
354372
return 0;
355373

0 commit comments

Comments
 (0)