thesofproject · lgirdwood · May 28, 2026 · May 12, 2026 · May 19, 2026 · May 19, 2026
@@ -4,5 +4,5 @@ if(CONFIG_COMP_MFCC STREQUAL "m" AND DEFINED CONFIG_LLEXT)
   add_subdirectory(llext ${PROJECT_BINARY_DIR}/mfcc_llext)
   add_dependencies(app mfcc)
 else()
-  add_local_sources(sof mfcc.c mfcc_setup.c mfcc_common.c mfcc_generic.c mfcc_hifi4.c mfcc_hifi3.c)
+  add_local_sources(sof mfcc.c mfcc_setup.c mfcc_common.c mfcc_generic.c mfcc_hifi4.c mfcc_hifi3.c mfcc_vad.c)
 endif()
@@ -21,6 +21,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include <sof/audio/mfcc/mfcc_vad.h>
+
 LOG_MODULE_REGISTER(mfcc_common, CONFIG_SOF_LOG_LEVEL);
 
 /*
@@ -169,6 +171,22 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_comp_data *
 
 			cc_count += state->dct.num_out;
 		}
+
+		/* Use hop counter for frame numbering (independent of VAD enable) */
+		state->header.frame_number = state->hop_count;
+
+		/* Run VAD on the mel log spectrum (available in both modes) */
+		if (config->enable_vad) {
+			mfcc_vad_update(&cd->vad, state->mel_log_32);
+
+			/* Populate data header for this output frame */
+			state->header.energy = cd->vad.energy;
+			state->header.noise_energy = cd->vad.noise_energy;
+			state->header.vad_flag = cd->vad.is_speech ? 1 : 0;
+		}
+
+		/* Increment hop counter at end of hop processing */
+		state->hop_count++;
 	}
 
 	return cc_count;
@@ -267,9 +285,8 @@ void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer
 	struct mfcc_comp_data *cd = module_get_private_data(mod);
 	struct mfcc_state *state = &cd->state;
 	struct mfcc_buffer *buf = &cd->state.buf;
-	uint32_t magic = MFCC_MAGIC;
 	int16_t *w_ptr = audio_stream_get_wptr(sink);
-	const int num_magic = 2;
+	const int num_header_s16 = sizeof(state->header) / sizeof(int16_t);
 	int num_ceps;
 	int sink_samples;
 	int to_copy;
@@ -280,25 +297,33 @@ void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer
 	/* Run STFT and processing after FFT: Mel auditory filter and DCT. */
 	num_ceps = mfcc_stft_process(mod->dev, cd);
 
-	/* If new output produced, set up pointer into scratch data and mark magic pending */
+	/* If new output produced, set up pointer into scratch data and mark header pending */
 	if (num_ceps > 0) {
-		if (state->mel_only)
+		if (state->mel_only) {
 			state->out_data_ptr = state->mel_spectra->data;
-		else
+		} else {
 			state->out_data_ptr = state->cepstral_coef->data;
+		}
 
 		state->out_remain = num_ceps;
-		state->magic_pending = true;
+		state->header_pending = true;
 	}
 
 	/* Write to sink, limited by period size */
 	sink_samples = frames * audio_stream_get_channels(sink);
 
-	/* Write magic word first if pending */
-	if (state->magic_pending && sink_samples >= num_magic) {
-		w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_magic, (int16_t *)&magic);
-		sink_samples -= num_magic;
-		state->magic_pending = false;
+	/* Write data header first if pending */
+	if (state->header_pending) {
+		if (sink_samples < num_header_s16) {
+			/* Not enough sink space for header, defer entire frame */
+			mfcc_sink_copy_zero_s16(sink, w_ptr, sink_samples);
+			return;
+		}
+
+		w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_header_s16,
+						(int16_t *)&state->header);
+		sink_samples -= num_header_s16;
+		state->header_pending = false;
 	}
 
 	/* Write cepstral/mel data from scratch buffer */
@@ -363,9 +388,8 @@ void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer
 	struct mfcc_comp_data *cd = module_get_private_data(mod);
 	struct mfcc_state *state = &cd->state;
 	struct mfcc_buffer *buf = &cd->state.buf;
-	uint32_t magic = MFCC_MAGIC;
 	int32_t *w_ptr = audio_stream_get_wptr(sink);
-	const int num_magic = 1; /* one int32_t word for magic */
+	const int num_header_s32 = sizeof(state->header) / sizeof(int32_t);
 	int num_ceps;
 	int sink_samples;
 	int remain_s32;
@@ -391,17 +415,24 @@ void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer
 		}
 
 		state->out_remain = num_ceps;
-		state->magic_pending = true;
+		state->header_pending = true;
 	}
 
 	/* Write to sink, limited by period size */
 	sink_samples = frames * audio_stream_get_channels(sink);
 
-	/* Write magic word first if pending */
-	if (state->magic_pending && sink_samples >= num_magic) {
-		w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_magic, (int32_t *)&magic);
-		sink_samples -= num_magic;
-		state->magic_pending = false;
+	/* Write data header first if pending */
+	if (state->header_pending) {
+		if (sink_samples < num_header_s32) {
+			/* Not enough sink space for header, defer entire frame */
+			mfcc_sink_copy_zero_s32(sink, w_ptr, sink_samples);
+			return;
+		}
+
+		w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_header_s32,
+						(int32_t *)&state->header);
+		sink_samples -= num_header_s32;
+		state->header_pending = false;
 	}
 
 	if (state->mel_only) {
@@ -443,9 +474,8 @@ void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer
 	struct mfcc_comp_data *cd = module_get_private_data(mod);
 	struct mfcc_state *state = &cd->state;
 	struct mfcc_buffer *buf = &cd->state.buf;
-	uint32_t magic = MFCC_MAGIC;
 	int32_t *w_ptr = audio_stream_get_wptr(sink);
-	const int num_magic = 1; /* one int32_t word for magic */
+	const int num_header_s32 = sizeof(state->header) / sizeof(int32_t);
 	int num_ceps;
 	int sink_samples;
 	int remain_s32;
@@ -466,17 +496,24 @@ void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer
 		}
 
 		state->out_remain = num_ceps;
-		state->magic_pending = true;
+		state->header_pending = true;
 	}
 
 	/* Write to sink, limited by period size */
 	sink_samples = frames * audio_stream_get_channels(sink);
 
-	/* Write magic word first if pending */
-	if (state->magic_pending && sink_samples >= num_magic) {
-		w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_magic, (int32_t *)&magic);
-		sink_samples -= num_magic;
-		state->magic_pending = false;
+	/* Write data header first if pending */
+	if (state->header_pending) {
+		if (sink_samples < num_header_s32) {
+			/* Not enough sink space for header, defer entire frame */
+			mfcc_sink_copy_zero_s32(sink, w_ptr, sink_samples);
+			return;
+		}
+
+		w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_header_s32,
+						(int32_t *)&state->header);
+		sink_samples -= num_header_s32;
+		state->header_pending = false;
 	}
 
 	if (state->mel_only) {

@@ -18,6 +18,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include <sof/audio/mfcc/mfcc_vad.h>
+
 /* Definitions for cepstral lifter */
 #define PI_Q23 Q_CONVERT_FLOAT(3.1415926536, 23)
 #define TWO_PI_Q23 Q_CONVERT_FLOAT(6.2831853072, 23)
@@ -127,6 +129,11 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
 		return -EINVAL;
 	}
 
+	if (sample_rate > MFCC_MAX_SAMPLE_RATE) {
+		comp_err(dev, "Sample rate %d exceeds max %d Hz", sample_rate, MFCC_MAX_SAMPLE_RATE);
+		return -EINVAL;
+	}
+
 	if (config->sample_frequency != sample_rate) {
 		comp_err(dev, "Config sample_frequency does not match stream");
 		return -EINVAL;
@@ -328,11 +335,11 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
 
 	/* Check that output data can be drained within the periods spanned by one
 	 * FFT hop. Each hop consumes fft_hop_size input samples and produces
-	 * max_out_per_hop + 2 (magic) int16_t output values. The sink provides at
-	 * least fft_hop_size * channels int16_t samples per hop (worst case s16).
+	 * max_out_per_hop int16_t output values plus the data header. The sink provides
+	 * at least fft_hop_size * channels int16_t samples per hop (worst case s16).
 	 * If output exceeds this, data accumulates and will eventually overflow.
 	 */
-	int out_per_hop = max_out_per_hop + 2;
+	int out_per_hop = max_out_per_hop + sizeof(state->header) / sizeof(int16_t);
 	int sink_per_hop = fft->fft_hop_size * channels;
 
 	if (out_per_hop > sink_per_hop) {
@@ -345,11 +352,22 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
 	/* Set initial state for STFT */
 	state->waiting_fill = true;
 	state->prev_samples_valid = false;
-	state->magic_pending = false;
+	state->header_pending = false;
+	state->hop_count = 0;
+	memset(&state->header, 0, sizeof(state->header));
+	state->header.magic = MFCC_MAGIC;
 	state->out_data_ptr = NULL;
 	state->out_data_ptr_32 = NULL;
 	state->out_remain = 0;
 
+	if (config->enable_vad) {
+		ret = mfcc_vad_init(&cd->vad, config->num_mel_bins, sample_rate, mod);
+		if (ret < 0) {
+			comp_err(dev, "Failed VAD init");
+			goto free_lifter;
+		}
+	}
+
 	comp_dbg(dev, "done");
 	return 0;
 
@@ -389,4 +407,6 @@ void mfcc_free_buffers(struct processing_module *mod)
 	mod_free(mod, cd->state.melfb.data);
 	mod_free(mod, cd->state.dct.matrix);
 	mod_free(mod, cd->state.lifter.matrix);
+	mod_free(mod, cd->vad.noise_floor);
+	mod_free(mod, cd->vad.weights);
 }