Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/audio/mfcc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ if(CONFIG_COMP_MFCC STREQUAL "m" AND DEFINED CONFIG_LLEXT)
add_subdirectory(llext ${PROJECT_BINARY_DIR}/mfcc_llext)
add_dependencies(app mfcc)
else()
add_local_sources(sof mfcc.c mfcc_setup.c mfcc_common.c mfcc_generic.c mfcc_hifi4.c mfcc_hifi3.c)
add_local_sources(sof mfcc.c mfcc_setup.c mfcc_common.c mfcc_generic.c mfcc_hifi4.c mfcc_hifi3.c mfcc_vad.c)
endif()
91 changes: 64 additions & 27 deletions src/audio/mfcc/mfcc_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <stddef.h>
#include <stdint.h>

#include <sof/audio/mfcc/mfcc_vad.h>

LOG_MODULE_REGISTER(mfcc_common, CONFIG_SOF_LOG_LEVEL);

/*
Expand Down Expand Up @@ -169,6 +171,22 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_comp_data *

cc_count += state->dct.num_out;
}

/* Use hop counter for frame numbering (independent of VAD enable) */
state->header.frame_number = state->hop_count;

/* Run VAD on the mel log spectrum (available in both modes) */
if (config->enable_vad) {
mfcc_vad_update(&cd->vad, state->mel_log_32);

/* Populate data header for this output frame */
state->header.energy = cd->vad.energy;
state->header.noise_energy = cd->vad.noise_energy;
state->header.vad_flag = cd->vad.is_speech ? 1 : 0;
}

/* Increment hop counter at end of hop processing */
state->hop_count++;
}

return cc_count;
Expand Down Expand Up @@ -267,9 +285,8 @@ void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer
struct mfcc_comp_data *cd = module_get_private_data(mod);
struct mfcc_state *state = &cd->state;
struct mfcc_buffer *buf = &cd->state.buf;
uint32_t magic = MFCC_MAGIC;
int16_t *w_ptr = audio_stream_get_wptr(sink);
const int num_magic = 2;
const int num_header_s16 = sizeof(state->header) / sizeof(int16_t);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the division guaranteed to be exact, or do you want it to round down?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is guaranteed manually by the design of the struct. This part of the code changes in the next MFCC PR, #10814, which has a (more) final version of the output code, so it is better to put the review effort there.

int num_ceps;
int sink_samples;
int to_copy;
Expand All @@ -280,25 +297,33 @@ void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer
/* Run STFT and processing after FFT: Mel auditory filter and DCT. */
num_ceps = mfcc_stft_process(mod->dev, cd);

/* If new output produced, set up pointer into scratch data and mark magic pending */
/* If new output produced, set up pointer into scratch data and mark header pending */
if (num_ceps > 0) {
if (state->mel_only)
if (state->mel_only) {
state->out_data_ptr = state->mel_spectra->data;
else
} else {
state->out_data_ptr = state->cepstral_coef->data;
}

state->out_remain = num_ceps;
state->magic_pending = true;
state->header_pending = true;
}

/* Write to sink, limited by period size */
sink_samples = frames * audio_stream_get_channels(sink);

/* Write magic word first if pending */
if (state->magic_pending && sink_samples >= num_magic) {
w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_magic, (int16_t *)&magic);
sink_samples -= num_magic;
state->magic_pending = false;
/* Write data header first if pending */
if (state->header_pending) {
if (sink_samples < num_header_s16) {
/* Not enough sink space for header, defer entire frame */
mfcc_sink_copy_zero_s16(sink, w_ptr, sink_samples);
return;
}

w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_header_s16,
(int16_t *)&state->header);
sink_samples -= num_header_s16;
state->header_pending = false;
Comment thread
singalsu marked this conversation as resolved.
Comment thread
singalsu marked this conversation as resolved.
}

/* Write cepstral/mel data from scratch buffer */
Expand Down Expand Up @@ -363,9 +388,8 @@ void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer
struct mfcc_comp_data *cd = module_get_private_data(mod);
struct mfcc_state *state = &cd->state;
struct mfcc_buffer *buf = &cd->state.buf;
uint32_t magic = MFCC_MAGIC;
int32_t *w_ptr = audio_stream_get_wptr(sink);
const int num_magic = 1; /* one int32_t word for magic */
const int num_header_s32 = sizeof(state->header) / sizeof(int32_t);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question about rounding here.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the header size is also, by design, a multiple of the int32_t size.

int num_ceps;
int sink_samples;
int remain_s32;
Expand All @@ -391,17 +415,24 @@ void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer
}

state->out_remain = num_ceps;
state->magic_pending = true;
state->header_pending = true;
}

/* Write to sink, limited by period size */
sink_samples = frames * audio_stream_get_channels(sink);

/* Write magic word first if pending */
if (state->magic_pending && sink_samples >= num_magic) {
w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_magic, (int32_t *)&magic);
sink_samples -= num_magic;
state->magic_pending = false;
/* Write data header first if pending */
if (state->header_pending) {
if (sink_samples < num_header_s32) {
/* Not enough sink space for header, defer entire frame */
mfcc_sink_copy_zero_s32(sink, w_ptr, sink_samples);
return;
}

w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_header_s32,
(int32_t *)&state->header);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like we expect the header size to be a multiple of 4 bytes, so maybe we could add a build assertion and comments here to make that clear. This can be a follow-up; I am just checking that this is indeed the case.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No assertions; return an error.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lgirdwood a build assertion, not a runtime one. These sizes are known at build time, so a build-time check can be added with no run-time cost

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, build is fine.

@singalsu singalsu May 28, 2026

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tips on how to add a build-time check and error for the struct size (a multiple of something) are welcome. After #10814 I will still try to come up with a more generic audio feature stream for a bespoke encoder with more header layers, e.g. by using https://github.com/thesofproject/sof/blob/main/src/include/user/audio_feature.h . For now I'm focusing on making things work well and on showing the improvement in power measurements vs. the SOF PCM stream and things done in host user space.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@singalsu something like BUILD_ASSERT(!(sizeof(state->header) % sizeof(int32_t)))

sink_samples -= num_header_s32;
state->header_pending = false;
}

if (state->mel_only) {
Expand Down Expand Up @@ -443,9 +474,8 @@ void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer
struct mfcc_comp_data *cd = module_get_private_data(mod);
struct mfcc_state *state = &cd->state;
struct mfcc_buffer *buf = &cd->state.buf;
uint32_t magic = MFCC_MAGIC;
int32_t *w_ptr = audio_stream_get_wptr(sink);
const int num_magic = 1; /* one int32_t word for magic */
const int num_header_s32 = sizeof(state->header) / sizeof(int32_t);
int num_ceps;
int sink_samples;
int remain_s32;
Expand All @@ -466,17 +496,24 @@ void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer
}

state->out_remain = num_ceps;
state->magic_pending = true;
state->header_pending = true;
}

/* Write to sink, limited by period size */
sink_samples = frames * audio_stream_get_channels(sink);

/* Write magic word first if pending */
if (state->magic_pending && sink_samples >= num_magic) {
w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_magic, (int32_t *)&magic);
sink_samples -= num_magic;
state->magic_pending = false;
/* Write data header first if pending */
if (state->header_pending) {
if (sink_samples < num_header_s32) {
/* Not enough sink space for header, defer entire frame */
mfcc_sink_copy_zero_s32(sink, w_ptr, sink_samples);
return;
}

w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_header_s32,
(int32_t *)&state->header);
sink_samples -= num_header_s32;
state->header_pending = false;
}

if (state->mel_only) {
Expand Down
28 changes: 24 additions & 4 deletions src/audio/mfcc/mfcc_setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <stddef.h>
#include <stdint.h>

#include <sof/audio/mfcc/mfcc_vad.h>

/* Definitions for cepstral lifter */
#define PI_Q23 Q_CONVERT_FLOAT(3.1415926536, 23)
#define TWO_PI_Q23 Q_CONVERT_FLOAT(6.2831853072, 23)
Expand Down Expand Up @@ -127,6 +129,11 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
return -EINVAL;
}

if (sample_rate > MFCC_MAX_SAMPLE_RATE) {
comp_err(dev, "Sample rate %d exceeds max %d Hz", sample_rate, MFCC_MAX_SAMPLE_RATE);
return -EINVAL;
}

if (config->sample_frequency != sample_rate) {
comp_err(dev, "Config sample_frequency does not match stream");
return -EINVAL;
Expand Down Expand Up @@ -328,11 +335,11 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i

/* Check that output data can be drained within the periods spanned by one
* FFT hop. Each hop consumes fft_hop_size input samples and produces
* max_out_per_hop + 2 (magic) int16_t output values. The sink provides at
* least fft_hop_size * channels int16_t samples per hop (worst case s16).
* max_out_per_hop + 12 (magic header) int16_t output values. The sink provides
* at least fft_hop_size * channels int16_t samples per hop (worst case s16).
* If output exceeds this, data accumulates and will eventually overflow.
*/
int out_per_hop = max_out_per_hop + 2;
int out_per_hop = max_out_per_hop + sizeof(state->header) / sizeof(int16_t);
Comment thread
singalsu marked this conversation as resolved.
int sink_per_hop = fft->fft_hop_size * channels;

if (out_per_hop > sink_per_hop) {
Expand All @@ -345,11 +352,22 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i
/* Set initial state for STFT */
state->waiting_fill = true;
state->prev_samples_valid = false;
state->magic_pending = false;
state->header_pending = false;
state->hop_count = 0;
memset(&state->header, 0, sizeof(state->header));
state->header.magic = MFCC_MAGIC;
Comment thread
singalsu marked this conversation as resolved.
state->out_data_ptr = NULL;
state->out_data_ptr_32 = NULL;
state->out_remain = 0;

if (config->enable_vad) {
ret = mfcc_vad_init(&cd->vad, config->num_mel_bins, sample_rate, mod);
if (ret < 0) {
comp_err(dev, "Failed VAD init");
goto free_lifter;
}
}

comp_dbg(dev, "done");
return 0;

Expand Down Expand Up @@ -389,4 +407,6 @@ void mfcc_free_buffers(struct processing_module *mod)
mod_free(mod, cd->state.melfb.data);
mod_free(mod, cd->state.dct.matrix);
mod_free(mod, cd->state.lifter.matrix);
mod_free(mod, cd->vad.noise_floor);
mod_free(mod, cd->vad.weights);
}
Loading