diff --git a/.github/workflows/build-distros.yml b/.github/workflows/build-distros.yml index 4bc89735b..f0c1c0a91 100644 --- a/.github/workflows/build-distros.yml +++ b/.github/workflows/build-distros.yml @@ -35,34 +35,65 @@ jobs: sed -i 's/^Types: deb/Types: deb deb-src/' /etc/apt/sources.list.d/ubuntu.sources DEBIAN_FRONTEND=noninteractive apt-get -qq update DEBIAN_FRONTEND=noninteractive apt-get -yqq build-dep mlt - DEBIAN_FRONTEND=noninteractive apt-get -yqq install cmake qt6-base-dev libqt6svg6-dev + DEBIAN_FRONTEND=noninteractive apt-get -yqq install autoconf automake git libtool pkg-config wget cmake qt6-base-dev libqt6svg6-dev + git clone --depth 1 --branch v0.2 https://github.com/xiph/rnnoise.git /tmp/rnnoise + cd /tmp/rnnoise + ./autogen.sh + ./configure --prefix=/usr/local --enable-shared --disable-static + make -j"$(nproc)" + make install - name: ubuntu-24.04 image: ubuntu:24.04 setup_script: | sed -i 's/^Types: deb/Types: deb deb-src/' /etc/apt/sources.list.d/ubuntu.sources DEBIAN_FRONTEND=noninteractive apt-get -qq update DEBIAN_FRONTEND=noninteractive apt-get -yqq build-dep mlt - DEBIAN_FRONTEND=noninteractive apt-get -yqq install cmake qt6-base-dev libqt6svg6-dev + DEBIAN_FRONTEND=noninteractive apt-get -yqq install autoconf automake git libtool pkg-config wget cmake qt6-base-dev libqt6svg6-dev + git clone --depth 1 --branch v0.2 https://github.com/xiph/rnnoise.git /tmp/rnnoise + cd /tmp/rnnoise + ./autogen.sh + ./configure --prefix=/usr/local --enable-shared --disable-static + make -j"$(nproc)" + make install - name: ubuntu-22.04 image: ubuntu:22.04 setup_script: | sed -i '/^#\sdeb-src /s/^#//' "/etc/apt/sources.list" DEBIAN_FRONTEND=noninteractive apt-get -qq update DEBIAN_FRONTEND=noninteractive apt-get -yqq build-dep mlt - DEBIAN_FRONTEND=noninteractive apt-get -yqq install cmake qt6-base-dev libqt6svg6-dev libqt6core5compat6-dev + DEBIAN_FRONTEND=noninteractive apt-get -yqq install autoconf automake git libtool pkg-config wget cmake 
qt6-base-dev libqt6svg6-dev libqt6core5compat6-dev + git clone --depth 1 --branch v0.2 https://github.com/xiph/rnnoise.git /tmp/rnnoise + cd /tmp/rnnoise + ./autogen.sh + ./configure --prefix=/usr/local --enable-shared --disable-static + make -j"$(nproc)" + make install - name: debian-unstable image: debian:unstable setup_script: | echo 'deb-src http://deb.debian.org/debian unstable main' >> /etc/apt/sources.list apt-get -qq update apt-get -yqq build-dep mlt + apt-get -yqq install autoconf automake git libtool pkg-config wget + git clone --depth 1 --branch v0.2 https://github.com/xiph/rnnoise.git /tmp/rnnoise + cd /tmp/rnnoise + ./autogen.sh + ./configure --prefix=/usr/local --enable-shared --disable-static + make -j"$(nproc)" + make install - name: debian-testing image: debian:testing setup_script: | echo 'deb-src http://deb.debian.org/debian testing main' >> /etc/apt/sources.list apt-get -qq update apt-get -yqq build-dep mlt - apt-get -yqq install cmake qt6-base-dev libqt6svg6-dev + apt-get -yqq install autoconf automake git libtool pkg-config wget cmake qt6-base-dev libqt6svg6-dev + git clone --depth 1 --branch v0.2 https://github.com/xiph/rnnoise.git /tmp/rnnoise + cd /tmp/rnnoise + ./autogen.sh + ./configure --prefix=/usr/local --enable-shared --disable-static + make -j"$(nproc)" + make install - name: debian-stable image: debian:stable setup_script: | @@ -70,7 +101,13 @@ jobs: echo 'deb http://deb.debian.org/debian bookworm-backports main' >> /etc/apt/sources.list apt-get -qq update apt-get -yqq build-dep mlt - apt-get -yqq install cmake qt6-base-dev libqt6svg6-dev + apt-get -yqq install autoconf automake git libtool pkg-config wget cmake qt6-base-dev libqt6svg6-dev + git clone --depth 1 --branch v0.2 https://github.com/xiph/rnnoise.git /tmp/rnnoise + cd /tmp/rnnoise + ./autogen.sh + ./configure --prefix=/usr/local --enable-shared --disable-static + make -j"$(nproc)" + make install - name: fedora-44 image: fedora:44 setup_script: | @@ -83,7 +120,7 @@ jobs: 
libtheora-devel libvorbis-devel libvdpau-devel \ libsoup-devel liboil-devel python-devel alsa-lib \ pulseaudio-libs-devel gcc-c++ cmake ffmpeg-free-devel \ - movit-devel rubberband-devel vid.stab-devel + movit-devel rubberband-devel vid.stab-devel rnnoise-devel - name: fedora-38 image: fedora:38 setup_script: | @@ -96,7 +133,7 @@ jobs: libtheora-devel libvorbis-devel libvdpau-devel \ libsoup-devel liboil-devel python-devel alsa-lib \ pulseaudio-libs-devel gcc-c++ cmake ffmpeg-free-devel \ - movit-devel rubberband-devel vid.stab-devel + movit-devel rubberband-devel vid.stab-devel rnnoise-devel container: image: ${{ matrix.image }} diff --git a/.github/workflows/build-linux.yml b/.github/workflows/build-linux.yml index 0e618634f..5484c6bed 100644 --- a/.github/workflows/build-linux.yml +++ b/.github/workflows/build-linux.yml @@ -38,7 +38,14 @@ jobs: sudo sed -i '/^#\sdeb-src /s/^#//' "/etc/apt/sources.list" sudo apt-get -qq update sudo apt-get -yqq build-dep mlt - sudo apt-get -yqq install qt6-base-dev libqt6svg6-dev libqt6core5compat6-dev + sudo apt-get -yqq install autoconf automake git libtool pkg-config wget qt6-base-dev libqt6svg6-dev libqt6core5compat6-dev + git clone --depth 1 --branch v0.2 https://github.com/xiph/rnnoise.git /tmp/rnnoise + pushd /tmp/rnnoise + ./autogen.sh + ./configure --prefix=/usr/local --enable-shared --disable-static + make -j"$(nproc)" + sudo make install + popd sudo apt-get -yqq install cmake ninja-build kwalify cmake -D CMAKE_BUILD_TYPE=Debug -D BUILD_TESTING=ON -D SWIG_PYTHON=ON -S . 
-B build -G Ninja cmake --build build diff --git a/.github/workflows/build-msys2-mingw64.yml b/.github/workflows/build-msys2-mingw64.yml index 2417926cd..8b89e2e41 100644 --- a/.github/workflows/build-msys2-mingw64.yml +++ b/.github/workflows/build-msys2-mingw64.yml @@ -65,6 +65,13 @@ jobs: - name: Build C/C++ with CMake shell: msys2 {0} run: | + git clone --depth 1 --branch v0.2 https://github.com/xiph/rnnoise.git /tmp/rnnoise + pushd /tmp/rnnoise + ./autogen.sh + ./configure --prefix=/mingw64 --enable-shared --disable-static + make -j"$(nproc)" + make install + popd cmake -D CMAKE_BUILD_TYPE=Debug -D MOD_MOVIT=OFF -D SWIG_PYTHON=ON -S . -B build -G Ninja cmake --build build cmake --install build diff --git a/.github/workflows/static-code-analysis.yml b/.github/workflows/static-code-analysis.yml index 00a95de14..103950967 100644 --- a/.github/workflows/static-code-analysis.yml +++ b/.github/workflows/static-code-analysis.yml @@ -35,7 +35,7 @@ jobs: sudo apt-get -yqq install clang-format-14 - name: Run CMake run: | - cmake -S . -B build -D CLANG_FORMAT=ON -D MOD_SDL2=OFF -D MOD_QT6=OFF -D MOD_AVFORMAT=OFF -D MOD_DECKLINK=OFF -D MOD_FREI0R=OFF -D MOD_GDK=OFF -D MOD_JACKRACK=OFF -D USE_LV2=OFF -D USE_VST2=OFF -D MOD_KDENLIVE=OFF -D MOD_NORMALIZE=OFF -D MOD_OLDFILM=OFF -D MOD_MOVIT=OFF -D MOD_PLUS=OFF -D MOD_PLUSGPL=OFF -D MOD_RESAMPLE=OFF -D MOD_RTAUDIO=OFF -D MOD_RUBBERBAND=OFF -D MOD_SOX=OFF -D MOD_VIDSTAB=OFF -D MOD_VORBIS=OFF -D MOD_XINE=OFF -D MOD_XML=OFF + cmake -S . 
-B build -D CLANG_FORMAT=ON -D MOD_SDL2=OFF -D MOD_QT6=OFF -D MOD_AVFORMAT=OFF -D MOD_DECKLINK=OFF -D MOD_FREI0R=OFF -D MOD_GDK=OFF -D MOD_JACKRACK=OFF -D USE_LV2=OFF -D USE_VST2=OFF -D MOD_KDENLIVE=OFF -D MOD_NORMALIZE=OFF -D MOD_OLDFILM=OFF -D MOD_MOVIT=OFF -D MOD_PLUS=OFF -D MOD_PLUSGPL=OFF -D MOD_RESAMPLE=OFF -D MOD_RNNOISE=OFF -D MOD_RTAUDIO=OFF -D MOD_RUBBERBAND=OFF -D MOD_SOX=OFF -D MOD_VIDSTAB=OFF -D MOD_VORBIS=OFF -D MOD_XINE=OFF -D MOD_XML=OFF - name: Run clang-format run: | cd build diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b2c6a013..bb52cd417 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,10 @@ option(MOD_QT6 "Enable Qt6 module (GPL)" ON) option(MOD_RESAMPLE "Enable Resample module (GPL)" ON) option(MOD_RTAUDIO "Enable RtAudio module" ON) option(MOD_RUBBERBAND "Enable Rubberband module (GPL)" ON) +option(MOD_RNNOISE "Enable RNNoise noise-reduction module" ON) +if(WIN32 AND MSVC) + set(MOD_RNNOISE OFF CACHE BOOL "Enable RNNoise noise-reduction module" FORCE) +endif() option(MOD_SDL1 "Enable SDL1 module" OFF) option(MOD_SDL2 "Enable SDL2 module" ON) option(MOD_SOX "Enable SoX module" ON) @@ -368,6 +372,11 @@ if(MOD_RUBBERBAND) list(APPEND MLT_SUPPORTED_COMPONENTS rubberband) endif() +if(MOD_RNNOISE) + pkg_check_modules(rnnoise REQUIRED IMPORTED_TARGET rnnoise) + list(APPEND MLT_SUPPORTED_COMPONENTS rnnoise) +endif() + if(MOD_SDL1) pkg_check_modules(sdl REQUIRED IMPORTED_TARGET sdl) list(APPEND MLT_SUPPORTED_COMPONENTS sdl) @@ -589,6 +598,7 @@ add_feature_info("Module: Qt6" MOD_QT6 "") add_feature_info("Module: Resample" MOD_RESAMPLE "") add_feature_info("Module: RtAudio" MOD_RTAUDIO "") add_feature_info("Module: Rubberband" MOD_RUBBERBAND "") +add_feature_info("Module: RNNoise" MOD_RNNOISE "") add_feature_info("Module: SDL1" MOD_SDL1 "") add_feature_info("Module: SDL2" MOD_SDL2 "") add_feature_info("Module: SoX" MOD_SOX "") diff --git a/CMakePresets.json b/CMakePresets.json index 1becb6e4a..cb4168648 100644 --- a/CMakePresets.json 
+++ b/CMakePresets.json @@ -32,6 +32,7 @@ "MOD_RESAMPLE": "ON", "MOD_RTAUDIO": "ON", "MOD_RUBBERBAND": "ON", + "MOD_RNNOISE": "OFF", "MOD_SDL2": "ON", "MOD_SPATIALAUDIO": "OFF", "MOD_VIDSTAB": "OFF", diff --git a/src/modules/CMakeLists.txt b/src/modules/CMakeLists.txt index f079cb186..bbeb83426 100644 --- a/src/modules/CMakeLists.txt +++ b/src/modules/CMakeLists.txt @@ -72,6 +72,10 @@ if(MOD_RUBBERBAND) add_subdirectory(rubberband) endif() +if(MOD_RNNOISE) + add_subdirectory(rnnoise) +endif() + if(MOD_SDL1) add_subdirectory(sdl) endif() diff --git a/src/modules/rnnoise/CMakeLists.txt b/src/modules/rnnoise/CMakeLists.txt new file mode 100644 index 000000000..c20a89867 --- /dev/null +++ b/src/modules/rnnoise/CMakeLists.txt @@ -0,0 +1,16 @@ +add_library(mltrnnoise MODULE factory.c filter_rnnoise.c link_rnnoise.c) + +file(GLOB YML "*.yml") +add_custom_target(Other_rnnoise_Files SOURCES ${YML}) + +include(GenerateExportHeader) +generate_export_header(mltrnnoise) + +target_compile_options(mltrnnoise PRIVATE ${MLT_COMPILE_OPTIONS}) +target_include_directories(mltrnnoise PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_link_libraries(mltrnnoise PRIVATE mlt PkgConfig::rnnoise) + +set_target_properties(mltrnnoise PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${MLT_MODULE_OUTPUT_DIRECTORY}") + +install(TARGETS mltrnnoise LIBRARY DESTINATION ${MLT_INSTALL_MODULE_DIR}) +install(FILES filter_rnnoise.yml link_rnnoise.yml DESTINATION ${MLT_INSTALL_DATA_DIR}/rnnoise) diff --git a/src/modules/rnnoise/factory.c b/src/modules/rnnoise/factory.c new file mode 100644 index 000000000..2f8b6a1bb --- /dev/null +++ b/src/modules/rnnoise/factory.c @@ -0,0 +1,48 @@ +/* + * factory.c -- the factory method interfaces for the rnnoise module + * Copyright (C) 2026 Meltytech, LLC + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or 
(at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include "mltrnnoise_export.h" +#include +#include +#include + +extern mlt_filter filter_rnnoise_init(mlt_profile profile, + mlt_service_type type, + const char *id, + char *arg); +extern mlt_link link_rnnoise_init(mlt_profile profile, + mlt_service_type type, + const char *id, + char *arg); + +static mlt_properties metadata(mlt_service_type type, const char *id, void *data) +{ + char file[PATH_MAX]; + snprintf(file, PATH_MAX, "%s/rnnoise/%s", mlt_environment("MLT_DATA"), (char *) data); + return mlt_properties_parse_yaml(file); +} + +MLTRNNOISE_EXPORT MLT_REPOSITORY +{ + MLT_REGISTER(mlt_service_filter_type, "rnnoise", filter_rnnoise_init); + MLT_REGISTER(mlt_service_link_type, "rnnoise", link_rnnoise_init); + MLT_REGISTER_METADATA(mlt_service_filter_type, "rnnoise", metadata, "filter_rnnoise.yml"); + MLT_REGISTER_METADATA(mlt_service_link_type, "rnnoise", metadata, "link_rnnoise.yml"); +} diff --git a/src/modules/rnnoise/filter_rnnoise.c b/src/modules/rnnoise/filter_rnnoise.c new file mode 100644 index 000000000..fde421d48 --- /dev/null +++ b/src/modules/rnnoise/filter_rnnoise.c @@ -0,0 +1,431 @@ +/* + * filter_rnnoise.c -- audio noise reduction using RNNoise + * Copyright (C) 2026 Meltytech, LLC + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your 
option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include + +#include + +#include +#include + +#define MAX_CHANNELS 8 +#define RNNOISE_RATE 48000 + +// Input carry: at most rnn_frame-1 = 479 unprocessed samples per channel. +#define IN_CARRY_CAPACITY 480 +// Output carry: at 24fps/48kHz a frame is ~2000 samples. +// One extra RNNoise chunk (480 samples) above n_samples is the maximum excess. +// 1024 is ample. +#define OUT_CARRY_CAPACITY 1024 + +typedef struct +{ + DenoiseState *states[MAX_CHANNELS]; + int n_channels; + int frequency; + + // Input carry: raw (unprocessed) samples left over from the previous frame + // that didn't form a complete 480-sample RNNoise chunk. + // in_carry_count < rnn_frame always. + float in_carry[MAX_CHANNELS][IN_CARRY_CAPACITY]; + int in_carry_count; + + // Output carry: already-processed, mix-applied samples that belong to + // future MLT frames due to the output count exceeding n_samples. + // out_carry_count is typically small (< rnn_frame). + float *out_carry[MAX_CHANNELS]; + int out_carry_count; + + // Frame-position continuity tracking. + mlt_position expected_frame; + + // Dry-signal delay ring buffer for wet/dry mix alignment. + // RNNoise synthesizes from the previous call's FFT, which itself analyzed + // the frame before that ([analysis_mem, in]), so the output at call N + // reconstructs in_{N-2} — exactly 2 * rnn_frame = 960 samples of delay. + // We delay the dry signal by the same amount so both are aligned. 
+ float dry_ring[MAX_CHANNELS][960]; + int dry_ring_pos; +} private_data; + +static void reset_all(mlt_filter filter, private_data *pdata, int channels) +{ + mlt_log_debug(MLT_FILTER_SERVICE(filter), + "reset (channels=%d in_carry=%d out_carry=%d)\n", + channels, + pdata->in_carry_count, + pdata->out_carry_count); + for (int i = 0; i < MAX_CHANNELS; i++) { + if (pdata->states[i]) { + rnnoise_destroy(pdata->states[i]); + pdata->states[i] = NULL; + } + } + for (int i = 0; i < channels && i < MAX_CHANNELS; i++) { + pdata->states[i] = rnnoise_create(NULL); + if (!pdata->states[i]) { + mlt_log_error(MLT_FILTER_SERVICE(filter), + "Failed to create RNNoise state for channel %d\n", + i); + } + } + pdata->n_channels = channels; + pdata->in_carry_count = 0; + pdata->out_carry_count = 0; + memset(pdata->dry_ring, 0, sizeof(pdata->dry_ring)); + pdata->dry_ring_pos = 0; +} + +static int rnnoise_get_audio(mlt_frame frame, + void **buffer, + mlt_audio_format *format, + int *frequency, + int *channels, + int *samples) +{ + mlt_filter filter = mlt_frame_pop_audio(frame); + mlt_properties filter_props = MLT_FILTER_PROPERTIES(filter); + private_data *pdata = (private_data *) filter->child; + + // RNNoise requires 48 kHz float input + *frequency = RNNOISE_RATE; + *format = mlt_audio_float; + + int error = mlt_frame_get_audio(frame, buffer, format, frequency, channels, samples); + if (error || *samples == 0) + return error; + + if (*format != mlt_audio_float && frame->convert_audio != NULL) + frame->convert_audio(frame, buffer, format, mlt_audio_float); + + mlt_service_lock(MLT_FILTER_SERVICE(filter)); + + const int n_samples = *samples; + const int ch = *channels; + const int rnn_frame = rnnoise_get_frame_size(); // always 480 + mlt_position frame_pos = mlt_frame_get_position(frame); + + if (ch > MAX_CHANNELS) { + mlt_log_warning(MLT_FILTER_SERVICE(filter), + "RNNoise filter supports up to %d channels, got %d; bypassing\n", + MAX_CHANNELS, + ch); + pdata->expected_frame = frame_pos + 
1; + pdata->n_channels = ch; + pdata->frequency = *frequency; + mlt_service_unlock(MLT_FILTER_SERVICE(filter)); + return 0; + } + + // Detect format change or seek discontinuity -> full reset + if (pdata->n_channels != ch || pdata->frequency != *frequency + || pdata->expected_frame != frame_pos) { + reset_all(filter, pdata, ch); + pdata->frequency = *frequency; + } + + // Ensure output carry buffers are allocated + for (int c = 0; c < ch && c < MAX_CHANNELS; c++) { + if (!pdata->out_carry[c]) { + pdata->out_carry[c] = (float *) calloc(OUT_CARRY_CAPACITY, sizeof(float)); + if (!pdata->out_carry[c]) { + mlt_log_error(MLT_FILTER_SERVICE(filter), + "Failed to allocate output carry buffer for channel %d\n", + c); + mlt_service_unlock(MLT_FILTER_SERVICE(filter)); + return 1; + } + } + } + + double mix = mlt_properties_get_double(filter_props, "mix"); + mix = CLAMP(mix, 0.0, 1.0); + + // ------------------------------------------------------------------ + // Algorithm: input-carry + output-carry + // + // Input carry (raw, unprocessed, < rnn_frame samples): + // Leftover samples from the previous frame that did not form a + // complete 480-sample RNNoise chunk. Prepended to this frame's input + // before processing. RNNoise is NEVER fed silence after the first frame. + // + // On the very first call (in_carry_count == 0, out_carry_count == 0): + // Prepend `pad` silence samples so that total input is a multiple of + // rnn_frame. This introduces a one-time delay of `pad` samples and + // ensures in_carry_count remains 0 afterwards (pad is chosen to align). + // Because all chunks on the first frame use real audio (after the + // initial silence prefix), no silence is ever fed again. + // + // Output carry (processed, mix-applied): + // When the number of processed samples exceeds n_samples, the excess + // is stored here and prepended to the next frame's output. 
+ // + // The cycle for n_samples=2000, rnn_frame=480: + // Frame 1: in_carry=0, pad=400, total=2400, chunks=5, out=2400, + // out_carry=400, in_carry=0 + // Frame 2: in_carry=0, pad=0, total=2000, chunks=4, out=1920, + // avail=400+1920=2320, deliver 2000, out_carry=320 + // ... + // Frame 6: avail=80+1920=2000, deliver 2000, out_carry=0 + // Frame 7: same as frame 2 + // (RNNoise is never fed silence after frame 1's initial pad) + // ------------------------------------------------------------------ + + // On first call, compute silence prefix so (pad + n_samples) % rnn_frame == 0 + int pad = 0; + if (pdata->in_carry_count == 0 && pdata->out_carry_count == 0) { + int remainder = n_samples % rnn_frame; + if (remainder != 0) + pad = rnn_frame - remainder; + } + + // Total input fed to RNNoise this call: + // [silence(pad)] ++ [in_carry] ++ [in_ch] + // But after the first frame, pad==0 always (see below). + int total_in = pad + pdata->in_carry_count + n_samples; + int n_chunks = total_in / rnn_frame; // floor: only complete chunks, no silence padding + int new_in_carry = total_in - n_chunks * rnn_frame; // leftover raw input (< rnn_frame) + + mlt_log_debug(MLT_FILTER_SERVICE(filter), + "frame=%d n_samples=%d pad=%d in_carry=%d out_carry=%d " + "n_chunks=%d new_in_carry=%d\n", + (int) frame_pos, + n_samples, + pad, + pdata->in_carry_count, + pdata->out_carry_count, + n_chunks, + new_in_carry); + + float *src = (float *) *buffer; + + // Allocate output buffer + struct mlt_audio_s out; + mlt_audio_set_values(&out, NULL, *frequency, mlt_audio_float, n_samples, ch); + mlt_audio_alloc_data(&out); + if (!out.data) { + mlt_log_error(MLT_FILTER_SERVICE(filter), "Failed to allocate output audio buffer\n"); + mlt_service_unlock(MLT_FILTER_SERVICE(filter)); + return 1; + } + float *dst = (float *) out.data; + + // Scratch buffers for one RNNoise chunk (stack) + float frame_in[480]; + float frame_out[480]; + + // We build a new input carry as we go (same across all channels, + 
// but stored per-channel for the raw values). + // in_carry_new_count is computed on channel 0 and reused. + int new_in_carry_count = 0; + int new_out_carry_count = 0; + + // Snapshot ring position so each channel processes the same virtual positions. + const int ring_size = 2 * rnn_frame; + int ring_start = pdata->dry_ring_pos; + + for (int c = 0; c < ch && c < MAX_CHANNELS; c++) { + pdata->dry_ring_pos = ring_start; // reset for each channel + float *in_ch = src + (size_t) c * n_samples; + float *out_ch = dst + (size_t) c * n_samples; + float *out_carry_ch = pdata->out_carry[c]; + + // 1. Drain output carry into out_ch + int from_out_carry = pdata->out_carry_count < n_samples ? pdata->out_carry_count + : n_samples; + if (c == 0 && pdata->out_carry_count > n_samples) + mlt_log_warning(MLT_FILTER_SERVICE(filter), + "frame=%d ch=%d out_carry_count=%d > n_samples=%d\n", + (int) frame_pos, + c, + pdata->out_carry_count, + n_samples); + memcpy(out_ch, out_carry_ch, from_out_carry * sizeof(float)); + int out_carry_left = pdata->out_carry_count - from_out_carry; + if (out_carry_left > 0) + memmove(out_carry_ch, out_carry_ch + from_out_carry, out_carry_left * sizeof(float)); + + int out_pos = from_out_carry; // next write index in out_ch + int carry_pos = out_carry_left; // next write index in out_carry_ch + + // 2. Process n_chunks complete RNNoise frames. + // Virtual read position into the combined input stream: + // negative indices → silence (pad prefix, first frame only) + // [0 .. in_carry_count) → pdata->in_carry[c] + // [in_carry_count .. 
total) → in_ch + int in_virtual = -pad; // starts in the silence region on first frame + int missing_state_logged = 0; + + for (int k = 0; k < n_chunks; k++) { + // Build one rnn_frame-sample input + for (int s = 0; s < rnn_frame; s++) { + int p = in_virtual + s; + float val; + if (p < 0) { + val = 0.0f; // silence prefix (first frame only) + } else if (p < pdata->in_carry_count) { + val = pdata->in_carry[c][p] * 32768.0f; + } else { + int q = p - pdata->in_carry_count; + val = (q < n_samples) ? in_ch[q] * 32768.0f : 0.0f; + } + frame_in[s] = val; + } + if (pdata->states[c]) { + rnnoise_process_frame(pdata->states[c], frame_out, frame_in); + } else { + if (!missing_state_logged) { + mlt_log_error(MLT_FILTER_SERVICE(filter), + "Missing RNNoise state for channel %d; bypassing denoise\n", + c); + missing_state_logged = 1; + } + memcpy(frame_out, frame_in, sizeof(frame_out)); + } + + // Distribute output to out_ch or out_carry + for (int s = 0; s < rnn_frame; s++) { + int p = in_virtual + s; + float raw; + if (p < 0) { + raw = 0.0f; + } else if (p < pdata->in_carry_count) { + raw = pdata->in_carry[c][p]; + } else { + int q = p - pdata->in_carry_count; + raw = (q < n_samples) ? in_ch[q] : 0.0f; + } + float denoised = frame_out[s] / 32768.0f; + // Delay dry by 2*rnn_frame to match RNNoise's two-frame internal delay. + int ring_idx = pdata->dry_ring_pos % ring_size; + float delayed_raw = pdata->dry_ring[c][ring_idx]; // read (2*rnn_frame ago) + pdata->dry_ring[c][ring_idx] = raw; // write current + pdata->dry_ring_pos++; + float mixed = delayed_raw * (1.0f - (float) mix) + denoised * (float) mix; + + if (out_pos < n_samples) { + out_ch[out_pos++] = mixed; + } else { + if (carry_pos < OUT_CARRY_CAPACITY) { + out_carry_ch[carry_pos++] = mixed; + } else { + mlt_log_warning(MLT_FILTER_SERVICE(filter), + "frame=%d ch=%d out_carry overflow at pos=%d\n", + (int) frame_pos, + c, + carry_pos); + } + } + } + in_virtual += rnn_frame; + } + + // 3. 
Save leftover raw input into in_carry (the samples not yet fed to RNNoise) + // These are the last new_in_carry samples of the combined input stream + // [old in_carry ++ in_ch]. They normally all come from in_ch, but when the + // MLT frame is shorter than the leftover (n_samples < new_in_carry, which + // needs n_samples < rnn_frame) the old carry was not consumed and must be + // kept; indexing in_ch at n_samples - new_in_carry would then be negative + // and read out of bounds. + int from_old = new_in_carry - n_samples; // samples retained from old carry + if (from_old > 0) { + memmove(pdata->in_carry[c], + pdata->in_carry[c] + (pdata->in_carry_count - from_old), + (size_t) from_old * sizeof(float)); + memcpy(pdata->in_carry[c] + from_old, in_ch, (size_t) n_samples * sizeof(float)); + } else { + memcpy(pdata->in_carry[c], + in_ch + (n_samples - new_in_carry), + (size_t) new_in_carry * sizeof(float)); + } + + if (out_pos < n_samples) { + memset(out_ch + out_pos, 0, (size_t) (n_samples - out_pos) * sizeof(float)); + } + + if (c == 0) { + if (out_pos != n_samples) + mlt_log_warning(MLT_FILTER_SERVICE(filter), + "frame=%d ch=%d output not fully filled: " + "out_pos=%d n_samples=%d (gap=%d), zero-filled\n", + (int) frame_pos, + c, + out_pos, + n_samples, + n_samples - out_pos); + new_in_carry_count = new_in_carry; + new_out_carry_count = carry_pos; + } + + // Keep ring position bounded to prevent long-run integer overflow. + pdata->dry_ring_pos %= ring_size; + } + + pdata->in_carry_count = new_in_carry_count; + pdata->out_carry_count = new_out_carry_count; + pdata->expected_frame = frame_pos + 1; + + mlt_log_debug(MLT_FILTER_SERVICE(filter), + "frame=%d new_in_carry=%d new_out_carry=%d\n", + (int) frame_pos, + new_in_carry_count, + new_out_carry_count); + + mlt_frame_set_audio(frame, out.data, mlt_audio_float, 0, out.release_data); + *buffer = out.data; + *format = mlt_audio_float; + + mlt_service_unlock(MLT_FILTER_SERVICE(filter)); + return 0; +} + +// Push the filter and the rnnoise_get_audio callback onto the frame's audio +// stack; rnnoise_get_audio pops the filter again when audio is requested. +static mlt_frame filter_process(mlt_filter filter, mlt_frame frame) +{ + mlt_frame_push_audio(frame, filter); + mlt_frame_push_audio(frame, (void *) rnnoise_get_audio); + return frame; +} + +// Destroy every RNNoise state, free the output carry buffers and the private +// data, and clear filter->child. +static void close_filter(mlt_filter filter) +{ + private_data *pdata = (private_data *) filter->child; + if (pdata) { + for (int i = 0; i < MAX_CHANNELS; i++) { + if (pdata->states[i]) { + rnnoise_destroy(pdata->states[i]); + pdata->states[i] = NULL; + } + free(pdata->out_carry[i]); + } + free(pdata); + filter->child = NULL; + } +} +
+mlt_filter filter_rnnoise_init(mlt_profile profile, mlt_service_type type, const char *id, char *arg) +{ + mlt_filter filter = mlt_filter_new(); + private_data *pdata = (private_data *) calloc(1, sizeof(private_data)); + + if (filter && pdata) { + pdata->expected_frame = -1; + filter->process = filter_process; + filter->close = close_filter; + filter->child = pdata; + mlt_properties_set_double(MLT_FILTER_PROPERTIES(filter), "mix", 1.0); + } else { + mlt_log_error(NULL, "RNNoise filter: failed to allocate resources\n"); + if (filter) + mlt_filter_close(filter); + free(pdata); + filter = NULL; + } + return filter; +} diff --git a/src/modules/rnnoise/filter_rnnoise.yml b/src/modules/rnnoise/filter_rnnoise.yml new file mode 100644 index 000000000..1bc466d5a --- /dev/null +++ b/src/modules/rnnoise/filter_rnnoise.yml @@ -0,0 +1,40 @@ +schema_version: 7.2 +type: filter +identifier: rnnoise +title: Noise Reduction (RNNoise) +version: 1 +copyright: Meltytech, LLC +license: LGPLv2.1 +language: en +tags: + - Audio +description: > + Reduce background noise in speech tracks using the RNNoise recurrent neural + network library. +notes: > + Operates at 48 kHz; audio is resampled automatically if needed. + + RNNoise itself introduces two 480-sample frames of latency, or about 20 ms + at 48 kHz, and both the filter and the link are subject to that delay. At + the start of playback, and again after a seek or other discontinuity, the + filter inserts up to one additional 480-sample frame of silence, or about 10 + ms, so it can stay aligned across MLT frame boundaries without lookahead. +audio_formats: + - float +parameters: + - identifier: mix + title: Suppression + type: float + description: > + Wet/dry mix. 1.0 (100%) outputs the fully denoised signal; 0.0 (0%) + passes the original signal through unchanged. Values in between blend + the two, which can help preserve naturalness when aggressive suppression + causes artefacts. 
+ readonly: no + mutable: yes + animation: no + default: 1.0 + minimum: 0.0 + maximum: 1.0 + unit: '%' + scale: 100 diff --git a/src/modules/rnnoise/link_rnnoise.c b/src/modules/rnnoise/link_rnnoise.c new file mode 100644 index 000000000..cc0f7f9ce --- /dev/null +++ b/src/modules/rnnoise/link_rnnoise.c @@ -0,0 +1,534 @@ +/* + * link_rnnoise.c -- noise reduction link using RNNoise + * Copyright (C) 2026 Meltytech, LLC + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include + +#define RNNOISE_RATE 48000 +#define MAX_CHANNELS 8 +#define MIN_RNNOISE_FRAMES 3 +#define RNNOISE_STARTUP_DROP_FRAMES 2 +// Buffer sizes: at 24fps/48kHz a frame has ~2002 samples. +// With 1 future frame we can have up to ~4004 input samples at once. +// RNNoise frame = 480. Max chunks = ceil(4004/480) = 9 → max out = 9*480 = 4320. +// Use 8192 per channel for headroom. +#define BUF_CAPACITY 8192 + +typedef struct +{ + // RNNoise per-channel state + DenoiseState *states[MAX_CHANNELS]; + int n_channels; + int frequency; + + // Input carry buffer: samples waiting to form the next 480-sample RNNoise chunk. + // in_carry_count < 480 (a full chunk is processed immediately). 
+ float in_carry[MAX_CHANNELS][480]; + int in_carry_count; + + // Output carry buffer: processed samples waiting to be delivered. + // out_carry_count holds how many valid samples are stored from index 0. + float *out_carry[MAX_CHANNELS]; + int out_carry_count; + + // Continuity tracking + mlt_position expected_frame; + mlt_position continuity_frame; + int continuity_sample; // sample offset within continuity_frame's audio + + // After reset, RNNoise emits two startup frames that are effectively silence. + // Consume those frames internally so this link adds no output delay. + int startup_drop_frames; +} private_data; + +// Destroy all RNNoise states and return the carry, continuity and startup +// bookkeeping to its initial values. +static void reset_state(mlt_link self) +{ + private_data *pdata = (private_data *) self->child; + + // Destroy existing DenoiseState objects + for (int c = 0; c < MAX_CHANNELS; c++) { + if (pdata->states[c]) { + rnnoise_destroy(pdata->states[c]); + pdata->states[c] = NULL; + } + } + + pdata->n_channels = 0; + pdata->frequency = 0; + pdata->in_carry_count = 0; + pdata->out_carry_count = 0; + pdata->startup_drop_frames = RNNOISE_STARTUP_DROP_FRAMES; + pdata->continuity_frame = -1; + pdata->continuity_sample = 0; + pdata->expected_frame = -1; +} + +// Make sure one RNNoise state exists per channel (at most MAX_CHANNELS). +// Does nothing when the channel count is unchanged; otherwise all states are +// recreated and the carry counters and startup drop count are reset. +static void ensure_states(mlt_link self, int n_channels) +{ + private_data *pdata = (private_data *) self->child; + + if (pdata->n_channels == n_channels) + return; + + // Destroy old states + for (int c = 0; c < MAX_CHANNELS; c++) { + if (pdata->states[c]) { + rnnoise_destroy(pdata->states[c]); + pdata->states[c] = NULL; + } + } + + // Create new states; report a failed allocation instead of hiding it + for (int c = 0; c < n_channels && c < MAX_CHANNELS; c++) { + pdata->states[c] = rnnoise_create(NULL); + if (!pdata->states[c]) { + mlt_log_error(MLT_LINK_SERVICE(self), + "Failed to create RNNoise state for channel %d\n", + c); + } + } + + pdata->n_channels = n_channels; + pdata->in_carry_count = 0; + pdata->out_carry_count = 0; + pdata->startup_drop_frames = RNNOISE_STARTUP_DROP_FRAMES; +} + +// Copy up to count samples from src_plane (one channel's plane of planar float +// audio holding src_total_samples samples), starting at src_offset, into dst +// starting at dst_offset. Fewer samples are copied when the source runs out. +// Returns the number of samples copied (0 when none are available).
+static int copy_samples(float *dst, + int dst_offset, + const float *src_plane, // src for channel c: src + c*src_total_samples + int src_offset, + int src_total_samples, + int count) +{ + int available = src_total_samples - src_offset; + int n = count < available ? count : available; + if (n <= 0) + return 0; + memcpy(dst + dst_offset, src_plane + src_offset, n * sizeof(float)); + return n; +} + +static int link_get_audio(mlt_frame frame, + void **buffer, + mlt_audio_format *format, + int *frequency, + int *channels, + int *samples) +{ + mlt_link self = (mlt_link) mlt_frame_pop_audio(frame); + private_data *pdata = (private_data *) self->child; + int error = 0; + double link_fps = mlt_producer_get_fps(MLT_LINK_PRODUCER(self)); + if (link_fps <= 0.0) + link_fps = 25.0; + mlt_position frame_pos = mlt_frame_get_position(frame); + + // Force 48kHz float for RNNoise + *frequency = RNNOISE_RATE; + *format = mlt_audio_float; + *channels = *channels <= 0 ? 2 : *channels; + if (*samples <= 0) + *samples = mlt_audio_calculate_frame_samples(link_fps, RNNOISE_RATE, frame_pos); + + mlt_service_lock(MLT_LINK_SERVICE(self)); + + // Detect seek: if not the expected frame, reset everything + if (pdata->expected_frame != frame_pos) { + reset_state(self); + pdata->continuity_frame = frame_pos; + pdata->continuity_sample = 0; + pdata->expected_frame = frame_pos; + } + + // Get current frame's audio (cached after first call) + struct mlt_audio_s cur_audio; + mlt_audio_set_values(&cur_audio, NULL, *frequency, *format, *samples, *channels); + error = mlt_frame_get_audio(frame, + &cur_audio.data, + &cur_audio.format, + &cur_audio.frequency, + &cur_audio.channels, + &cur_audio.samples); + if (error || !cur_audio.data || cur_audio.samples <= 0) { + mlt_service_unlock(MLT_LINK_SERVICE(self)); + return error; + } + + if (cur_audio.channels > MAX_CHANNELS) { + mlt_log_warning(MLT_LINK_SERVICE(self), + "RNNoise link supports up to %d channels, got %d; bypassing\n", + MAX_CHANNELS, + 
cur_audio.channels); + *buffer = cur_audio.data; + *frequency = cur_audio.frequency; + *format = cur_audio.format; + *channels = cur_audio.channels; + *samples = cur_audio.samples; + pdata->expected_frame = frame_pos + 1; + mlt_service_unlock(MLT_LINK_SERVICE(self)); + return 0; + } + + // Ensure correct number of RNNoise states + ensure_states(self, cur_audio.channels); + *channels = cur_audio.channels; + + // Get unique_properties for future frame lookup + mlt_properties unique_properties = mlt_frame_get_unique_properties(frame, + MLT_LINK_SERVICE(self)); + + // The RNNoise frame size is always 480 + const int rnn_frame = rnnoise_get_frame_size(); + + // Allocate output buffer + struct mlt_audio_s out; + mlt_audio_set_values(&out, + NULL, + RNNOISE_RATE, + cur_audio.format, + cur_audio.samples, + cur_audio.channels); + mlt_audio_alloc_data(&out); + if (!out.data) { + mlt_service_unlock(MLT_LINK_SERVICE(self)); + return 1; + } + + float mix = mlt_properties_get_double(MLT_LINK_PROPERTIES(self), "mix"); + mix = CLAMP(mix, 0.0f, 1.0f); + + // We fill out from out_carry, then generate more by feeding RNNoise chunks. + int out_delivered = 0; + + while (out_delivered < out.samples) { + // Drain the output carry buffer first + if (pdata->out_carry_count > 0) { + int n_take = out.samples - out_delivered; + if (n_take > pdata->out_carry_count) + n_take = pdata->out_carry_count; + for (int c = 0; c < *channels && c < MAX_CHANNELS; c++) { + if (pdata->out_carry[c]) { + memcpy((float *) out.data + c * out.samples + out_delivered, + pdata->out_carry[c], + n_take * sizeof(float)); + // Shift remaining + int remaining = pdata->out_carry_count - n_take; + if (remaining > 0) + memmove(pdata->out_carry[c], + pdata->out_carry[c] + n_take, + remaining * sizeof(float)); + } + } + pdata->out_carry_count -= n_take; + out_delivered += n_take; + continue; + } + + // Need to process more RNNoise frames to fill out_carry. 
+ // First fill in_carry to 480 samples, crossing as many future MLT + // frames as needed to satisfy this RNNoise chunk. + while (pdata->in_carry_count < rnn_frame) { + // Determine source frame and audio + float *src_data = NULL; + int src_total_samples = 0; + int src_channels = *channels; + + if (pdata->continuity_frame == frame_pos) { + // Use current frame's audio + src_data = (float *) cur_audio.data; + src_total_samples = cur_audio.samples; + } else { + // Look up future frame from unique_properties + if (!unique_properties) { + break; + } + char key[19]; + int frame_delta = frame_pos - mlt_frame_original_position(frame); + sprintf(key, "%d", (int) (pdata->continuity_frame - frame_delta)); + mlt_frame src_frame = (mlt_frame) mlt_properties_get_data(unique_properties, + key, + NULL); + if (!src_frame) { + break; + } + + // Get audio from the future frame (may be cached) + struct mlt_audio_s future_audio; + int future_samples = mlt_audio_calculate_frame_samples(link_fps, + RNNOISE_RATE, + pdata->continuity_frame); + mlt_audio_set_values(&future_audio, + NULL, + RNNOISE_RATE, + mlt_audio_float, + future_samples, + *channels); + int ferr = mlt_frame_get_audio(src_frame, + &future_audio.data, + &future_audio.format, + &future_audio.frequency, + &future_audio.channels, + &future_audio.samples); + if (ferr || !future_audio.data || future_audio.samples <= 0) { + break; + } + src_data = (float *) future_audio.data; + src_total_samples = future_audio.samples; + src_channels = future_audio.channels; + } + + // Copy as many samples as possible into in_carry (up to rnn_frame) + int needed = rnn_frame - pdata->in_carry_count; + int copied_any = 0; + for (int c = 0; c < *channels && c < MAX_CHANNELS; c++) { + int n = 0; + if (c < src_channels) { + float *src_plane = src_data + c * src_total_samples; + n = copy_samples(pdata->in_carry[c], + pdata->in_carry_count, + src_plane, + pdata->continuity_sample, + src_total_samples, + needed); + } else { + // Missing source channels 
are treated as silence. + memset(pdata->in_carry[c] + pdata->in_carry_count, 0, needed * sizeof(float)); + n = needed; + } + if (c == 0) + copied_any = n; // use channel 0 to track + } + + if (copied_any <= 0) { + // Skip empty source frame and continue to the next one. + pdata->continuity_frame++; + pdata->continuity_sample = 0; + continue; + } + + pdata->in_carry_count += copied_any; + pdata->continuity_sample += copied_any; + + // Check if we've exhausted this frame's audio + if (pdata->continuity_sample >= src_total_samples) { + pdata->continuity_frame++; + pdata->continuity_sample = 0; + } + } + + if (pdata->in_carry_count < rnn_frame) { + // Not enough input to complete a chunk — zero-pad the remainder + for (int c = 0; c < *channels && c < MAX_CHANNELS; c++) { + memset(pdata->in_carry[c] + pdata->in_carry_count, + 0, + (rnn_frame - pdata->in_carry_count) * sizeof(float)); + } + pdata->in_carry_count = rnn_frame; + } + + // Process one 480-sample RNNoise chunk per channel + float rnn_in[480]; + float rnn_out[480]; + + // Ensure out_carry buffers are allocated + for (int c = 0; c < *channels && c < MAX_CHANNELS; c++) { + if (!pdata->out_carry[c]) { + pdata->out_carry[c] = (float *) calloc(BUF_CAPACITY, sizeof(float)); + if (!pdata->out_carry[c]) { + error = 1; + goto done; + } + } + } + + int drop_chunk = pdata->startup_drop_frames > 0; + int out_base = pdata->out_carry_count; + if (!drop_chunk && out_base + rnn_frame > BUF_CAPACITY) { + // Buffer overflow safeguard — should not happen with BUF_CAPACITY=8192 + error = 1; + goto done; + } + + for (int c = 0; c < *channels && c < MAX_CHANNELS; c++) { + // Scale up for RNNoise (expects ±32768) + for (int s = 0; s < rnn_frame; s++) + rnn_in[s] = pdata->in_carry[c][s] * 32768.0f; + + if (pdata->states[c]) { + rnnoise_process_frame(pdata->states[c], rnn_out, rnn_in); + } else { + mlt_log_error(MLT_LINK_SERVICE(self), + "Missing RNNoise state for channel %d; bypassing denoise\n", + c); + memcpy(rnn_out, rnn_in, 
sizeof(rnn_out)); + } + + // Scale back and apply wet/dry mix with aligned dry signal. + // Startup RNNoise delay is compensated by dropping first two output chunks. + for (int s = 0; s < rnn_frame; s++) { + float wet = rnn_out[s] / 32768.0f; + float dry = pdata->in_carry[c][s]; + if (!drop_chunk) + pdata->out_carry[c][out_base + s] = dry + mix * (wet - dry); + } + } + + if (drop_chunk) + pdata->startup_drop_frames--; + else + pdata->out_carry_count += rnn_frame; + pdata->in_carry_count = 0; + } + +done: + if (error) { + // Return silence on error + mlt_audio_silence(&out, out.samples, 0); + } + + int out_size = mlt_audio_format_size(out.format, out.samples, out.channels); + mlt_frame_set_audio(frame, out.data, out.format, out_size, out.release_data); + mlt_audio_get_values(&out, buffer, frequency, format, samples, channels); + + pdata->expected_frame = frame_pos + 1; + + mlt_service_unlock(MLT_LINK_SERVICE(self)); + return error; +} + +static int link_get_frame(mlt_link self, mlt_frame_ptr frame, int index) +{ + int error = 0; + private_data *pdata = (private_data *) self->child; + mlt_position frame_pos = mlt_producer_position(MLT_LINK_PRODUCER(self)); + double fps = mlt_producer_get_fps(MLT_LINK_PRODUCER(self)); + if (fps <= 0.0) + fps = 25.0; + int rnn_frame = rnnoise_get_frame_size(); + int needed_samples = MIN_RNNOISE_FRAMES * rnn_frame; + int frame_samples = mlt_audio_calculate_frame_samples(fps, RNNOISE_RATE, frame_pos); + int startup_drop = pdata ? 
pdata->startup_drop_frames : RNNOISE_STARTUP_DROP_FRAMES; + int output_coverage_samples = frame_samples + (startup_drop * rnn_frame); + if (output_coverage_samples > needed_samples) + needed_samples = output_coverage_samples; + int available_samples = mlt_audio_calculate_frame_samples(fps, RNNOISE_RATE, frame_pos); + int future_frames_needed = 0; + + while (available_samples < needed_samples) { + mlt_position future_pos = frame_pos + future_frames_needed + 1; + available_samples += mlt_audio_calculate_frame_samples(fps, RNNOISE_RATE, future_pos); + future_frames_needed++; + } + + mlt_producer_seek(self->next, frame_pos); + error = mlt_service_get_frame(MLT_PRODUCER_SERVICE(self->next), frame, index); + if (error) { + return error; + } + + mlt_properties unique_properties = mlt_frame_unique_properties(*frame, MLT_LINK_SERVICE(self)); + + // Fetch and store enough future frames to provide at least 3 RNNoise chunks. + for (int i = 0; i < future_frames_needed; i++) { + mlt_position future_pos = frame_pos + i + 1; + mlt_frame future_frame = NULL; + mlt_producer_seek(self->next, future_pos); + error = mlt_service_get_frame(MLT_PRODUCER_SERVICE(self->next), &future_frame, index); + if (error) { + mlt_log_error(MLT_LINK_SERVICE(self), + "Error getting future frame: %d\n", + (int) future_pos); + } + char key[19]; + sprintf(key, "%d", (int) future_pos); + mlt_properties_set_data(unique_properties, + key, + future_frame, + 0, + (mlt_destructor) mlt_frame_close, + NULL); + } + + mlt_frame_push_audio(*frame, (void *) self); + mlt_frame_push_audio(*frame, link_get_audio); + + mlt_producer_prepare_next(MLT_LINK_PRODUCER(self)); + + return error; +} + +static void link_configure(mlt_link self, mlt_profile chain_profile) +{ + // Operate at the same frame rate as the next link in the chain + mlt_service_set_profile(MLT_LINK_SERVICE(self), + mlt_service_profile(MLT_PRODUCER_SERVICE(self->next))); +} + +static void link_close(mlt_link self) +{ + if (self) { + private_data *pdata = 
(private_data *) self->child; + if (pdata) { + for (int c = 0; c < MAX_CHANNELS; c++) { + if (pdata->states[c]) { + rnnoise_destroy(pdata->states[c]); + } + free(pdata->out_carry[c]); + } + free(pdata); + } + self->child = NULL; + self->close = NULL; + mlt_link_close(self); + free(self); + } +} + +mlt_link link_rnnoise_init(mlt_profile profile, mlt_service_type type, const char *id, char *arg) +{ + mlt_link self = mlt_link_init(); + private_data *pdata = (private_data *) calloc(1, sizeof(private_data)); + + if (self && pdata) { + pdata->expected_frame = -1; + pdata->continuity_frame = -1; + pdata->continuity_sample = 0; + self->child = pdata; + + // Set default property + mlt_properties_set_double(MLT_LINK_PROPERTIES(self), "mix", 1.0); + + // Register callbacks + self->configure = link_configure; + self->get_frame = link_get_frame; + self->close = link_close; + } else { + free(pdata); + if (self) { + mlt_link_close(self); + self = NULL; + } + } + + return self; +} diff --git a/src/modules/rnnoise/link_rnnoise.yml b/src/modules/rnnoise/link_rnnoise.yml new file mode 100644 index 000000000..80ca5796e --- /dev/null +++ b/src/modules/rnnoise/link_rnnoise.yml @@ -0,0 +1,34 @@ +schema_version: 7.2 +type: link +identifier: rnnoise +title: Noise Reduction (RNNoise) +version: 1 +copyright: Meltytech, LLC +license: LGPLv2.1 +language: en +url: https://github.com/xiph/rnnoise +tags: + - Audio +description: > + Reduce background noise in speech tracks using the RNNoise recurrent neural + network library. +notes: > + Operates at 48 kHz; audio is resampled automatically if needed. + + Unlike filter_rnnoise, this link does not add a delay to the audio. +audio_formats: + - float +parameters: + - identifier: mix + title: Mix + type: float + description: > + The wet/dry blend of the noise-reduced signal. + 0 means no noise reduction (dry), 1 means fully noise-reduced (wet). 
+ minimum: 0 + maximum: 1 + default: 1 + scale: 100 + unit: '%' + mutable: yes + animation: yes diff --git a/vcpkg.json b/vcpkg.json index c8bc1964d..10b0c66ac 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -24,6 +24,10 @@ "pthreads", "qtbase", "qtsvg", + { + "name": "rnnoise", + "platform": "!windows" + }, "rtaudio", "rubberband", "sdl2",