Skip to content

Commit f106a74

Browse files
committed
Onset detector
1 parent 75118b5 commit f106a74

16 files changed

Lines changed: 2489 additions & 6 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ __pycache__/
4747
.idea/
4848
.vs/
4949
.claude/
50+
.opencode/
5051
.pytest_cache/
5152
.cache/
5253
docs/superpowers/

examples/graphics/source/examples/AudioFileDemo.h

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,19 @@ class AudioFileWaveform : public yup::AudioViewComponent
5454
AudioViewComponent::clear();
5555
playheadSeconds = 0.0;
5656
lengthSeconds = 0.0;
57+
onsetTimes.clear();
58+
hasOnsets = false;
5759
updatePlayheadPosition();
5860
}
5961

62+
/** Sets onset markers to overlay on the waveform. */
63+
void setOnsetData (const std::vector<double>& times)
64+
{
65+
onsetTimes = times;
66+
hasOnsets = ! times.empty();
67+
repaint();
68+
}
69+
6070
/** Updates the playhead without repainting the full waveform. */
6171
void setPlayhead (double newPlayheadSeconds, double newLengthSeconds)
6272
{
@@ -72,6 +82,32 @@ class AudioFileWaveform : public yup::AudioViewComponent
7282
updatePlayheadPosition();
7383
}
7484

85+
/**
    Paints the waveform through the base class, then draws one translucent green
    vertical line for every onset that falls inside the visible sample range.

    Nothing extra is drawn when there are no onsets or the sample rate is not positive.
*/
void paint (yup::Graphics& g) override
{
    AudioViewComponent::paint (g);

    if (! hasOnsets)
        return;

    if (getSampleRate() <= 0.0)
        return;

    const auto markerArea = getWaveformBounds();
    const auto visibleSamples = getViewRangeSamples();

    g.setStrokeColor (yup::Color (0xFF00FF00).withAlpha (0.4f));

    for (const double onsetTime : onsetTimes)
    {
        const double onsetSample = timeToSample (onsetTime);

        // Onsets located before the visible range are skipped.
        if (onsetSample < visibleSamples.getStart())
            continue;

        // NOTE(review): stopping at the first onset past the view assumes onsetTimes
        // is sorted in ascending order - confirm against the producer of the data.
        if (onsetSample > visibleSamples.getEnd())
            break;

        const float markerX = sampleToX (onsetSample, markerArea);
        g.strokeLine (markerX, markerArea.getY(), markerX, markerArea.getBottom());
    }
}
110+
75111
private:
76112
class PlayheadMarker : public yup::Component
77113
{
@@ -124,6 +160,8 @@ class AudioFileWaveform : public yup::AudioViewComponent
124160
PlayheadMarker playhead;
125161
double playheadSeconds = 0.0;
126162
double lengthSeconds = 0.0;
163+
std::vector<double> onsetTimes;
164+
bool hasOnsets = false;
127165

128166
YUP_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (AudioFileWaveform)
129167
};
@@ -1014,6 +1052,26 @@ class AudioFileDemo : public yup::Component
10141052

10151053
waveformDisplay.setSource (&audioBuffer, loadedSampleRate);
10161054
waveformDisplay.setPlayhead (0.0, audioLengthSeconds);
1055+
1056+
// Compute onsets in background
1057+
waveformThreadPool.addJob ([this]
1058+
{
1059+
superFlux.prepare ({ .spectrogram = { .fftSize = 2048, .fps = 200 },
1060+
.useFilterBank = true,
1061+
.peakPicker = { .threshold = 0.8f },
1062+
.refineOnsets = true },
1063+
loadedSampleRate);
1064+
1065+
superFlux.processOffline (audioBuffer);
1066+
onsetTimes = superFlux.getOnsetTimes();
1067+
hasOnsets = true;
1068+
1069+
yup::MessageManager::callAsync ([this]
1070+
{
1071+
waveformDisplay.setOnsetData (onsetTimes);
1072+
});
1073+
});
1074+
10171075
updateStatus ("Loaded " + file.getFileName() + " | " + yup::String (numChannels)
10181076
+ " ch | " + yup::String (loadedSampleRate, 1) + " Hz | "
10191077
+ formatTime (audioLengthSeconds));
@@ -1193,6 +1251,10 @@ class AudioFileDemo : public yup::Component
11931251
bool loopEnabled = false;
11941252
bool timeStretchSupported = false;
11951253

1254+
yup::OnsetDetector superFlux;
1255+
std::vector<double> onsetTimes;
1256+
bool hasOnsets = false;
1257+
11961258
static constexpr int backendAutomaticId = 1;
11971259
static constexpr int backendTimeDomainId = 2;
11981260
static constexpr int backendBungeeId = 3;
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
/*
2+
==============================================================================
3+
4+
This file is part of the YUP library.
5+
Copyright (c) 2026 - kunitoki@gmail.com
6+
7+
YUP is an open source library subject to open-source licensing.
8+
9+
The code included in this file is provided under the terms of the ISC license
10+
http://www.isc.org/downloads/software-support-policy/isc-license. Permission
11+
to use, copy, modify, and/or distribute this software for any purpose with or
12+
without fee is hereby granted provided that the above copyright notice and
13+
this permission notice appear in all copies.
14+
15+
YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
16+
EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
17+
DISCLAIMED.
18+
19+
==============================================================================
20+
*/
21+
22+
namespace yup
23+
{
24+
25+
void FilterBank::build (int bandsPerOctave, float fMin, float fMax, int numFFTBins_, float sampleRate, bool equalizeArea)
26+
{
27+
jassert (bandsPerOctave > 0);
28+
jassert (numFFTBins_ > 0);
29+
jassert (sampleRate > 0.0f);
30+
31+
numFFTBins = numFFTBins_;
32+
33+
const auto nyquist = sampleRate * 0.5f;
34+
if (fMax > nyquist)
35+
fMax = nyquist;
36+
37+
auto frequencies = generateFrequencies (bandsPerOctave, fMin, fMax);
38+
39+
const float factor = nyquist / static_cast<float> (numFFTBins);
40+
41+
for (auto& f : frequencies)
42+
f = std::round (f / factor);
43+
44+
std::vector<float> uniqueFrequencies;
45+
{
46+
float prev = -1.0f;
47+
for (auto f : frequencies)
48+
{
49+
if (f != prev)
50+
uniqueFrequencies.push_back (f);
51+
prev = f;
52+
}
53+
}
54+
55+
frequencies = std::move (uniqueFrequencies);
56+
frequencies.erase (
57+
std::remove_if (frequencies.begin(), frequencies.end(), [&] (float f)
58+
{
59+
return f >= static_cast<float> (numFFTBins);
60+
}),
61+
frequencies.end());
62+
63+
const int bands = static_cast<int> (frequencies.size()) - 2;
64+
jassert (bands >= 3);
65+
66+
matrix.assign (static_cast<std::size_t> (numFFTBins) * static_cast<std::size_t> (bands), 0.0f);
67+
numBands = bands;
68+
69+
for (int band = 0; band < bands; ++band)
70+
{
71+
const int start = static_cast<int> (frequencies[static_cast<std::size_t> (band)]);
72+
const int mid = static_cast<int> (frequencies[static_cast<std::size_t> (band) + 1]);
73+
const int stop = static_cast<int> (frequencies[static_cast<std::size_t> (band) + 2]);
74+
75+
if (mid <= start || stop <= mid)
76+
continue;
77+
78+
const float height = equalizeArea ? 2.0f / static_cast<float> (stop - start) : 1.0f;
79+
80+
for (int bin = start; bin < mid; ++bin)
81+
{
82+
const float t = static_cast<float> (bin - start) / static_cast<float> (mid - start);
83+
matrix[static_cast<std::size_t> (bin) * static_cast<std::size_t> (numBands)
84+
+ static_cast<std::size_t> (band)] = t * height;
85+
}
86+
87+
for (int bin = mid; bin < stop; ++bin)
88+
{
89+
const float t = static_cast<float> (bin - mid) / static_cast<float> (stop - mid);
90+
matrix[static_cast<std::size_t> (bin) * static_cast<std::size_t> (numBands)
91+
+ static_cast<std::size_t> (band)] = (1.0f - t) * height;
92+
}
93+
}
94+
}
95+
96+
void FilterBank::applySingleFrame (const float* magnitudeIn, float* magnitudeOut) const noexcept
97+
{
98+
jassert (magnitudeIn != nullptr && magnitudeOut != nullptr);
99+
100+
for (int band = 0; band < numBands; ++band)
101+
{
102+
float sum = 0.0f;
103+
104+
for (int bin = 0; bin < numFFTBins; ++bin)
105+
sum += magnitudeIn[bin] * matrix[static_cast<std::size_t> (bin) * static_cast<std::size_t> (numBands) + static_cast<std::size_t> (band)];
106+
107+
magnitudeOut[band] = sum;
108+
}
109+
}
110+
111+
void FilterBank::applyMultipleFrames (const float* spectrogram, float* filtered, int numFrames) const noexcept
112+
{
113+
jassert (spectrogram != nullptr && filtered != nullptr);
114+
jassert (numFrames > 0);
115+
116+
for (int frame = 0; frame < numFrames; ++frame)
117+
{
118+
applySingleFrame (spectrogram + static_cast<std::size_t> (frame) * static_cast<std::size_t> (numFFTBins),
119+
filtered + static_cast<std::size_t> (frame) * static_cast<std::size_t> (numBands));
120+
}
121+
}
122+
123+
std::vector<float> FilterBank::generateFrequencies (int bandsPerOctave, float fMin, float fMax)
124+
{
125+
constexpr float a = 440.0f;
126+
const float factor = std::pow (2.0f, 1.0f / static_cast<float> (bandsPerOctave));
127+
128+
std::vector<float> frequencies;
129+
frequencies.push_back (a);
130+
131+
float freq = a;
132+
while (freq <= fMax)
133+
{
134+
freq *= factor;
135+
frequencies.push_back (freq);
136+
}
137+
138+
freq = a;
139+
while (freq >= fMin)
140+
{
141+
freq /= factor;
142+
frequencies.push_back (freq);
143+
}
144+
145+
std::sort (frequencies.begin(), frequencies.end());
146+
return frequencies;
147+
}
148+
149+
} // namespace yup
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
==============================================================================
3+
4+
This file is part of the YUP library.
5+
Copyright (c) 2026 - kunitoki@gmail.com
6+
7+
YUP is an open source library subject to open-source licensing.
8+
9+
The code included in this file is provided under the terms of the ISC license
10+
http://www.isc.org/downloads/software-support-policy/isc-license. Permission
11+
to use, copy, modify, and/or distribute this software for any purpose with or
12+
without fee is hereby granted provided that the above copyright notice and
13+
this permission notice appear in all copies.
14+
15+
YUP IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
16+
EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
17+
DISCLAIMED.
18+
19+
==============================================================================
20+
*/
21+
22+
#pragma once
23+
24+
namespace yup
25+
{
26+
27+
//==============================================================================
28+
/**
    A bank of log-spaced triangular filters that reduces a magnitude spectrum
    to a smaller number of bands.

    FFT frequency bins are mapped onto perceptually-motivated bands (for
    example 24 bands per octave for quarter-tone resolution). The triangle of
    a band rises from the previous centre frequency, peaks at its own centre
    and falls towards the next centre.

    The weights form a [numFFTBins x numBands] matrix stored row-major:
    matrix[fftBin * numBands + band] is the contribution of that FFT bin to
    that band.

    @see Spectrogram, SuperFluxODF
*/
class FilterBank
{
public:
    //==============================================================================
    /** Creates an empty filter bank; build() must be called before it is used. */
    FilterBank() = default;

    //==============================================================================
    /**
        Builds the filter bank matrix for the given parameters.

        @param bandsPerOctave  Number of bands per octave (e.g. 24 for quarter-tone)
        @param fMin            Minimum frequency in Hz (e.g. 30)
        @param fMax            Maximum frequency in Hz (e.g. 17000); clamped to Nyquist
        @param numFFTBins      Number of FFT frequency bins (typically fftSize / 2)
        @param sampleRate      Audio sample rate in Hz
        @param equalizeArea    If true, normalize each triangular filter to have area = 1
    */
    void build (int bandsPerOctave, float fMin, float fMax, int numFFTBins, float sampleRate, bool equalizeArea = false);

    //==============================================================================
    /** Returns the number of filter bands (0 until build() has been called). */
    int getNumBands() const noexcept
    {
        return numBands;
    }

    /** Returns the number of FFT bins the filter bank was built for (0 until build() has been called). */
    int getNumFFTBins() const noexcept
    {
        return numFFTBins;
    }

    //==============================================================================
    /**
        Applies the filter bank to a single frame of magnitude data.

        @param magnitudeIn   Input array of length getNumFFTBins()
        @param magnitudeOut  Output array of length getNumBands()
    */
    void applySingleFrame (const float* magnitudeIn, float* magnitudeOut) const noexcept;

    /**
        Applies the filter bank to several consecutive frames.

        @param spectrogram  Input row-major array [numFrames x numFFTBins]
        @param filtered     Output row-major array [numFrames x numBands]
        @param numFrames    Number of frames
    */
    void applyMultipleFrames (const float* spectrogram, float* filtered, int numFrames) const noexcept;

    //==============================================================================
    /** Returns the raw matrix storage, row-major: matrix[fftBin * numBands + band]. */
    const float* getMatrixData() const noexcept
    {
        return matrix.data();
    }

private:
    //==============================================================================
    /** Produces the sorted list of band edge frequencies used by build(). */
    static std::vector<float> generateFrequencies (int bandsPerOctave, float fMin, float fMax);

    //==============================================================================
    std::vector<float> matrix; // Row-major weights, numFFTBins x numBands.
    int numFFTBins = 0;
    int numBands = 0;
};
101+
102+
} // namespace yup

0 commit comments

Comments
 (0)