Skip to content

Commit 801de42

Browse files
feat: add threshold/onset-triggered recording with configurable options
Co-authored-by: aider (openrouter/openai/gpt-5) <aider@aider.chat>
1 parent fd38643 commit 801de42

4 files changed

Lines changed: 310 additions & 20 deletions

File tree

src/lib/components/Voice.svelte

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
let uploadError = $state<string | null>(null);
1919
2020
// Recording state
21-
let recordingStatus = $state<'idle' | 'requesting-permission' | 'recording' | 'processing' | 'error'>('idle');
21+
let recordingStatus = $state<'idle' | 'requesting-permission' | 'waiting' | 'recording' | 'processing' | 'error'>('idle');
2222
let recordingProgress = $state<number>(0);
2323
let recordingError = $state<string | null>(null);
2424
@@ -266,6 +266,9 @@
266266
if (progress.stage === 'requesting') {
267267
recordingStatus = 'requesting-permission';
268268
recordingProgress = 0;
269+
} else if (progress.stage === 'waiting') {
270+
recordingStatus = 'waiting';
271+
recordingProgress = 0;
269272
} else if (progress.stage === 'recording') {
270273
recordingStatus = 'recording';
271274
recordingProgress = progress.percentage;
@@ -337,10 +340,12 @@
337340
<img src={imageSrc} alt="Voice" class="w-20 h-20" />
338341

339342
<!-- Recording progress indicator -->
340-
{#if recordingStatus === 'recording' || recordingStatus === 'processing'}
343+
{#if recordingStatus === 'waiting' || recordingStatus === 'recording' || recordingStatus === 'processing'}
341344
<div class="absolute inset-0 flex items-center justify-center bg-yellow-500 bg-opacity-70 rounded-lg">
342345
<div class="text-white text-xs font-bold">
343-
{#if recordingStatus === 'recording'}
346+
{#if recordingStatus === 'waiting'}
347+
🎧 Waiting...
348+
{:else if recordingStatus === 'recording'}
344349
🎤 {Math.round(recordingProgress)}%
345350
{:else}
346351
⚙️ {Math.round(recordingProgress)}%

src/lib/services/audioRecorder.ts

Lines changed: 104 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import { createLogger } from '$lib/utils/logger';
1616
import type { ProcessedAudio } from './audioProcessor';
17+
import { captureOnset } from './onsetDetector';
1718

1819
const logger = createLogger('AudioRecorder');
1920

@@ -29,17 +30,43 @@ export interface RecordingOptions {
2930
deviceId?: string;
3031

3132
/**
32-
* Optional threshold level (0-1) for automatic recording trigger.
33-
* Not implemented yet, reserved for future use.
33+
* Optional threshold level (0-1) for automatic onset-triggered recording.
34+
* When provided, recording will arm and wait until the level crosses this threshold,
35+
* then capture exactly 1 second including optional pre-roll.
3436
*/
3537
threshold?: number;
38+
39+
/**
40+
* Optional pre-roll time in milliseconds included before the trigger point.
41+
* Defaults to 120ms. The post-trigger capture length is reduced accordingly
42+
* so the total captured length remains 1 second.
43+
*/
44+
preRollMs?: number;
45+
46+
/**
47+
* Minimum time in milliseconds the signal must stay above threshold to trigger.
48+
* Defaults to 12ms.
49+
*/
50+
holdMs?: number;
51+
52+
/**
53+
* Timeout in milliseconds while waiting for a trigger before aborting.
54+
* Defaults to 10000ms.
55+
*/
56+
timeoutMs?: number;
57+
58+
/**
59+
* Optional high-pass filter cutoff in Hz to reduce low-frequency rumble.
60+
* Defaults to 80Hz.
61+
*/
62+
highpassHz?: number;
3663
}
3764

3865
export interface RecordingProgress {
3966
/**
40-
* Recording stage: 'requesting' | 'recording' | 'processing'
67+
* Recording stage: 'requesting' | 'waiting' | 'recording' | 'processing'
4168
*/
42-
stage: 'requesting' | 'recording' | 'processing';
69+
stage: 'requesting' | 'waiting' | 'recording' | 'processing';
4370

4471
/**
4572
* Progress percentage (0-100)
@@ -82,25 +109,85 @@ export async function recordAudio(
82109
stream = await navigator.mediaDevices.getUserMedia(constraints);
83110
logger.debug('Microphone access granted');
84111

85-
// Start recording
86-
onProgress?.({ stage: 'recording', percentage: 0 });
112+
if (typeof options.threshold === 'number') {
113+
// Onset-triggered recording path (wait for threshold then capture 1s incl. pre-roll)
114+
onProgress?.({ stage: 'waiting', percentage: 0 });
87115

88-
const audioBlob = await recordForDuration(stream, RECORDING_DURATION_MS, onProgress);
89-
logger.debug(`Recorded ${audioBlob.size} bytes`);
116+
const preRollMs = Math.max(0, Math.floor(options.preRollMs ?? 120));
117+
const captureMs = Math.max(1, 1000 - preRollMs);
90118

91-
// Stop all tracks
92-
stream.getTracks().forEach(track => track.stop());
93-
stream = null;
119+
const { samples, sampleRate } = await captureOnset(stream, {
120+
threshold: options.threshold,
121+
preRollMs,
122+
holdMs: Math.max(1, Math.floor(options.holdMs ?? 12)),
123+
timeoutMs: Math.max(1000, Math.floor(options.timeoutMs ?? 10000)),
124+
highpassHz: options.highpassHz ?? 80,
125+
captureMs
126+
});
127+
128+
// Stop all tracks
129+
stream.getTracks().forEach(track => track.stop());
130+
stream = null;
131+
132+
// Convert to DRUM-compatible format
133+
onProgress?.({ stage: 'processing', percentage: 80 });
134+
135+
let mono = samples;
136+
137+
if (sampleRate !== TARGET_SAMPLE_RATE) {
138+
mono = resample(mono, sampleRate, TARGET_SAMPLE_RATE);
139+
}
94140

95-
// Process the recorded audio
96-
onProgress?.({ stage: 'processing', percentage: 90 });
141+
// Ensure exactly 1 second (keep leading pre-roll)
142+
if (mono.length > MAX_SAMPLES) {
143+
mono = mono.slice(0, MAX_SAMPLES);
144+
} else if (mono.length < MAX_SAMPLES) {
145+
const padded = new Float32Array(MAX_SAMPLES);
146+
padded.set(mono, 0);
147+
mono = padded;
148+
}
149+
150+
// Convert to 16-bit PCM
151+
const pcmBuffer = new ArrayBuffer(mono.length * 2);
152+
const pcmView = new DataView(pcmBuffer);
153+
for (let i = 0; i < mono.length; i++) {
154+
const sample = Math.max(-1, Math.min(1, mono[i]));
155+
const intSample = Math.round(sample * 32767);
156+
pcmView.setInt16(i * 2, intSample, true);
157+
}
158+
159+
const processedAudio: ProcessedAudio = {
160+
pcmData: new Uint8Array(pcmBuffer),
161+
sampleRate: TARGET_SAMPLE_RATE,
162+
duration: mono.length / TARGET_SAMPLE_RATE,
163+
originalFileName: `recording-${Date.now()}.wav`
164+
};
165+
166+
logger.info('Recording completed successfully');
167+
onProgress?.({ stage: 'processing', percentage: 100 });
168+
169+
return processedAudio;
170+
} else {
171+
// Start fixed-duration recording path
172+
onProgress?.({ stage: 'recording', percentage: 0 });
97173

98-
const processedAudio = await processRecording(audioBlob);
99-
logger.info('Recording completed successfully');
174+
const audioBlob = await recordForDuration(stream, RECORDING_DURATION_MS, onProgress);
175+
logger.debug(`Recorded ${audioBlob.size} bytes`);
100176

101-
onProgress?.({ stage: 'processing', percentage: 100 });
177+
// Stop all tracks
178+
stream.getTracks().forEach(track => track.stop());
179+
stream = null;
180+
181+
// Process the recorded audio
182+
onProgress?.({ stage: 'processing', percentage: 90 });
183+
184+
const processedAudio = await processRecording(audioBlob);
185+
logger.info('Recording completed successfully');
102186

103-
return processedAudio;
187+
onProgress?.({ stage: 'processing', percentage: 100 });
188+
189+
return processedAudio;
190+
}
104191

105192
} catch (error) {
106193
// Cleanup on error

src/lib/services/onsetDetector.ts

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import { createLogger } from '$lib/utils/logger';
2+
3+
const logger = createLogger('OnsetDetector');
4+
5+
/**
 * Settings consumed by `captureOnset` and forwarded to the 'onset-detector' worklet.
 * All durations are expressed in milliseconds; the worklet converts them to sample
 * counts using the audio context's sample rate.
 */
export interface OnsetConfig {
6+
/** Trigger level; the worklet compares it against the absolute value of each high-passed sample. */
threshold: number;
7+
/** Amount of audio preceding the trigger that is prepended to the capture. */
preRollMs: number;
8+
/** How long the level must stay at or above `threshold` (consecutive samples) before triggering. */
holdMs: number;
9+
/** How long to wait for a completed capture before rejecting with a timeout error. */
timeoutMs: number;
10+
/** High-pass cutoff in Hz applied before level detection; 80 is used when omitted. */
highpassHz?: number;
11+
captureMs: number; // total capture length AFTER preroll; total output = preRollMs + captureMs
12+
}
13+
14+
/**
 * Capture a 1-channel Float32Array when an onset crosses a threshold,
 * including an optional pre-roll. Uses an AudioWorklet for low-latency detection.
 *
 * A dedicated AudioContext is created for the capture and is always torn down
 * again (nodes disconnected, timer cleared, context closed), whether the
 * capture succeeds, times out, or fails during setup.
 *
 * @param stream - Live microphone stream to listen on. The caller keeps ownership
 *   and is responsible for stopping its tracks.
 * @param cfg - Detection and capture settings forwarded to the worklet.
 * @returns The captured mono samples and the sample rate they were recorded at.
 * @throws Error when the Web Audio API / AudioWorklet is unavailable, when the
 *   worklet module cannot be loaded, when the worklet returns unusable data, or
 *   with the message 'Onset detection timed out' when no capture completes
 *   within `cfg.timeoutMs`.
 */
export async function captureOnset(
  stream: MediaStream,
  cfg: OnsetConfig
): Promise<{ samples: Float32Array; sampleRate: number }> {
  type Capture = { samples: Float32Array; sampleRate: number };
  type WorkletMessage = {
    type?: unknown;
    samples?: unknown;
    buffer?: unknown;
    sampleRate?: unknown;
  };

  const AudioCtx: typeof AudioContext | undefined =
    window.AudioContext ||
    (window as unknown as { webkitAudioContext?: typeof AudioContext }).webkitAudioContext;
  if (!AudioCtx) {
    throw new Error('Web Audio API is not available in this environment');
  }

  const ac = new AudioCtx();
  let src: MediaStreamAudioSourceNode | undefined;
  let node: AudioWorkletNode | undefined;
  let timer: number | undefined;

  // Normalise the payload shapes the worklet may deliver into a Float32Array.
  const decodeSamples = (msg: WorkletMessage): Float32Array => {
    if (msg.samples instanceof Float32Array) return msg.samples;
    if (msg.samples instanceof ArrayBuffer) return new Float32Array(msg.samples);
    if (msg.buffer instanceof ArrayBuffer) return new Float32Array(msg.buffer);
    if (msg.samples == null) {
      throw new Error('Onset worklet returned no sample data');
    }
    return Float32Array.from(msg.samples as ArrayLike<number>);
  };

  try {
    // Prefixed legacy contexts may exist without AudioWorklet support.
    if (!ac.audioWorklet) {
      throw new Error('AudioWorklet is not available in this environment');
    }

    await ac.audioWorklet.addModule(new URL('./worklets/onset-processor.js', import.meta.url));

    const source = ac.createMediaStreamSource(stream);
    src = source;
    const detector = new AudioWorkletNode(ac, 'onset-detector', {
      numberOfInputs: 1,
      numberOfOutputs: 0
    });
    node = detector;

    const captured = new Promise<Capture>((resolve, reject) => {
      timer = window.setTimeout(() => {
        logger.warn('Onset detection timed out');
        reject(new Error('Onset detection timed out'));
      }, cfg.timeoutMs);

      detector.port.onmessage = (e: MessageEvent) => {
        const data = e.data as WorkletMessage | null | undefined;
        // 'trigger' notifications (and anything unrecognised) carry no audio.
        if (!data || data.type !== 'data') return;

        try {
          resolve({
            samples: decodeSamples(data),
            sampleRate: typeof data.sampleRate === 'number' ? data.sampleRate : ac.sampleRate
          });
        } catch (err) {
          // Surface decoding failures instead of leaving the promise pending forever.
          reject(err instanceof Error ? err : new Error(String(err)));
        }
      };
    });

    // Listen before audio starts flowing so no worklet message can be missed.
    source.connect(detector);

    detector.port.postMessage({
      threshold: cfg.threshold,
      preRollMs: cfg.preRollMs,
      holdMs: cfg.holdMs,
      captureMs: cfg.captureMs,
      highpassHz: cfg.highpassHz ?? 80
    });

    // A context created outside a user gesture can start suspended, in which case
    // the worklet never receives audio. Resume without awaiting so the timeout
    // above still bounds the total wait.
    if (ac.state === 'suspended') {
      void ac.resume().catch(() => {});
    }

    return await captured;
  } finally {
    if (timer !== undefined) window.clearTimeout(timer);
    if (src) {
      try { src.disconnect(); } catch {}
    }
    if (node) {
      try { node.disconnect(); } catch {}
      node.port.onmessage = null;
    }
    ac.close().catch(() => {});
  }
}
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/* AudioWorkletProcessor: onset-triggered capture with optional pre-roll.
2+
Once triggered, emits a single buffer covering preRollMs of audio before the trigger plus captureMs from the trigger onward.
3+
*/
4+
/**
 * One-pole DC-blocking high-pass filter:
 *   y[n] = x[n] - x[n-1] + R * y[n-1],  with R = exp(-2π · cutHz / sr).
 */
class DCBlocker {
  /** Feedback coefficient (pole radius); always within (0, 1]. */
  R = 1;
  /** Previous input sample. */
  x1 = 0;
  /** Previous output sample. */
  y1 = 0;

  /**
   * @param {number} cutHz Cutoff frequency in Hz.
   * @param {number} sr Sample rate in Hz.
   *
   * A negative or non-finite cutoff/sample-rate ratio would place the pole
   * outside the unit circle (or make it NaN) and the filter would diverge, so
   * such input falls back to a ratio of 0, i.e. R = 1 (signal passes through
   * unfiltered).
   */
  constructor(cutHz, sr) {
    const ratio = cutHz / sr;
    const safeRatio = Number.isFinite(ratio) && ratio > 0 ? ratio : 0;
    this.R = Math.exp(-2 * Math.PI * safeRatio);
  }

  /**
   * Filters one sample and advances the filter state.
   * @param {number} x Input sample.
   * @returns {number} Filtered sample.
   */
  process(x) {
    const y = x - this.x1 + this.R * this.y1;
    this.x1 = x;
    this.y1 = y;
    return y;
  }
}
18+
19+
/**
 * 'onset-detector' processor.
 *
 * Listens to the first channel of its first input, high-pass filters it, and
 * waits until the absolute level stays at or above the configured threshold
 * for `holdSamples` consecutive samples. It then posts `{ type: 'trigger' }`,
 * records `capLen` samples starting with the triggering sample, and finally
 * posts `{ type: 'data', sampleRate, samples }` where `samples` is the
 * pre-roll followed by the recording (its buffer is transferred).
 *
 * Configuration is received as a message on the port:
 * `{ threshold, preRollMs, holdMs, captureMs, highpassHz }`. Every
 * configuration message re-arms the detector from scratch.
 */
registerProcessor('onset-detector', class extends AudioWorkletProcessor {
  constructor() {
    super();

    // Defaults used until a configuration message arrives.
    this.cfg = {
      threshold: 0.15,
      preRoll: 0,
      holdSamples: 1,
      capLen: Math.max(1, Math.round(sampleRate)), // one second at the context's rate
      hpHz: 80
    };
    this.resetState();

    this.port.onmessage = (e) => {
      const c = e.data || {};
      const isFiniteNumber = (v) => typeof v === 'number' && Number.isFinite(v);
      // Milliseconds -> whole samples, ignoring unusable values.
      const toSamples = (ms, fallbackMs, minSamples) =>
        Math.max(
          minSamples,
          Math.round(((isFiniteNumber(ms) ? ms : fallbackMs) * sampleRate) / 1000)
        );

      if (isFiniteNumber(c.threshold)) this.cfg.threshold = c.threshold;
      if (isFiniteNumber(c.highpassHz) && c.highpassHz >= 0) this.cfg.hpHz = c.highpassHz;
      this.cfg.preRoll = toSamples(c.preRollMs, 0, 0);
      this.cfg.holdSamples = toSamples(c.holdMs, 1, 1);
      this.cfg.capLen = toSamples(c.captureMs, 1000, 1);

      this.resetState();
    };
  }

  /** (Re)allocates buffers for the current configuration and re-arms detection. */
  resetState() {
    this.dc = new DCBlocker(this.cfg.hpHz, sampleRate);

    // Circular buffer holding the most recent pre-trigger samples.
    this.ring = new Float32Array(this.cfg.preRoll);
    this.ringIdx = 0;

    this.above = 0; // consecutive samples at/above threshold
    this.capturing = false;
    this.finished = false;

    // Output buffer: pre-roll followed by the post-trigger capture.
    this.buf = new Float32Array(this.cfg.preRoll + this.cfg.capLen);
    this.writeIdx = 0;
  }

  /**
   * Consumes one render quantum.
   * @param {Float32Array[][]} inputs Audio inputs; only inputs[0][0] is read.
   * @returns {boolean} false once the capture has been delivered, true otherwise.
   */
  process(inputs) {
    if (this.finished) return false;

    const ch = inputs[0] && inputs[0][0];
    if (!ch) return true;

    for (let i = 0; i < ch.length; i++) {
      const s = this.dc.process(ch[i]);

      if (!this.capturing) {
        this.above = Math.abs(s) >= this.cfg.threshold ? this.above + 1 : 0;

        if (this.above >= this.cfg.holdSamples) {
          // Trigger: unroll the ring into the output, oldest sample first. The
          // triggering sample is intentionally NOT in the ring; it becomes the
          // first captured sample below, so it is stored exactly once.
          const pre = this.ring.length;
          if (pre > 0) {
            this.buf.set(this.ring.subarray(this.ringIdx), 0);
            this.buf.set(this.ring.subarray(0, this.ringIdx), pre - this.ringIdx);
          }
          this.writeIdx = pre;
          this.capturing = true;
          this.port.postMessage({ type: 'trigger' });
        } else if (this.ring.length > 0) {
          // Still armed: remember this sample as potential pre-roll.
          this.ring[this.ringIdx] = s;
          this.ringIdx = (this.ringIdx + 1) % this.ring.length;
        }
      }

      if (this.capturing && this.writeIdx < this.buf.length) {
        this.buf[this.writeIdx++] = s;
        if (this.writeIdx >= this.buf.length) {
          // Done: hand the buffer over without copying. It is detached afterwards.
          this.finished = true;
          this.port.postMessage(
            { type: 'data', sampleRate, samples: this.buf },
            [this.buf.buffer]
          );
          return false; // stop processor
        }
      }
    }

    return true;
  }
});

0 commit comments

Comments
 (0)