Skip to content

Commit e5305a3

Browse files
committed
Fix Whisper model download and add mic verification test
- Rewrite downloadModel to use whisper.load_model() so it actually downloads weights (~150MB for turbo)
- Derive venv python from whisper binary path when command is not module form
- Add test-whisper-recording IPC handler and preload bridge
- Add mic test panel to onboarding model-download screen
- Require a successful 5-second transcription test before continuing when 'Download Now' is chosen
- Restore test state when navigating back to the model-download screen
1 parent 750c5f9 commit e5305a3

6 files changed

Lines changed: 195 additions & 48 deletions

File tree

main.js

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,15 @@ class ApplicationController {
379379
return speechService.isAvailable ? speechService.isAvailable() : false;
380380
});
381381

382+
// Onboarding mic check: record for a fixed five seconds and hand the outcome
// back to the renderer. A failed test is converted into an { ok: false, error }
// payload rather than a rejected invoke().
ipcMain.handle("test-whisper-recording", async () => {
  const TEST_DURATION_MS = 5000;
  try {
    // `return await` keeps a rejection inside this try block.
    return await speechService.testWhisperRecording(TEST_DURATION_MS);
  } catch (err) {
    return { ok: false, error: err.message };
  }
});
390+
382391
ipcMain.handle("start-speech-recognition", () => {
383392
speechService.startRecording();
384393
return speechService.getStatus();

onboarding.html

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,21 @@ <h1>Whisper Model Download</h1>
909909
</div>
910910

911911
<div class="install-log" id="modelDownloadLog"></div>
912+
913+
<!-- Whisper test panel (shown after model is downloaded) -->
914+
<div class="install-card" id="whisperTestCard" style="display:none; margin-top: 14px;">
915+
<div class="install-title">
916+
<i class="fas fa-microphone"></i>
917+
<span>Test Whisper</span>
918+
</div>
919+
<p style="font-size: 12px; color: var(--text-dim); margin-bottom: 12px;">
920+
Click the button below and speak a short sentence to confirm the microphone and Whisper model are working.
921+
</p>
922+
<button class="btn primary" id="testWhisperBtn" type="button">
923+
<i class="fas fa-microphone-lines"></i> Record 5-second test
924+
</button>
925+
<div id="whisperTestResult" style="margin-top: 12px; font-size: 12px; color: var(--text-dim);"></div>
926+
</div>
912927
</section>
913928

914929
<!-- SCREEN 6: Finish (star prompt + summary) -->

onboarding.js

Lines changed: 89 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@
3939
skippingWhisper: false,
4040
modelDownloadChoice: null, // 'now' | 'later'
4141
modelDownloading: false,
42+
modelDownloaded: false,
43+
whisperTestPassed: false,
44+
whisperTesting: false,
4245
finished: false,
4346
};
4447

@@ -73,8 +76,12 @@
7376
}
7477
refreshStepper();
7578
backBtn.style.visibility = state.step === 0 ? 'hidden' : 'visible';
76-
// Reset next button state unless we're actively downloading a model
77-
if (name !== 'model-download' || !state.modelDownloading) {
79+
// Reset next button state unless we're actively downloading a model or awaiting a test
80+
const awaitingTest = name === 'model-download' &&
81+
state.modelDownloadChoice === 'now' &&
82+
state.modelDownloaded &&
83+
!state.whisperTestPassed;
84+
if (name !== 'model-download' || (!state.modelDownloading && !state.whisperTesting && !awaitingTest)) {
7885
nextBtn.disabled = false;
7986
nextBtn.classList.remove('success');
8087
nextBtn.classList.add('primary');
@@ -131,7 +138,10 @@
131138
// Allow advancing whether whisper is detected OR user skipped
132139
return state.whisperDetected || state.skippingWhisper;
133140
case 'model-download':
134-
return !!state.modelDownloadChoice && !state.modelDownloading;
141+
if (!state.modelDownloadChoice || state.modelDownloading) return false;
142+
// If user chose to download now, require a successful mic test before continuing.
143+
if (state.modelDownloadChoice === 'now' && state.modelDownloaded && !state.whisperTestPassed) return false;
144+
return true;
135145
case 'finish':
136146
return true;
137147
default:
@@ -407,19 +417,44 @@
407417
if (value === 'now') {
408418
// Start downloading the model immediately
409419
startModelDownload();
420+
} else {
421+
// 'later' doesn't need a test
422+
nextBtn.disabled = false;
410423
}
411424
});
412425
});
426+
427+
// Wire up the Whisper test button
428+
const testBtn = document.getElementById('testWhisperBtn');
429+
if (testBtn) {
430+
testBtn.addEventListener('click', runWhisperTest);
431+
}
413432
}
414433

415434
// Restore selection state when navigating back
416435
$$('#modelDownloadChoices .choice-card').forEach((card) => {
417436
card.classList.toggle('selected', card.dataset.value === state.modelDownloadChoice);
418437
});
419438

420-
// Re-enable continue button if a choice has been made (or download already completed)
421-
if (state.modelDownloadChoice && !state.modelDownloading) {
422-
nextBtn.disabled = false;
439+
// Restore test panel visibility and state
440+
const testCard = document.getElementById('whisperTestCard');
441+
if (testCard && state.modelDownloadChoice === 'now' && state.modelDownloaded) {
442+
testCard.style.display = 'block';
443+
}
444+
const testBtn = document.getElementById('testWhisperBtn');
445+
if (testBtn && state.whisperTestPassed) {
446+
testBtn.innerHTML = '<i class="fas fa-check-circle"></i> Test passed';
447+
testBtn.classList.remove('primary');
448+
testBtn.classList.add('success');
449+
}
450+
451+
// Re-enable continue button if a choice has been made and (if applicable) test passed
452+
if (state.modelDownloadChoice && !state.modelDownloading && !state.whisperTesting) {
453+
if (state.modelDownloadChoice === 'now' && state.modelDownloaded && !state.whisperTestPassed) {
454+
nextBtn.disabled = true;
455+
} else {
456+
nextBtn.disabled = false;
457+
}
423458
}
424459
}
425460

@@ -440,30 +475,71 @@
440475
const r = await window.electronAPI.downloadWhisperModel('turbo');
441476
state.modelDownloading = false;
442477
if (r.ok) {
478+
state.modelDownloaded = true;
443479
appendModelLog(`\n✓ Model downloaded successfully: ${r.path}`);
444-
// Re-enable continue so user can proceed to finish
445-
nextBtn.disabled = false;
446-
nextBtn.classList.remove('primary');
447-
nextBtn.classList.add('success');
448-
nextBtn.innerHTML = '<i class="fas fa-check-circle"></i> Continue';
480+
// Show the Whisper mic test panel
481+
const testCard = document.getElementById('whisperTestCard');
482+
if (testCard) testCard.style.display = 'block';
483+
// Keep Continue disabled until test passes
484+
nextBtn.disabled = true;
449485
} else {
450486
appendModelLog(`\n✗ Download failed: ${r.message}`);
451487
// Let user continue anyway; they'll download on first use
452488
nextBtn.disabled = false;
453-
nextBtn.innerHTML = 'Continue <i class="fas fa-arrow-right"></i>';
454489
}
455490
} catch (e) {
456491
state.modelDownloading = false;
457492
appendModelLog(`\n! Error: ${e.message || e}`);
458493
nextBtn.disabled = false;
459-
nextBtn.innerHTML = 'Continue <i class="fas fa-arrow-right"></i>';
460494
} finally {
461495
if (progressHandler && window.electronAPI.removeAllListeners) {
462496
try { window.electronAPI.removeAllListeners('install-progress'); } catch (_) { /* ignore */ }
463497
}
464498
}
465499
}
466500

501+
/**
 * Run the onboarding microphone check and reflect its outcome in the UI.
 *
 * While the test is in flight the test button is disabled and
 * `state.whisperTesting` is set. On success `state.whisperTestPassed` is set
 * and the Continue button is unlocked and restyled; on failure the error is
 * shown and the test button becomes a retry button.
 */
async function runWhisperTest() {
  const testButton = document.getElementById('testWhisperBtn');
  const output = document.getElementById('whisperTestResult');
  const api = window.electronAPI;
  if (!testButton || !api || !api.testWhisperRecording) return;

  // Shared failure presentation for both an { ok: false } result and a thrown error.
  const showFailure = (reason) => {
    if (output) output.innerHTML = `<span style="color: var(--error);">✗ ${escapeHtml(reason)}</span>`;
    testButton.disabled = false;
    testButton.innerHTML = '<i class="fas fa-redo"></i> Retry test';
  };

  state.whisperTesting = true;
  testButton.disabled = true;
  testButton.innerHTML = '<span class="spinner"></span> Listening…';
  if (output) output.textContent = 'Speak now…';

  try {
    const outcome = await api.testWhisperRecording();
    if (!outcome.ok) {
      showFailure(outcome.error || 'Test failed');
      return;
    }

    state.whisperTestPassed = true;
    if (output) output.innerHTML = `<span style="color: var(--success);">✓ Heard:</span> “${escapeHtml(outcome.text)}”`;

    // The test button stays disabled once the test has passed.
    testButton.innerHTML = '<i class="fas fa-check-circle"></i> Test passed';
    testButton.classList.remove('primary');
    testButton.classList.add('success');

    // Unlock Continue now that the model has been verified.
    nextBtn.disabled = false;
    nextBtn.classList.remove('primary');
    nextBtn.classList.add('success');
    nextBtn.innerHTML = '<i class="fas fa-check-circle"></i> Continue';
  } catch (e) {
    showFailure(e.message || e);
  } finally {
    state.whisperTesting = false;
  }
}
536+
537+
/**
 * Escape a value so it can be interpolated into HTML markup as literal text.
 *
 * Implemented as a pure string transform: it needs no DOM, allocates no
 * throwaway element, and also escapes both quote characters so the result is
 * safe inside a quoted attribute value as well as in element content.
 *
 * @param {*} text Value to escape. `null` and `undefined` yield an empty
 *   string; any other non-string is converted with `String()`.
 * @returns {string} The escaped text.
 */
function escapeHtml(text) {
  const HTML_ESCAPES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  const raw = text == null ? '' : String(text);
  return raw.replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
}
542+
467543
// ── Wire up: Finish screen ────────────────────────────────────────
468544
function populateSummary() {
469545
const rows = [];

preload.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ contextBridge.exposeInMainWorld('electronAPI', {
4949
detectWhisper: () => ipcRenderer.invoke('detect-whisper'),
5050
installWhisper: () => ipcRenderer.invoke('install-whisper'),
5151
downloadWhisperModel: (modelName) => ipcRenderer.invoke('download-whisper-model', modelName),
52+
testWhisperRecording: () => ipcRenderer.invoke('test-whisper-recording'),
5253
onInstallProgress: (callback) => {
5354
const wrapped = (_event, line) => {
5455
try { callback(line); } catch (e) { console.error('onInstallProgress error:', e); }

src/core/whisper-installer.js

Lines changed: 25 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -528,51 +528,41 @@ class WhisperInstaller {
528528
}
529529

530530
const command = detectResult.command;
531-
log(`→ Downloading ${modelName} model using ${command}…`);
531+
log(`→ Preparing to download ${modelName} model…`);
532532

533-
// Parse the command to get the python executable and module
534-
let pythonCmd, moduleName;
533+
// Parse the command to get the python executable that owns the whisper module.
534+
let pythonCmd;
535535
if (command.includes(' -m ')) {
536-
const parts = command.split(' -m ');
537-
pythonCmd = parts[0].trim();
538-
moduleName = parts[1].trim();
536+
pythonCmd = command.split(' -m ')[0].trim();
539537
} else if (command.endsWith(' -m whisper')) {
540538
pythonCmd = command.replace(' -m whisper', '').trim();
541-
moduleName = 'whisper';
542539
} else {
543-
// Fallback: assume it's a direct whisper command
544-
pythonCmd = 'python3';
545-
moduleName = 'whisper';
540+
// The command is a whisper binary (likely inside the venv). Derive the
541+
// sibling python interpreter from the venv layout.
542+
const binDir = path.dirname(command);
543+
const isWin = this.platform === 'win32';
544+
const pythonExe = isWin ? 'python.exe' : 'python';
545+
const candidate = path.join(binDir, pythonExe);
546+
if (fs.existsSync(candidate)) {
547+
pythonCmd = candidate;
548+
} else {
549+
pythonCmd = isWin ? 'python' : 'python3';
550+
}
546551
}
547552

548-
const result = await this.runExec(pythonCmd, ['-m', moduleName, '--model', modelName, '--help'], {
549-
timeout: 30000,
553+
// whisper.load_model() downloads the weights lazily and prints progress
554+
// to stderr. We capture that output and relay it via onProgress.
555+
log(`→ Downloading ${modelName} weights (this may take a minute)…`);
556+
const loadResult = await this.runExec(pythonCmd, [
557+
'-c',
558+
`import whisper; whisper.load_model('${modelName}'); print('model_loaded')`
559+
], {
560+
timeout: 600000,
550561
onProgress: log,
551562
});
552563

553-
if (!result.ok) {
554-
// Try running a small transcription to trigger download
555-
log(`→ Triggering model download via test transcription…`);
556-
const testResult = await this.runExec(pythonCmd, ['-m', moduleName, '--model', modelName, '--language', 'en', '/dev/null'], {
557-
timeout: 120000,
558-
onProgress: log,
559-
});
560-
561-
if (!testResult.ok) {
562-
// Check if it's just a file not found error (model downloading)
563-
if (testResult.stderr && testResult.stderr.includes('Downloading')) {
564-
// Wait for download to complete
565-
const downloadResult = await this.runExec(pythonCmd, ['-m', moduleName, '--model', modelName, '--help'], {
566-
timeout: 300000,
567-
onProgress: log,
568-
});
569-
if (downloadResult.ok) {
570-
const modelPath = this._getModelPath(modelName);
571-
return { ok: true, message: `Model ${modelName} downloaded successfully`, path: modelPath };
572-
}
573-
}
574-
return { ok: false, message: testResult.stderr || testResult.error };
575-
}
564+
if (!loadResult.ok) {
565+
return { ok: false, message: loadResult.stderr || loadResult.error };
576566
}
577567

578568
const modelPath = this._getModelPath(modelName);

src/services/speech.service.js

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,62 @@ class SpeechService extends EventEmitter {
818818
this._audioDataLogged = false;
819819
}
820820

821+
/**
822+
* Record a short audio clip and return the transcription.
823+
* Used by the onboarding wizard to verify the Whisper model works.
824+
*/
825+
async testWhisperRecording(durationMs = 5000) {
826+
return new Promise((resolve, reject) => {
827+
if (!this.isAvailable()) {
828+
reject(new Error('Speech recognition is not available'));
829+
return;
830+
}
831+
832+
let timeoutId = null;
833+
let transcriptionReceived = false;
834+
835+
const onTranscription = (text) => {
836+
if (transcriptionReceived) return;
837+
transcriptionReceived = true;
838+
cleanup();
839+
resolve({ ok: true, text: text.trim() });
840+
};
841+
842+
const onError = (error) => {
843+
if (transcriptionReceived) return;
844+
cleanup();
845+
reject(new Error(typeof error === 'string' ? error : error?.message || 'Speech test failed'));
846+
};
847+
848+
const cleanup = () => {
849+
if (timeoutId) clearTimeout(timeoutId);
850+
this.off('transcription', onTranscription);
851+
this.off('error', onError);
852+
};
853+
854+
this.once('transcription', onTranscription);
855+
this.once('error', onError);
856+
857+
try {
858+
this.startRecording();
859+
timeoutId = setTimeout(() => {
860+
if (transcriptionReceived) return;
861+
this.stopRecording();
862+
// Give Whisper a short window to emit transcription after stop
863+
setTimeout(() => {
864+
if (!transcriptionReceived) {
865+
cleanup();
866+
reject(new Error('No transcription received. Speak louder or check your microphone.'));
867+
}
868+
}, 3000);
869+
}, durationMs);
870+
} catch (error) {
871+
cleanup();
872+
reject(error);
873+
}
874+
});
875+
}
876+
821877
async recognizeFromFile(audioFilePath) {
822878
if (this.provider === 'azure') {
823879
if (!this.speechConfig) {

0 commit comments

Comments
 (0)