Fix Whisper model download and add mic verification test

TechyCSR · TechyCSR · commit e5305a32810a · 2026-06-23T22:56:26.000+05:30
- Rewrite downloadModel to use whisper.load_model() so it actually downloads weights (~150MB for turbo)
- Derive venv python from whisper binary path when command is not module form
- Add test-whisper-recording IPC handler and preload bridge
- Add mic test panel to onboarding model-download screen
- Require a successful 5-second transcription test before continuing when 'Download Now' is chosen
- Restore test state when navigating back to the model-download screen
diff --git a/main.js b/main.js
@@ -379,6 +379,15 @@ class ApplicationController {
       return speechService.isAvailable ? speechService.isAvailable() : false;
     });
 
+    ipcMain.handle("test-whisper-recording", async () => {
+      try {
+        const result = await speechService.testWhisperRecording(5000);
+        return result;
+      } catch (error) {
+        return { ok: false, error: error.message };
+      }
+    });
+
     ipcMain.handle("start-speech-recognition", () => {
       speechService.startRecording();
       return speechService.getStatus();
diff --git a/onboarding.html b/onboarding.html
@@ -909,6 +909,21 @@ <h1>Whisper Model Download</h1>
                 </div>
 
                 <div class="install-log" id="modelDownloadLog"></div>
+
+                <!-- Whisper test panel (shown after model is downloaded) -->
+                <div class="install-card" id="whisperTestCard" style="display:none; margin-top: 14px;">
+                    <div class="install-title">
+                        <i class="fas fa-microphone"></i>
+                        <span>Test Whisper</span>
+                    </div>
+                    <p style="font-size: 12px; color: var(--text-dim); margin-bottom: 12px;">
+                        Click the button below and speak a short sentence to confirm the microphone and Whisper model are working.
+                    </p>
+                    <button class="btn primary" id="testWhisperBtn" type="button">
+                        <i class="fas fa-microphone-lines"></i> Record 5-second test
+                    </button>
+                    <div id="whisperTestResult" style="margin-top: 12px; font-size: 12px; color: var(--text-dim);"></div>
+                </div>
             </section>
 
             <!-- SCREEN 6: Finish (star prompt + summary) -->
diff --git a/onboarding.js b/onboarding.js
@@ -39,6 +39,9 @@
     skippingWhisper: false,
     modelDownloadChoice: null, // 'now' | 'later'
     modelDownloading: false,
+    modelDownloaded: false,
+    whisperTestPassed: false,
+    whisperTesting: false,
     finished: false,
   };
 
@@ -73,8 +76,12 @@
     }
     refreshStepper();
     backBtn.style.visibility = state.step === 0 ? 'hidden' : 'visible';
-    // Reset next button state unless we're actively downloading a model
-    if (name !== 'model-download' || !state.modelDownloading) {
+    // Reset next button state unless we're actively downloading a model or awaiting a test
+    const awaitingTest = name === 'model-download' &&
+      state.modelDownloadChoice === 'now' &&
+      state.modelDownloaded &&
+      !state.whisperTestPassed;
+    if (name !== 'model-download' || (!state.modelDownloading && !state.whisperTesting && !awaitingTest)) {
       nextBtn.disabled = false;
       nextBtn.classList.remove('success');
       nextBtn.classList.add('primary');
@@ -131,7 +138,10 @@
         // Allow advancing whether whisper is detected OR user skipped
         return state.whisperDetected || state.skippingWhisper;
       case 'model-download':
-        return !!state.modelDownloadChoice && !state.modelDownloading;
+        if (!state.modelDownloadChoice || state.modelDownloading) return false;
+        // If user chose to download now, require a successful mic test before continuing.
+        if (state.modelDownloadChoice === 'now' && state.modelDownloaded && !state.whisperTestPassed) return false;
+        return true;
       case 'finish':
         return true;
       default:
@@ -407,19 +417,44 @@
           if (value === 'now') {
             // Start downloading the model immediately
             startModelDownload();
+          } else {
+            // 'later' doesn't need a test
+            nextBtn.disabled = false;
           }
         });
       });
+
+      // Wire up the Whisper test button
+      const testBtn = document.getElementById('testWhisperBtn');
+      if (testBtn) {
+        testBtn.addEventListener('click', runWhisperTest);
+      }
     }
 
     // Restore selection state when navigating back
     $$('#modelDownloadChoices .choice-card').forEach((card) => {
       card.classList.toggle('selected', card.dataset.value === state.modelDownloadChoice);
     });
 
-    // Re-enable continue button if a choice has been made (or download already completed)
-    if (state.modelDownloadChoice && !state.modelDownloading) {
-      nextBtn.disabled = false;
+    // Restore test panel visibility and state
+    const testCard = document.getElementById('whisperTestCard');
+    if (testCard && state.modelDownloadChoice === 'now' && state.modelDownloaded) {
+      testCard.style.display = 'block';
+    }
+    const testBtn = document.getElementById('testWhisperBtn');
+    if (testBtn && state.whisperTestPassed) {
+      testBtn.innerHTML = '<i class="fas fa-check-circle"></i> Test passed';
+      testBtn.classList.remove('primary');
+      testBtn.classList.add('success');
+    }
+
+    // Re-enable continue button if a choice has been made and (if applicable) test passed
+    if (state.modelDownloadChoice && !state.modelDownloading && !state.whisperTesting) {
+      if (state.modelDownloadChoice === 'now' && state.modelDownloaded && !state.whisperTestPassed) {
+        nextBtn.disabled = true;
+      } else {
+        nextBtn.disabled = false;
+      }
     }
   }
 
@@ -440,30 +475,71 @@
       const r = await window.electronAPI.downloadWhisperModel('turbo');
       state.modelDownloading = false;
       if (r.ok) {
+        state.modelDownloaded = true;
         appendModelLog(`\n✓ Model downloaded successfully: ${r.path}`);
-        // Re-enable continue so user can proceed to finish
-        nextBtn.disabled = false;
-        nextBtn.classList.remove('primary');
-        nextBtn.classList.add('success');
-        nextBtn.innerHTML = '<i class="fas fa-check-circle"></i> Continue';
+        // Show the Whisper mic test panel
+        const testCard = document.getElementById('whisperTestCard');
+        if (testCard) testCard.style.display = 'block';
+        // Keep Continue disabled until test passes
+        nextBtn.disabled = true;
       } else {
         appendModelLog(`\n✗ Download failed: ${r.message}`);
         // Let user continue anyway; they'll download on first use
         nextBtn.disabled = false;
-        nextBtn.innerHTML = 'Continue <i class="fas fa-arrow-right"></i>';
       }
     } catch (e) {
       state.modelDownloading = false;
       appendModelLog(`\n! Error: ${e.message || e}`);
       nextBtn.disabled = false;
-      nextBtn.innerHTML = 'Continue <i class="fas fa-arrow-right"></i>';
     } finally {
       if (progressHandler && window.electronAPI.removeAllListeners) {
         try { window.electronAPI.removeAllListeners('install-progress'); } catch (_) { /* ignore */ }
       }
     }
   }
 
+  // Run the microphone check through the preload bridge, then mirror the outcome on the
+  // test button, the result line and the Continue button. No-op if the button or bridge is missing.
+  async function runWhisperTest() {
+    const testButton = document.getElementById('testWhisperBtn');
+    const output = document.getElementById('whisperTestResult');
+    if (!testButton || !window.electronAPI || !window.electronAPI.testWhisperRecording) return;
+    // Shared failure path; the reason is computed lazily, only when there is a place to show it.
+    const showFailure = (describe) => {
+      if (output) output.innerHTML = `<span style="color: var(--error);">✗ ${escapeHtml(describe())}</span>`;
+      testButton.disabled = false;
+      testButton.innerHTML = '<i class="fas fa-redo"></i> Retry test';
+    };
+
+    state.whisperTesting = true;
+    testButton.disabled = true;
+    testButton.innerHTML = '<span class="spinner"></span> Listening…';
+    if (output) output.textContent = 'Speak now…';
+    try {
+      const outcome = await window.electronAPI.testWhisperRecording();
+      if (!outcome.ok) return showFailure(() => outcome.error || 'Test failed');
+      state.whisperTestPassed = true;
+      if (output) output.innerHTML = `<span style="color: var(--success);">✓ Heard:</span> “${escapeHtml(outcome.text)}”`;
+      testButton.innerHTML = '<i class="fas fa-check-circle"></i> Test passed';
+      testButton.classList.remove('primary');
+      testButton.classList.add('success');
+      nextBtn.disabled = false;
+      nextBtn.classList.remove('primary');
+      nextBtn.classList.add('success');
+      nextBtn.innerHTML = '<i class="fas fa-check-circle"></i> Continue';
+    } catch (err) {
+      showFailure(() => err.message || err);
+    } finally {
+      state.whisperTesting = false;
+    }
+  }
+
+  // Escape a value for HTML: store it as a detached <div>'s text, then read back the serialized markup.
+  function escapeHtml(text) {
+    const holder = Object.assign(document.createElement('div'), { textContent: text });
+    return holder.innerHTML;
+  }
+
   // ── Wire up: Finish screen ────────────────────────────────────────
   function populateSummary() {
     const rows = [];
diff --git a/preload.js b/preload.js
@@ -49,6 +49,7 @@ contextBridge.exposeInMainWorld('electronAPI', {
   detectWhisper: () => ipcRenderer.invoke('detect-whisper'),
   installWhisper: () => ipcRenderer.invoke('install-whisper'),
   downloadWhisperModel: (modelName) => ipcRenderer.invoke('download-whisper-model', modelName),
+  testWhisperRecording: () => ipcRenderer.invoke('test-whisper-recording'),
   onInstallProgress: (callback) => {
     const wrapped = (_event, line) => {
       try { callback(line); } catch (e) { console.error('onInstallProgress error:', e); }
diff --git a/src/core/whisper-installer.js b/src/core/whisper-installer.js
@@ -528,51 +528,41 @@ class WhisperInstaller {
     }
 
     const command = detectResult.command;
-    log(`→ Downloading ${modelName} model using ${command}…`);
+    log(`→ Preparing to download ${modelName} model…`);
 
-    // Parse the command to get the python executable and module
-    let pythonCmd, moduleName;
+    // Parse the command to get the python executable that owns the whisper module.
+    let pythonCmd;
     if (command.includes(' -m ')) {
-      const parts = command.split(' -m ');
-      pythonCmd = parts[0].trim();
-      moduleName = parts[1].trim();
+      pythonCmd = command.split(' -m ')[0].trim();
     } else if (command.endsWith(' -m whisper')) {
       pythonCmd = command.replace(' -m whisper', '').trim();
-      moduleName = 'whisper';
     } else {
-      // Fallback: assume it's a direct whisper command
-      pythonCmd = 'python3';
-      moduleName = 'whisper';
+      // The command is a whisper binary (likely inside the venv). Derive the
+      // sibling python interpreter from the venv layout.
+      const binDir = path.dirname(command);
+      const isWin = this.platform === 'win32';
+      const pythonExe = isWin ? 'python.exe' : 'python';
+      const candidate = path.join(binDir, pythonExe);
+      if (fs.existsSync(candidate)) {
+        pythonCmd = candidate;
+      } else {
+        pythonCmd = isWin ? 'python' : 'python3';
+      }
     }
 
-    const result = await this.runExec(pythonCmd, ['-m', moduleName, '--model', modelName, '--help'], {
-      timeout: 30000,
+    // whisper.load_model() downloads the weights lazily and prints progress
+    // to stderr. We capture that output and relay it via onProgress.
+    log(`→ Downloading ${modelName} weights (this may take a minute)…`);
+    const loadResult = await this.runExec(pythonCmd, [
+      '-c',
+      `import whisper; whisper.load_model('${modelName}'); print('model_loaded')`
+    ], {
+      timeout: 600000,
       onProgress: log,
     });
 
-    if (!result.ok) {
-      // Try running a small transcription to trigger download
-      log(`→ Triggering model download via test transcription…`);
-      const testResult = await this.runExec(pythonCmd, ['-m', moduleName, '--model', modelName, '--language', 'en', '/dev/null'], {
-        timeout: 120000,
-        onProgress: log,
-      });
-      
-      if (!testResult.ok) {
-        // Check if it's just a file not found error (model downloading)
-        if (testResult.stderr && testResult.stderr.includes('Downloading')) {
-          // Wait for download to complete
-          const downloadResult = await this.runExec(pythonCmd, ['-m', moduleName, '--model', modelName, '--help'], {
-            timeout: 300000,
-            onProgress: log,
-          });
-          if (downloadResult.ok) {
-            const modelPath = this._getModelPath(modelName);
-            return { ok: true, message: `Model ${modelName} downloaded successfully`, path: modelPath };
-          }
-        }
-        return { ok: false, message: testResult.stderr || testResult.error };
-      }
+    if (!loadResult.ok) {
+      return { ok: false, message: loadResult.stderr || loadResult.error };
     }
 
     const modelPath = this._getModelPath(modelName);
diff --git a/src/services/speech.service.js b/src/services/speech.service.js
@@ -818,6 +818,62 @@ class SpeechService extends EventEmitter {
     this._audioDataLogged = false;
   }
 
+  /**
+   * Record a short audio clip and resolve with its transcription; used by the onboarding
+   * wizard to verify Whisper. Settles once, and every exit path stops the capture it started.
+   * @param {number} [durationMs=5000] How long to record before the capture is stopped.
+   * @returns {Promise<{ok: boolean, text: string}>} Rejects with an Error on any failure.
+   */
+  async testWhisperRecording(durationMs = 5000) {
+    return new Promise((resolve, reject) => {
+      if (!this.isAvailable()) {
+        reject(new Error('Speech recognition is not available'));
+        return;
+      }
+
+      let stopTimer = null; // ends the capture after durationMs
+      let graceTimer = null; // waits for a late transcription after the stop
+      let recording = false; // true while the capture started below is still running
+      let settled = false;
+
+      const stopCapture = () => {
+        if (!recording) return;
+        recording = false;
+        this.stopRecording();
+      };
+      // Single exit point. The capture is stopped before the listeners are detached so any
+      // 'error' event emitted while stopping still finds a handler (ignored once settled).
+      const finish = (error, text) => {
+        if (settled) return;
+        settled = true;
+        clearTimeout(stopTimer);
+        clearTimeout(graceTimer);
+        let failure = error;
+        try { stopCapture(); } catch (stopError) { failure = failure || stopError; }
+        this.off('transcription', onTranscription);
+        this.off('error', onError);
+        if (typeof text === 'string' && !failure) resolve({ ok: true, text }); else reject(failure);
+      };
+      const onTranscription = (text) => finish(null, String(text ?? '').trim());
+      const onError = (error) =>
+        finish(new Error(typeof error === 'string' ? error : error?.message || 'Speech test failed'));
+      const onStopTimer = () => {
+        try { stopCapture(); } catch (error) { finish(error); return; }
+        if (settled) return;
+        // Give Whisper a short window to emit transcription after stop
+        const silence = new Error('No transcription received. Speak louder or check your microphone.');
+        graceTimer = setTimeout(() => finish(silence), 3000);
+      };
+
+      this.on('transcription', onTranscription);
+      this.on('error', onError);
+      try { this.startRecording(); } catch (error) { finish(error); return; }
+      recording = true;
+      // Arm the stop timer; fire at once if an event already settled the test during start-up.
+      stopTimer = setTimeout(onStopTimer, settled ? 0 : durationMs);
+    });
+  }
+
   async recognizeFromFile(audioFilePath) {
     if (this.provider === 'azure') {
       if (!this.speechConfig) {