@@ -52,6 +52,7 @@ const MAX_SECTION_INSTRUCTION_LENGTH = 1200;
const AUTO_UPDATE_REPO = "qyinm/MirrorNote";
const AUTO_UPDATE_INTERVAL = "30 minutes";
const DEFAULT_AUDIO_DEVICE_ID = "default";
// Duration, in seconds, of each PCM chunk read from a recording and pushed to
// the local STT server while streaming a transcription.
const LOCAL_STT_STREAM_CHUNK_SECONDS = 5;
5556
5657const LOCAL_STT_MODELS = [
5758 {
@@ -1210,6 +1211,9 @@ async function downloadLocalSTTModel(modelID, targetWebContents = null) {
12101211 await fs . rm ( destinationPath , { force : true } ) ;
12111212 await fs . rename ( temporaryPath , destinationPath ) ;
12121213 await writeElectronSettings ( { selectedLocalSTTModelID : model . id } ) ;
1214+ warmLocalSTTServer ( ) . catch ( ( error ) => {
1215+ console . error ( "Local STT warmup failed after model download:" , error ) ;
1216+ } ) ;
12131217 updateTrayMenu ( ) ;
12141218 sendSTTModelDownloadProgress ( targetWebContents , {
12151219 modelID : model . id ,
@@ -2387,10 +2391,23 @@ async function retryTranscript(id) {
23872391 throw new Error ( "This note does not have a playable recording to transcribe." ) ;
23882392 }
23892393
2390- const segments = await transcribeRecordingFile ( paths . recordingPath , transcriptionConfiguration ) ;
2394+ const segments = await transcribeRecordingFile ( paths . recordingPath , transcriptionConfiguration , {
2395+ onSegments : async ( liveSegments ) => {
2396+ await writeLiveTranscript ( paths , liveSegments ) ;
2397+ } ,
2398+ } ) ;
23912399 return saveTranscript ( id , segments ) ;
23922400}
23932401
/**
 * Persists the transcript segments received so far for a note.
 *
 * The segments are normalized, serialized with `serializeTranscriptJSONL` and
 * written to `paths.transcriptPath`; afterwards `transcriptSegmentCount` in the
 * metadata file is set to the number of normalized segments. All other metadata
 * fields are written back exactly as they were read.
 *
 * @param {{ transcriptPath: string, metadataPath: string }} paths - Locations of the note's transcript and metadata files.
 * @param {Array<object>} segments - Transcript segments received so far.
 * @returns {Promise<void>}
 */
async function writeLiveTranscript(paths, segments) {
  const normalizedSegments = normalizeTranscriptSegments(segments);
  await writeFileAtomic(paths.transcriptPath, serializeTranscriptJSONL(normalizedSegments), "utf8");

  // Fall back to an empty object when no metadata could be read.
  const metadata = (await readJSONIfExists(paths.metadataPath)) || {};
  metadata.transcriptSegmentCount = normalizedSegments.length;
  await writeJSONFileAtomic(paths.metadataPath, metadata);
}
2410+
23942411async function promptForFileUpload ( targetWindow , title , filters ) {
23952412 const result = await dialog . showOpenDialog ( targetWindow || undefined , {
23962413 title,
@@ -3154,10 +3171,10 @@ async function fetchAudioDeviceInventory() {
31543171 }
31553172}
31563173
3157- async function transcribeRecordingFile ( recordingPath , transcriptionConfiguration ) {
3174+ async function transcribeRecordingFile ( recordingPath , transcriptionConfiguration , options = { } ) {
31583175 const sttInputPath = await prepareLocalSTTAudioInput ( recordingPath ) ;
31593176 try {
3160- return await transcribeLocalSTTAudioFile ( sttInputPath , transcriptionConfiguration ) ;
3177+ return await transcribeLocalSTTAudioFile ( sttInputPath , transcriptionConfiguration , options ) ;
31613178 } finally {
31623179 if ( sttInputPath !== recordingPath ) {
31633180 await fs . rm ( path . dirname ( sttInputPath ) , { recursive : true , force : true } ) . catch ( ( ) => { } ) ;
@@ -3205,7 +3222,231 @@ function runProcess(command, args) {
32053222 } ) ;
32063223}
32073224
3208- async function transcribeLocalSTTAudioFile ( recordingPath , transcriptionConfiguration ) {
/**
 * Transcribes a recording by streaming it to the local STT server in chunks.
 *
 * Flow: `start_session`, one `push_audio_chunk` per chunk yielded by
 * `readLocalSTTPcmChunks`, then `finalize_session`. While the session runs,
 * server events carrying this session's ID are re-broadcast as capture events
 * and their segments are collected.
 *
 * @param {string} recordingPath - Path of the audio file handed to `readLocalSTTPcmChunks`.
 * @param {object} transcriptionConfiguration - Sent as `configuration` with `start_session`.
 * @param {{ onSegments?: (segments: Array<object>) => (void | Promise<void>) }} [options]
 *   `onSegments` is awaited after a chunk or the finalize step whenever new
 *   streamed segments have arrived; it receives all normalized segments so far.
 * @returns {Promise<Array<object>>} Normalized segments: the streamed ones when
 *   any event delivered a segment, otherwise the segments carried in responses.
 * @throws Re-throws any failure after a best-effort `cancel_session`.
 */
async function transcribeLocalSTTAudioFile(recordingPath, transcriptionConfiguration, options = {}) {
  const sessionID = randomUUID();
  const streamedSegments = [];
  const fallbackSegments = [];
  let lastLiveSegmentCount = 0;

  // Every request of this session shares the same envelope; only the command
  // and its payload differ. Deliberately not `async` so a synchronous throw
  // from the bridge surfaces exactly as it would from a direct call.
  const sendSessionCommand = (command, payload = {}) =>
    sendLocalSTTServerRequest({
      version: 4,
      id: randomUUID(),
      command,
      sessionID,
      ...payload,
    });

  const unsubscribe = localSTTServer.onEvent((event) => {
    // The bridge fans events out to all listeners; ignore other sessions.
    if (event?.sessionID !== sessionID) {
      return;
    }
    const captureEvent = captureEventFromLocalSTTEvent(event);
    if (captureEvent) {
      broadcastCaptureEvent(captureEvent);
    }
    // NOTE(review): partial and final segments are both appended here;
    // presumably normalizeTranscriptSegments reconciles them — confirm.
    const isSegmentEvent = event?.eventType === "partial_segment" || event?.eventType === "final_segment";
    if (isSegmentEvent && event.segment) {
      streamedSegments.push(event.segment);
    }
  });

  // Invokes options.onSegments only when segments arrived since the last flush.
  const flushLiveSegments = async () => {
    if (typeof options.onSegments !== "function" || streamedSegments.length === lastLiveSegmentCount) {
      return;
    }
    lastLiveSegmentCount = streamedSegments.length;
    await options.onSegments(normalizeTranscriptSegments(streamedSegments));
  };

  try {
    await sendSessionCommand("start_session", { configuration: transcriptionConfiguration });

    for await (const chunk of readLocalSTTPcmChunks(recordingPath, LOCAL_STT_STREAM_CHUNK_SECONDS)) {
      const response = await sendSessionCommand("push_audio_chunk", {
        pcmSamples: chunk.samples,
        sampleRate: chunk.sampleRate,
      });
      if (Array.isArray(response.segments)) {
        fallbackSegments.push(...response.segments);
      }
      await flushLiveSegments();
    }

    const finalResponse = await sendSessionCommand("finalize_session");
    if (Array.isArray(finalResponse.segments)) {
      fallbackSegments.push(...finalResponse.segments);
    }
    await flushLiveSegments();

    return normalizeTranscriptSegments(streamedSegments.length > 0 ? streamedSegments : fallbackSegments);
  } catch (error) {
    // Best effort: a failed cancel must not mask the original error.
    await sendSessionCommand("cancel_session").catch(() => {});
    throw error;
  } finally {
    unsubscribe();
  }
}
3298+
/**
 * Translates a local STT server event into the capture-event shape that is
 * broadcast to listeners.
 *
 * @param {object} event - Event object emitted by the local STT server.
 * @returns {object | null} The capture event, or `null` when `event` is not an
 *   object, has an unrecognized `eventType`, or is a segment event without a
 *   `segment`.
 */
function captureEventFromLocalSTTEvent(event) {
  if (!event || typeof event !== "object") {
    return null;
  }

  switch (event.eventType) {
    case "partial_segment":
    case "final_segment":
      if (!event.segment) {
        return null;
      }
      return {
        kind: "transcriptionSegment",
        transcriptionSegment: event.segment,
      };

    case "progress":
      return {
        kind: "transcriptionProgress",
        transcriptionProgress: {
          processedSeconds: Number(event.progress?.processedSeconds || 0),
          // The server's `receivedSeconds` is reported as the total.
          totalSeconds: Number(event.progress?.receivedSeconds || 0),
        },
      };

    case "error":
      return {
        kind: "failed",
        errorMessage: event.error?.message || "Local STT failed.",
      };

    case "session_finished":
      return {
        kind: "stateChanged",
        state: "finalizing",
        detail: "Local transcription finished.",
      };

    default:
      return null;
  }
}
3333+
/**
 * Streams a PCM16 WAV file as fixed-duration chunks of mono samples.
 *
 * The header is parsed with `readPcm16WavHeader`; the data section is then
 * read in whole-frame pieces and each piece is converted with
 * `pcm16BufferToMonoFloat32` before it is yielded. The file handle is closed
 * when iteration ends, including when the consumer stops early or a read fails.
 *
 * @param {string} recordingPath - Path of the WAV file to read.
 * @param {number} chunkDurationSeconds - Target duration of each chunk; at
 *   least one frame is always read per chunk.
 * @yields {{ sampleRate: number, samples: number[] }} One chunk of mono samples.
 */
async function* readLocalSTTPcmChunks(recordingPath, chunkDurationSeconds) {
  const file = await fs.open(recordingPath, "r");
  try {
    const header = await readPcm16WavHeader(file);
    const framesPerChunk = Math.max(1, Math.floor(header.sampleRate * chunkDurationSeconds));
    const bytesPerChunk = framesPerChunk * header.blockAlign;
    // One buffer is reused for every read; the samples are copied out of it
    // during conversion, before the next read overwrites it.
    const buffer = Buffer.alloc(bytesPerChunk);
    let position = header.dataOffset;
    let remaining = header.dataSize;

    while (remaining > 0) {
      // Only request whole frames; a trailing partial frame is dropped.
      const bytesToRead = Math.min(buffer.length, remaining);
      const alignedBytesToRead = bytesToRead - (bytesToRead % header.blockAlign);
      if (alignedBytesToRead <= 0) {
        break;
      }

      const { bytesRead } = await file.read(buffer, 0, alignedBytesToRead, position);
      if (bytesRead <= 0) {
        break;
      }

      // A short read can end mid-frame; keep only the complete frames and
      // resume the next read right after them.
      const alignedBytesRead = bytesRead - (bytesRead % header.blockAlign);
      if (alignedBytesRead <= 0) {
        break;
      }

      yield {
        sampleRate: header.sampleRate,
        samples: pcm16BufferToMonoFloat32(buffer.subarray(0, alignedBytesRead), header.channels),
      };
      position += alignedBytesRead;
      remaining -= alignedBytesRead;
    }
  } finally {
    await file.close();
  }
}
3369+
/**
 * Parses the RIFF/WAVE header of an open file and locates its PCM16 data.
 *
 * Chunks are walked from byte 12 until the `data` chunk is found; unknown
 * chunks are skipped. Scanning stops early when a chunk's declared size runs
 * past the end of the file, so a truncated `data` chunk is reported as an
 * unsupported file.
 *
 * @param {object} file - Open file handle providing `stat()` and
 *   `read(buffer, offset, length, position)`.
 * @returns {Promise<{ sampleRate: number, channels: number, blockAlign: number, dataOffset: number, dataSize: number }>}
 *   Format details plus the byte offset and byte length of the sample data.
 * @throws {Error} When the file is not RIFF/WAVE, the `fmt ` chunk is invalid,
 *   or the audio is not 16-bit PCM with a non-empty `data` chunk.
 */
async function readPcm16WavHeader(file) {
  const stat = await file.stat();

  const riffHeader = Buffer.alloc(12);
  const riffRead = await file.read(riffHeader, 0, riffHeader.length, 0);
  const isRiffWave =
    riffRead.bytesRead === riffHeader.length &&
    riffHeader.subarray(0, 4).toString("ascii") === "RIFF" &&
    riffHeader.subarray(8, 12).toString("ascii") === "WAVE";
  if (!isRiffWave) {
    throw new Error("unsupported WAV format for local STT: expected RIFF/WAVE data");
  }

  let cursor = 12;
  let sampleRate = 0;
  let channels = 0;
  let bitsPerSample = 0;
  let audioFormat = 0;
  let dataOffset = 0;
  let dataSize = 0;
  const chunkHeader = Buffer.alloc(8);

  while (cursor + chunkHeader.length <= stat.size) {
    const { bytesRead } = await file.read(chunkHeader, 0, chunkHeader.length, cursor);
    if (bytesRead !== chunkHeader.length) {
      break;
    }
    const chunkID = chunkHeader.subarray(0, 4).toString("ascii");
    const chunkSize = chunkHeader.readUInt32LE(4);
    cursor += 8;
    // A chunk that claims more bytes than the file holds ends the scan.
    if (cursor + chunkSize > stat.size) {
      break;
    }

    if (chunkID === "fmt ") {
      if (chunkSize < 16) {
        throw new Error("unsupported WAV format for local STT: invalid fmt chunk");
      }
      // Only the first 16 bytes of the fmt chunk are read.
      const fmtBuffer = Buffer.alloc(16);
      const fmtRead = await file.read(fmtBuffer, 0, fmtBuffer.length, cursor);
      if (fmtRead.bytesRead !== fmtBuffer.length) {
        throw new Error("unsupported WAV format for local STT: invalid fmt chunk");
      }
      audioFormat = fmtBuffer.readUInt16LE(0);
      channels = fmtBuffer.readUInt16LE(2);
      sampleRate = fmtBuffer.readUInt32LE(4);
      bitsPerSample = fmtBuffer.readUInt16LE(14);
    } else if (chunkID === "data") {
      dataOffset = cursor;
      dataSize = chunkSize;
      break;
    }

    // RIFF chunks are word-aligned: odd-sized chunks carry one pad byte.
    cursor += chunkSize + (chunkSize % 2);
  }

  // audioFormat 1 is the only accepted format tag, and only at 16 bits per sample.
  const isSupported =
    audioFormat === 1 &&
    bitsPerSample === 16 &&
    channels > 0 &&
    sampleRate > 0 &&
    dataOffset > 0 &&
    dataSize > 0;
  if (!isSupported) {
    throw new Error("unsupported WAV format for local STT: only PCM16 WAV input is supported");
  }

  return {
    sampleRate,
    channels,
    // Derived rather than read from the header: 2 bytes per 16-bit sample.
    blockAlign: channels * 2,
    dataOffset,
    dataSize,
  };
}
3433+
/**
 * Converts interleaved little-endian 16-bit PCM into mono samples.
 *
 * Each sample is scaled by 1/32768 (giving values in [-1, 1)) and the
 * channels of every frame are averaged. Bytes after the last complete frame
 * are ignored. Despite the name, the result is a plain `Array` of numbers,
 * not a `Float32Array`.
 *
 * @param {Buffer} buffer - Interleaved PCM16 sample data.
 * @param {number} channels - Channel count per frame; must be positive.
 * @returns {number[]} One averaged sample per frame.
 */
function pcm16BufferToMonoFloat32(buffer, channels) {
  const bytesPerFrame = channels * 2;
  const frameCount = Math.floor(buffer.length / bytesPerFrame);
  const samples = new Array(frameCount);

  for (let frame = 0; frame < frameCount; frame += 1) {
    const frameOffset = frame * bytesPerFrame;
    let sum = 0;
    for (let channel = 0; channel < channels; channel += 1) {
      sum += buffer.readInt16LE(frameOffset + channel * 2) / 32768.0;
    }
    samples[frame] = sum / channels;
  }

  return samples;
}
3448+
3449+ async function transcribeLocalSTTAudioFileLegacy ( recordingPath , transcriptionConfiguration ) {
32093450 const request = {
32103451 version : 4 ,
32113452 id : randomUUID ( ) ,
@@ -3228,6 +3469,7 @@ class LocalSTTServerBridge {
32283469 this . pending = new Map ( ) ;
32293470 this . stdoutBuffer = Buffer . alloc ( 0 ) ;
32303471 this . stderrTail = [ ] ;
3472+ this . eventHandlers = new Set ( ) ;
32313473 }
32323474
32333475 ensureStarted ( ) {
@@ -3291,6 +3533,11 @@ class LocalSTTServerBridge {
32913533 return ;
32923534 }
32933535
3536+ if ( response . event ) {
3537+ this . emitEvent ( response ) ;
3538+ continue ;
3539+ }
3540+
32943541 const pending = this . pending . get ( response . id ) ;
32953542 if ( ! pending ) {
32963543 continue ;
@@ -3306,6 +3553,23 @@ class LocalSTTServerBridge {
33063553 }
33073554 }
33083555
3556+ emitEvent ( response ) {
3557+ for ( const handler of this . eventHandlers ) {
3558+ try {
3559+ handler ( response . event , response ) ;
3560+ } catch ( error ) {
3561+ console . error ( "Local STT event handler failed:" , error ) ;
3562+ }
3563+ }
3564+ }
3565+
3566+ onEvent ( handler ) {
3567+ this . eventHandlers . add ( handler ) ;
3568+ return ( ) => {
3569+ this . eventHandlers . delete ( handler ) ;
3570+ } ;
3571+ }
3572+
33093573 request ( request ) {
33103574 this . ensureStarted ( ) ;
33113575 return new Promise ( ( resolve , reject ) => {
@@ -3363,6 +3627,22 @@ function sendLocalSTTServerRequest(request) {
33633627 return localSTTServer . request ( request ) ;
33643628}
33653629
/**
 * Asks the local STT server to prepare the currently selected model.
 *
 * Does nothing when `MIRROR_NOTE_TEST_EXPORTS` is "1" or when no model /
 * transcription configuration is selected.
 *
 * @returns {Promise<void>} Settles when the `prepare_model` request completes;
 *   rejects when that request fails.
 */
async function warmLocalSTTServer() {
  if (process.env.MIRROR_NOTE_TEST_EXPORTS === "1") {
    return;
  }

  const { selectedModel, transcriptionConfiguration } = await selectedTranscriptionConfiguration();
  if (!selectedModel || !transcriptionConfiguration) {
    return;
  }

  await sendLocalSTTServerRequest({
    version: 4,
    // The bridge matches responses to requests by id, so the id must be unique.
    // A millisecond timestamp can repeat when two warm-ups start close together
    // (for example a settings update right after a model download).
    id: `warm-stt-${randomUUID()}`,
    command: "prepare_model",
    configuration: transcriptionConfiguration,
  });
}
3645+
33663646async function fetchCaptureCapabilities ( ) {
33673647 const { transcriptionConfiguration } = await selectedTranscriptionConfiguration ( ) ;
33683648 if ( ! transcriptionConfiguration ) {
@@ -3690,7 +3970,11 @@ function registerIPCHandlers() {
36903970 ipcMain . handle ( "settings:get" , ( ) => getSettings ( ) ) ;
36913971 ipcMain . handle ( "settings:update" , async ( _event , patch ) => {
36923972 await writeElectronSettings ( patch || { } ) ;
3693- return getSettings ( ) ;
3973+ const settings = await getSettings ( ) ;
3974+ warmLocalSTTServer ( ) . catch ( ( error ) => {
3975+ console . error ( "Local STT warmup failed after settings update:" , error ) ;
3976+ } ) ;
3977+ return settings ;
36943978 } ) ;
36953979 ipcMain . handle ( "settings:download-stt-model" , ( event , modelID ) => downloadLocalSTTModel ( modelID , event . sender ) ) ;
36963980 ipcMain . handle ( "settings:cancel-stt-model-download" , ( _event , modelID ) => cancelLocalSTTModelDownload ( modelID ) ) ;
@@ -3729,6 +4013,9 @@ function startElectronApp() {
37294013 createApplicationMenu ( ) ;
37304014 createWindow ( ) ;
37314015 createTray ( ) ;
4016+ warmLocalSTTServer ( ) . catch ( ( error ) => {
4017+ console . error ( "Local STT warmup failed:" , error ) ;
4018+ } ) ;
37324019 } ) ;
37334020
37344021 app . on ( "activate" , ( ) => {
0 commit comments