@@ -40,6 +40,10 @@ class IndexedDBCache {
4040 }
4141}
4242
43+ /**
44+ * A class that mimics the EasySpeech API but uses the Microsoft Edge streaming TTS service.
45+ * The voice list is fetched directly from Microsoft first, falling back to a Cloudflare Worker farm.
46+ */
4347class StreamingTTS {
4448 #workers = [
4549 { base : "https://silent-unit-b6ca.hellpanderrr.workers.dev" , lastUsed : 0 } ,
@@ -48,7 +52,6 @@ class StreamingTTS {
4852 { base : "https://tts-4.hellpanderrr.workers.dev" , lastUsed : 0 } ,
4953 { base : "https://tts-5.hellpanderrr.workers.dev" , lastUsed : 0 } ,
5054 { base : "https://tts-6.hellpanderrr.workers.dev" , lastUsed : 0 }
51-
5255 ] ;
5356
5457 #requestDelayMs = 3000 ;
@@ -94,10 +97,23 @@ class StreamingTTS {
9497 async init ( ) {
9598 if ( this . #isInitialized) return true ;
9699
97- for ( let i = 0 ; i < this . #workers. length ; i ++ ) {
98- const worker = this . #getBestWorker( ) ;
99- worker . lastUsed = Date . now ( ) ;
100+ // 1. Try Direct Microsoft URL FIRST (Usually much faster and doesn't get blocked for /voices)
101+ try {
102+ const directUrl = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4" ;
103+ const response = await fetch ( directUrl ) ;
104+ if ( ! response . ok ) throw new Error ( `Direct error: ${ response . status } ` ) ;
105+ const data = await response . json ( ) ;
106+ this . #voices = this . #transformVoiceList( data ) ;
107+ this . #isInitialized = true ;
108+ console . debug ( "StreamingTTS initialized via Direct MS URL." ) ;
109+ return true ;
110+ } catch ( directError ) {
111+ console . warn ( "Direct voice fetch failed, attempting proxy workers..." , directError ) ;
112+ }
100113
114+ // 2. Fall back to the proxy workers if the direct request fails
115+ for ( let i = 0 ; i < this . #workers. length ; i ++ ) {
116+ const worker = this . #workers[ i ] ;
101117 try {
102118 const response = await fetch ( `${ worker . base } /voices` ) ;
103119 if ( ! response . ok ) throw new Error ( `Proxy error: ${ response . status } ` ) ;
@@ -112,27 +128,16 @@ class StreamingTTS {
112128 }
113129 }
114130
115- console . warn ( "All proxy workers failed, attempting direct fallback..." ) ;
116-
117- try {
118- const directUrl = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4" ;
119- const response = await fetch ( directUrl ) ;
120- if ( ! response . ok ) throw new Error ( `Direct error: ${ response . status } ` ) ;
121- const data = await response . json ( ) ;
122- this . #voices = this . #transformVoiceList( data ) ;
123- this . #isInitialized = true ;
124- return true ;
125- } catch ( directError ) {
126- console . error ( "All voice fetch methods failed." , directError ) ;
127- this . #voices = [ {
128- name : "Microsoft Aria Online (Natural) - English (United States)" ,
129- lang : "en-US" ,
130- default : true ,
131- raw : { ShortName : "en-US-AriaNeural" }
132- } ] ;
133- this . #isInitialized = true ;
134- return true ;
135- }
131+ // 3. Ultimate Fallback
132+ console . error ( "All voice fetch methods failed. Using default voice." ) ;
133+ this . #voices = [ {
134+ name : "Microsoft Aria Online (Natural) - English (United States)" ,
135+ lang : "en-US" ,
136+ default : true ,
137+ raw : { ShortName : "en-US-AriaNeural" }
138+ } ] ;
139+ this . #isInitialized = true ;
140+ return true ;
136141 }
137142
138143 voices ( ) {
@@ -192,7 +197,7 @@ class StreamingTTS {
192197 const maxAttempts = this . #workers. length ;
193198 let audioBlob = null ;
194199
195- // 2. Worker rotation loop (instant retry on error, NO delays here)
200+ // 2. Worker rotation loop
196201 while ( attempts < maxAttempts ) {
197202 if ( this . #currentAbortController. signal . aborted ) return ;
198203
@@ -224,7 +229,7 @@ class StreamingTTS {
224229 }
225230
226231 audioBlob = blob ;
227- break ; // Success, exit retry loop
232+ break ;
228233
229234 } catch ( error ) {
230235 if ( error . name === 'AbortError' ) {
@@ -240,9 +245,7 @@ class StreamingTTS {
240245 if ( this . #enableCache) {
241246 this . #cache. saveAudio ( cacheKey , audioBlob ) ;
242247 }
243-
244248 if ( this . #currentAbortController. signal . aborted ) return ;
245-
246249 this . #playBlob( audioBlob ) ;
247250 } else {
248251 console . error ( "All workers failed." ) ;
@@ -295,10 +298,12 @@ try {
295298} catch ( error ) {
296299 console . log ( "Failed to load Edge TTS engine: " , error ) ;
297300}
301+
298302const engines = {
299303 browser : EasySpeech ,
300304 edge : EdgeTTS ,
301305} ;
306+
302307let activeEngine = engines . browser ;
303308let activeEngineName = "browser" ;
304309
@@ -320,9 +325,10 @@ function populateVoiceList(langCode) {
320325 option . setAttribute ( "data-name" , displayName ) ;
321326 voiceSelect . appendChild ( option ) ;
322327 } ) ;
328+
323329 if ( langCode ) {
324- const voices = Array . from ( voiceSelect . options ) ;
325- const relevantVoices = voices . filter ( ( option ) =>
330+ const optionsArray = Array . from ( voiceSelect . options ) ;
331+ const relevantVoices = optionsArray . filter ( ( option ) =>
326332 ( option . getAttribute ( "data-lang" ) || "" ) . includes ( langCode ) ,
327333 ) ;
328334 if ( relevantVoices . length > 0 ) {
@@ -338,7 +344,6 @@ function getSelectedVoice() {
338344 ?. selectedOptions [ 0 ] ?. getAttribute ( "data-name" ) ;
339345
340346 if ( ! selectedVoiceName ) return voices [ 0 ] ;
341-
342347 return voices . find ( ( v ) => ( v . name || v . friendlyName ) === selectedVoiceName ) ;
343348}
344349
@@ -348,24 +353,17 @@ function tts(transcriptionMode) {
348353 ? document . querySelectorAll ( ".input_text" )
349354 : document . querySelectorAll ( "#result span" ) ;
350355 const lineButtons = document . querySelectorAll ( ".audio-popup-line" ) ;
351- const getVolume = ( ) =>
352- parseFloat ( document . querySelector ( "#tts_volume" ) . value ) / 100 ;
353- const getSpeed = ( ) =>
354- parseFloat ( document . querySelector ( "#tts_speed" ) . value ) / 100 ;
356+ const getVolume = ( ) => parseFloat ( document . querySelector ( "#tts_volume" ) . value ) / 100 ;
357+ const getSpeed = ( ) => parseFloat ( document . querySelector ( "#tts_speed" ) . value ) / 100 ;
355358
356359 lineButtons . forEach ( ( button ) => {
357360 button . addEventListener ( "click" , ( e ) => {
358- let lineText = Array . from (
359- e . currentTarget . parentElement . querySelectorAll ( ".input_text" ) ,
360- )
361- . map ( ( x ) => x . textContent )
362- . join ( " " ) ;
361+ let lineText = Array . from ( e . currentTarget . parentElement . querySelectorAll ( ".input_text" ) )
362+ . map ( ( x ) => x . textContent ) . join ( " " ) ;
363363 if ( ! lineText )
364- lineText = Array . from (
365- e . currentTarget . parentElement . querySelectorAll ( ".ipa" ) ,
366- )
367- . map ( ( x ) => x . getAttribute ( "data-word" ) )
368- . join ( " " ) ;
364+ lineText = Array . from ( e . currentTarget . parentElement . querySelectorAll ( ".ipa" ) )
365+ . map ( ( x ) => x . getAttribute ( "data-word" ) ) . join ( " " ) ;
366+
369367 activeEngine . speak ( {
370368 text : lineText ,
371369 voice : getSelectedVoice ( ) ,
@@ -380,6 +378,7 @@ function tts(transcriptionMode) {
380378 let timer ;
381379 const popup = el . previousElementSibling ;
382380 if ( ! popup ) return ;
381+
383382 const getTextContent = ( el ) => {
384383 switch ( transcriptionMode ) {
385384 case "default" :
@@ -392,6 +391,7 @@ function tts(transcriptionMode) {
392391 return el . textContent ;
393392 }
394393 } ;
394+
395395 popup . addEventListener ( "click" , ( ) =>
396396 activeEngine . speak ( {
397397 text : getTextContent ( el ) ,
@@ -401,6 +401,7 @@ function tts(transcriptionMode) {
401401 volume : getVolume ( )
402402 } ) ,
403403 ) ;
404+
404405 el . addEventListener ( "mouseenter" , ( ) => {
405406 popup . style . opacity = "1" ;
406407 popup . classList . add ( "show-popup" ) ;
@@ -412,6 +413,7 @@ function tts(transcriptionMode) {
412413 } , 3000 ) ;
413414 } ) ;
414415 } ) ;
416+
415417 el . addEventListener ( "mouseleave" , ( ) => {
416418 timer = setTimeout ( ( ) => {
417419 popup . style . opacity = "0" ;
@@ -438,7 +440,6 @@ let voiceSelect = null;
438440function setLanguageAndFindVoice ( language ) {
439441 try {
440442 if ( ! voiceSelect ) return ;
441-
442443 const normalizedLanguage = language . replace ( / _ / g, "-" ) ;
443444
444445 const currentVoices = activeEngine . voices ( ) ;
@@ -451,20 +452,15 @@ function setLanguageAndFindVoice(language) {
451452 return ;
452453 }
453454
454- const otherEngineNames = availableEngineNames . filter (
455- ( name ) => name !== activeEngineName ,
456- ) ;
455+ const otherEngineNames = availableEngineNames . filter ( ( name ) => name !== activeEngineName ) ;
457456 for ( const engineName of otherEngineNames ) {
458457 const otherEngine = engines [ engineName ] ;
459- bestVoice = otherEngine
460- . voices ( )
461- . find ( ( v ) => v . lang . replace ( / _ / g, "-" ) . startsWith ( normalizedLanguage ) ) ;
458+ bestVoice = otherEngine . voices ( ) . find ( ( v ) => v . lang . replace ( / _ / g, "-" ) . startsWith ( normalizedLanguage ) ) ;
462459
463460 if ( bestVoice ) {
464461 activeEngineName = engineName ;
465462 activeEngine = otherEngine ;
466463 engineSwitch . value = engineName ;
467-
468464 populateVoiceList ( ) ;
469465 voiceSelect . value = bestVoice . name || bestVoice . friendlyName ;
470466 return ;
@@ -475,53 +471,72 @@ function setLanguageAndFindVoice(language) {
475471 }
476472}
477473
474+ // ============================================================================
475+ // == Parallel Initialization (Decoupled)
476+ // ============================================================================
478477try {
479- ( async ( ) => {
480- engineSwitch = document . querySelector ( "#tts_switch" ) ;
481- voiceSelect = document . querySelector ( "#tts" ) ;
482-
483- const results = await Promise . allSettled ( [
484- EasySpeech . init ( { maxTimeout : 5000 , interval : 250 } ) ,
485- EdgeTTS . init ( ) ,
486- ] ) ;
487- if ( results [ 0 ] . status === "fulfilled" ) availableEngineNames . push ( "browser" ) ;
488- if ( results [ 1 ] . status === "fulfilled" ) availableEngineNames . push ( "edge" ) ;
489-
490- const browserSuccess = availableEngineNames . includes ( "browser" ) ;
491- const edgeSuccess = availableEngineNames . includes ( "edge" ) ;
492-
493- if ( ! browserSuccess && ! edgeSuccess ) {
494- engineSwitch . innerHTML = "<option>TTS Unavailable</option>" ;
495- return ;
496- }
478+ engineSwitch = document . querySelector ( "#tts_switch" ) ;
479+ voiceSelect = document . querySelector ( "#tts" ) ;
497480
498- if ( ! edgeSuccess ) {
499- const edgeOption = engineSwitch . options [ 1 ] ;
500- edgeOption . disabled = true ;
501- edgeOption . textContent += " (Unavailable )" ;
502- }
481+ // Indicate that Edge is loading initially
482+ if ( engineSwitch && engineSwitch . options [ 1 ] ) {
483+ engineSwitch . options [ 1 ] . disabled = true ;
484+ engineSwitch . options [ 1 ] . textContent += " (Loading... )" ;
485+ }
503486
504- if ( ! browserSuccess ) {
505- engineSwitch . options [ 0 ] . disabled = true ;
506- if ( edgeSuccess ) {
507- engineSwitch . value = "edge" ;
508- activeEngineName = "edge" ;
509- activeEngine = engines . edge ;
510- }
511- }
487+ // 1. Initialize Browser Engine (Fast)
488+ EasySpeech . init ( { maxTimeout : 5000 , interval : 250 } )
489+ . then ( ( ) => {
490+ availableEngineNames . push ( "browser" ) ;
491+ if ( activeEngineName === "browser" ) {
492+ populateVoiceList ( ) ; // Populate immediately
493+ }
494+ } )
495+ . catch ( ( error ) => {
496+ console . error ( "Standard browser TTS engine failed to load." , error ) ;
497+ if ( engineSwitch && engineSwitch . options [ 0 ] ) {
498+ engineSwitch . options [ 0 ] . disabled = true ;
499+ }
500+ } ) ;
501+
502+ // 2. Initialize Edge Engine in Background
503+ EdgeTTS . init ( )
504+ . then ( ( ) => {
505+ availableEngineNames . push ( "edge" ) ;
506+ if ( engineSwitch && engineSwitch . options [ 1 ] ) {
507+ engineSwitch . options [ 1 ] . disabled = false ;
508+ engineSwitch . options [ 1 ] . textContent = engineSwitch . options [ 1 ] . textContent . replace ( " (Loading...)" , "" ) . replace ( " (Unavailable)" , "" ) ;
509+ }
512510
511+ // Auto-switch to Edge if it is already the active engine, or if the browser engine has not registered as available (it failed OR is still initializing)
512+ if ( ! availableEngineNames . includes ( "browser" ) || activeEngineName === "edge" ) {
513+ engineSwitch . value = "edge" ;
514+ activeEngineName = "edge" ;
515+ activeEngine = engines . edge ;
516+ populateVoiceList ( ) ;
517+ }
518+ } )
519+ . catch ( ( error ) => {
520+ console . error ( "Enhanced Edge TTS engine failed to load." , error ) ;
521+ if ( engineSwitch && engineSwitch . options [ 1 ] ) {
522+ engineSwitch . options [ 1 ] . disabled = true ;
523+ engineSwitch . options [ 1 ] . textContent = engineSwitch . options [ 1 ] . textContent . replace ( " (Loading...)" , " (Unavailable)" ) ;
524+ }
525+ } ) ;
526+
527+ // 3. Attach Event Listeners Immediately
528+ if ( engineSwitch ) {
513529 engineSwitch . addEventListener ( "change" , handleEngineChange ) ;
530+ }
514531
515- populateVoiceList ( ) ;
532+ if ( document . readyState === "loading" ) {
533+ document . addEventListener ( "DOMContentLoaded" , ( ) => tts ( "default" ) ) ;
534+ } else {
535+ tts ( "default" ) ;
536+ }
516537
517- if ( document . readyState === "loading" ) {
518- document . addEventListener ( "DOMContentLoaded" , ( ) => tts ( "default" ) ) ;
519- } else {
520- tts ( "default" ) ;
521- }
522- } ) ( ) ;
523538} catch ( error ) {
524- console . error ( "Error loading TTS engines : " , error ) ;
539+ console . error ( "Error loading TTS setup : " , error ) ;
525540}
526541
527542export { tts , setLanguageAndFindVoice } ;
0 commit comments