Skip to content

Commit b4c55b2

Browse files
committed
separating msedge and browser init, making direct ms url first
1 parent 47bcbda commit b4c55b2

1 file changed

Lines changed: 108 additions & 93 deletions

File tree

wiktionary_pron/scripts/tts.js

Lines changed: 108 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ class IndexedDBCache {
4040
}
4141
}
4242

43+
/**
44+
* A class that mimics the EasySpeech API but uses the Microsoft Edge
45+
* streaming TTS service via a Cloudflare Worker farm (init() requests the voice list directly from Microsoft first and falls back to the workers).
46+
*/
4347
class StreamingTTS {
4448
#workers = [
4549
{base: "https://silent-unit-b6ca.hellpanderrr.workers.dev", lastUsed: 0},
@@ -48,7 +52,6 @@ class StreamingTTS {
4852
{base: "https://tts-4.hellpanderrr.workers.dev", lastUsed: 0},
4953
{base: "https://tts-5.hellpanderrr.workers.dev", lastUsed: 0},
5054
{base: "https://tts-6.hellpanderrr.workers.dev", lastUsed: 0}
51-
5255
];
5356

5457
#requestDelayMs = 3000;
@@ -94,10 +97,23 @@ class StreamingTTS {
9497
async init() {
9598
if (this.#isInitialized) return true;
9699

97-
for (let i = 0; i < this.#workers.length; i++) {
98-
const worker = this.#getBestWorker();
99-
worker.lastUsed = Date.now();
100+
// 1. Try Direct Microsoft URL FIRST (Usually much faster and doesn't get blocked for /voices)
101+
try {
102+
const directUrl = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";
103+
const response = await fetch(directUrl);
104+
if (!response.ok) throw new Error(`Direct error: ${response.status}`);
105+
const data = await response.json();
106+
this.#voices = this.#transformVoiceList(data);
107+
this.#isInitialized = true;
108+
console.debug("StreamingTTS initialized via Direct MS URL.");
109+
return true;
110+
} catch (directError) {
111+
console.warn("Direct voice fetch failed, attempting proxy workers...", directError);
112+
}
100113

114+
// 2. Fallback to Proxies if direct fails
115+
for (let i = 0; i < this.#workers.length; i++) {
116+
const worker = this.#workers[i];
101117
try {
102118
const response = await fetch(`${worker.base}/voices`);
103119
if (!response.ok) throw new Error(`Proxy error: ${response.status}`);
@@ -112,27 +128,16 @@ class StreamingTTS {
112128
}
113129
}
114130

115-
console.warn("All proxy workers failed, attempting direct fallback...");
116-
117-
try {
118-
const directUrl = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";
119-
const response = await fetch(directUrl);
120-
if (!response.ok) throw new Error(`Direct error: ${response.status}`);
121-
const data = await response.json();
122-
this.#voices = this.#transformVoiceList(data);
123-
this.#isInitialized = true;
124-
return true;
125-
} catch (directError) {
126-
console.error("All voice fetch methods failed.", directError);
127-
this.#voices = [{
128-
name: "Microsoft Aria Online (Natural) - English (United States)",
129-
lang: "en-US",
130-
default: true,
131-
raw: {ShortName: "en-US-AriaNeural"}
132-
}];
133-
this.#isInitialized = true;
134-
return true;
135-
}
131+
// 3. Ultimate Fallback
132+
console.error("All voice fetch methods failed. Using default voice.");
133+
this.#voices = [{
134+
name: "Microsoft Aria Online (Natural) - English (United States)",
135+
lang: "en-US",
136+
default: true,
137+
raw: {ShortName: "en-US-AriaNeural"}
138+
}];
139+
this.#isInitialized = true;
140+
return true;
136141
}
137142

138143
voices() {
@@ -192,7 +197,7 @@ class StreamingTTS {
192197
const maxAttempts = this.#workers.length;
193198
let audioBlob = null;
194199

195-
// 2. Worker rotation loop (instant retry on error, NO delays here)
200+
// 2. Worker rotation loop
196201
while (attempts < maxAttempts) {
197202
if (this.#currentAbortController.signal.aborted) return;
198203

@@ -224,7 +229,7 @@ class StreamingTTS {
224229
}
225230

226231
audioBlob = blob;
227-
break; // Success, exit retry loop
232+
break;
228233

229234
} catch (error) {
230235
if (error.name === 'AbortError') {
@@ -240,9 +245,7 @@ class StreamingTTS {
240245
if (this.#enableCache) {
241246
this.#cache.saveAudio(cacheKey, audioBlob);
242247
}
243-
244248
if (this.#currentAbortController.signal.aborted) return;
245-
246249
this.#playBlob(audioBlob);
247250
} else {
248251
console.error("All workers failed.");
@@ -295,10 +298,12 @@ try {
295298
} catch (error) {
296299
console.log("Failed to load Edge TTS engine: ", error);
297300
}
301+
298302
const engines = {
299303
browser: EasySpeech,
300304
edge: EdgeTTS,
301305
};
306+
302307
let activeEngine = engines.browser;
303308
let activeEngineName = "browser";
304309

@@ -320,9 +325,10 @@ function populateVoiceList(langCode) {
320325
option.setAttribute("data-name", displayName);
321326
voiceSelect.appendChild(option);
322327
});
328+
323329
if (langCode) {
324-
const voices = Array.from(voiceSelect.options);
325-
const relevantVoices = voices.filter((option) =>
330+
const optionsArray = Array.from(voiceSelect.options);
331+
const relevantVoices = optionsArray.filter((option) =>
326332
(option.getAttribute("data-lang") || "").includes(langCode),
327333
);
328334
if (relevantVoices.length > 0) {
@@ -338,7 +344,6 @@ function getSelectedVoice() {
338344
?.selectedOptions[0]?.getAttribute("data-name");
339345

340346
if (!selectedVoiceName) return voices[0];
341-
342347
return voices.find((v) => (v.name || v.friendlyName) === selectedVoiceName);
343348
}
344349

@@ -348,24 +353,17 @@ function tts(transcriptionMode) {
348353
? document.querySelectorAll(".input_text")
349354
: document.querySelectorAll("#result span");
350355
const lineButtons = document.querySelectorAll(".audio-popup-line");
351-
const getVolume = () =>
352-
parseFloat(document.querySelector("#tts_volume").value) / 100;
353-
const getSpeed = () =>
354-
parseFloat(document.querySelector("#tts_speed").value) / 100;
356+
const getVolume = () => parseFloat(document.querySelector("#tts_volume").value) / 100;
357+
const getSpeed = () => parseFloat(document.querySelector("#tts_speed").value) / 100;
355358

356359
lineButtons.forEach((button) => {
357360
button.addEventListener("click", (e) => {
358-
let lineText = Array.from(
359-
e.currentTarget.parentElement.querySelectorAll(".input_text"),
360-
)
361-
.map((x) => x.textContent)
362-
.join(" ");
361+
let lineText = Array.from(e.currentTarget.parentElement.querySelectorAll(".input_text"))
362+
.map((x) => x.textContent).join(" ");
363363
if (!lineText)
364-
lineText = Array.from(
365-
e.currentTarget.parentElement.querySelectorAll(".ipa"),
366-
)
367-
.map((x) => x.getAttribute("data-word"))
368-
.join(" ");
364+
lineText = Array.from(e.currentTarget.parentElement.querySelectorAll(".ipa"))
365+
.map((x) => x.getAttribute("data-word")).join(" ");
366+
369367
activeEngine.speak({
370368
text: lineText,
371369
voice: getSelectedVoice(),
@@ -380,6 +378,7 @@ function tts(transcriptionMode) {
380378
let timer;
381379
const popup = el.previousElementSibling;
382380
if (!popup) return;
381+
383382
const getTextContent = (el) => {
384383
switch (transcriptionMode) {
385384
case "default":
@@ -392,6 +391,7 @@ function tts(transcriptionMode) {
392391
return el.textContent;
393392
}
394393
};
394+
395395
popup.addEventListener("click", () =>
396396
activeEngine.speak({
397397
text: getTextContent(el),
@@ -401,6 +401,7 @@ function tts(transcriptionMode) {
401401
volume: getVolume()
402402
}),
403403
);
404+
404405
el.addEventListener("mouseenter", () => {
405406
popup.style.opacity = "1";
406407
popup.classList.add("show-popup");
@@ -412,6 +413,7 @@ function tts(transcriptionMode) {
412413
}, 3000);
413414
});
414415
});
416+
415417
el.addEventListener("mouseleave", () => {
416418
timer = setTimeout(() => {
417419
popup.style.opacity = "0";
@@ -438,7 +440,6 @@ let voiceSelect = null;
438440
function setLanguageAndFindVoice(language) {
439441
try {
440442
if (!voiceSelect) return;
441-
442443
const normalizedLanguage = language.replace(/_/g, "-");
443444

444445
const currentVoices = activeEngine.voices();
@@ -451,20 +452,15 @@ function setLanguageAndFindVoice(language) {
451452
return;
452453
}
453454

454-
const otherEngineNames = availableEngineNames.filter(
455-
(name) => name !== activeEngineName,
456-
);
455+
const otherEngineNames = availableEngineNames.filter((name) => name !== activeEngineName);
457456
for (const engineName of otherEngineNames) {
458457
const otherEngine = engines[engineName];
459-
bestVoice = otherEngine
460-
.voices()
461-
.find((v) => v.lang.replace(/_/g, "-").startsWith(normalizedLanguage));
458+
bestVoice = otherEngine.voices().find((v) => v.lang.replace(/_/g, "-").startsWith(normalizedLanguage));
462459

463460
if (bestVoice) {
464461
activeEngineName = engineName;
465462
activeEngine = otherEngine;
466463
engineSwitch.value = engineName;
467-
468464
populateVoiceList();
469465
voiceSelect.value = bestVoice.name || bestVoice.friendlyName;
470466
return;
@@ -475,53 +471,72 @@ function setLanguageAndFindVoice(language) {
475471
}
476472
}
477473

474+
// ============================================================================
475+
// == Parallel Initialization (Decoupled)
476+
// ============================================================================
478477
try {
479-
(async () => {
480-
engineSwitch = document.querySelector("#tts_switch");
481-
voiceSelect = document.querySelector("#tts");
482-
483-
const results = await Promise.allSettled([
484-
EasySpeech.init({ maxTimeout: 5000, interval: 250 }),
485-
EdgeTTS.init(),
486-
]);
487-
if (results[0].status === "fulfilled") availableEngineNames.push("browser");
488-
if (results[1].status === "fulfilled") availableEngineNames.push("edge");
489-
490-
const browserSuccess = availableEngineNames.includes("browser");
491-
const edgeSuccess = availableEngineNames.includes("edge");
492-
493-
if (!browserSuccess && !edgeSuccess) {
494-
engineSwitch.innerHTML = "<option>TTS Unavailable</option>";
495-
return;
496-
}
478+
engineSwitch = document.querySelector("#tts_switch");
479+
voiceSelect = document.querySelector("#tts");
497480

498-
if (!edgeSuccess) {
499-
const edgeOption = engineSwitch.options[1];
500-
edgeOption.disabled = true;
501-
edgeOption.textContent += " (Unavailable)";
502-
}
481+
// Indicate that Edge is loading initially
482+
if (engineSwitch && engineSwitch.options[1]) {
483+
engineSwitch.options[1].disabled = true;
484+
engineSwitch.options[1].textContent += " (Loading...)";
485+
}
503486

504-
if (!browserSuccess) {
505-
engineSwitch.options[0].disabled = true;
506-
if (edgeSuccess) {
507-
engineSwitch.value = "edge";
508-
activeEngineName = "edge";
509-
activeEngine = engines.edge;
510-
}
511-
}
487+
// 1. Initialize Browser Engine (Fast)
488+
EasySpeech.init({maxTimeout: 5000, interval: 250})
489+
.then(() => {
490+
availableEngineNames.push("browser");
491+
if (activeEngineName === "browser") {
492+
populateVoiceList(); // Populate immediately
493+
}
494+
})
495+
.catch((error) => {
496+
console.error("Standard browser TTS engine failed to load.", error);
497+
if (engineSwitch && engineSwitch.options[0]) {
498+
engineSwitch.options[0].disabled = true;
499+
}
500+
});
501+
502+
// 2. Initialize Edge Engine in Background
503+
EdgeTTS.init()
504+
.then(() => {
505+
availableEngineNames.push("edge");
506+
if (engineSwitch && engineSwitch.options[1]) {
507+
engineSwitch.options[1].disabled = false;
508+
engineSwitch.options[1].textContent = engineSwitch.options[1].textContent.replace(" (Loading...)", "").replace(" (Unavailable)", "");
509+
}
512510

511+
// Auto-switch to Edge if it is already the active engine, or if Browser is not in availableEngineNames (it failed OR its init has not resolved yet)
512+
if (!availableEngineNames.includes("browser") || activeEngineName === "edge") {
513+
engineSwitch.value = "edge";
514+
activeEngineName = "edge";
515+
activeEngine = engines.edge;
516+
populateVoiceList();
517+
}
518+
})
519+
.catch((error) => {
520+
console.error("Enhanced Edge TTS engine failed to load.", error);
521+
if (engineSwitch && engineSwitch.options[1]) {
522+
engineSwitch.options[1].disabled = true;
523+
engineSwitch.options[1].textContent = engineSwitch.options[1].textContent.replace(" (Loading...)", " (Unavailable)");
524+
}
525+
});
526+
527+
// 3. Attach Event Listeners Immediately
528+
if (engineSwitch) {
513529
engineSwitch.addEventListener("change", handleEngineChange);
530+
}
514531

515-
populateVoiceList();
532+
if (document.readyState === "loading") {
533+
document.addEventListener("DOMContentLoaded", () => tts("default"));
534+
} else {
535+
tts("default");
536+
}
516537

517-
if (document.readyState === "loading") {
518-
document.addEventListener("DOMContentLoaded", () => tts("default"));
519-
} else {
520-
tts("default");
521-
}
522-
})();
523538
} catch (error) {
524-
console.error("Error loading TTS engines: ", error);
539+
console.error("Error loading TTS setup: ", error);
525540
}
526541

527542
export { tts, setLanguageAndFindVoice };

0 commit comments

Comments
 (0)