Skip to content

Commit 69b81c9

Browse files
committed
Adding Russian and Ukrainian stressing; fixing multiple values display
1 parent 438e0bd commit 69b81c9

1 file changed

Lines changed: 120 additions & 42 deletions

File tree

wiktionary_pron/scripts/main.js

Lines changed: 120 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,13 @@ async function transcribe(mode, translate = false, inputText = null) {
9696
console.log("processing", word);
9797
let { status, value } = await getIpa(word, lang, langStyle, langForm);
9898
let values = "";
99-
if (lang === "German") {
99+
if (
100+
lang === "German" ||
101+
lang === "Ukrainian" ||
102+
lang === "Czech" ||
103+
lang === "Russian" ||
104+
lang === "Lituanian"
105+
) {
100106
[value, values] = processGermanIpa(value);
101107
}
102108

@@ -168,7 +174,13 @@ async function transcribe(mode, translate = false, inputText = null) {
168174
let value;
169175
console.log(ipa.value);
170176
value = ipa.value;
171-
if (lang === "German" || lang === "Czech" || lang === "Lithuanian") {
177+
if (
178+
lang === "German" ||
179+
lang === "Czech" ||
180+
lang === "Lithuanian" ||
181+
lang === "Russian" ||
182+
lang === "Ukrainian"
183+
) {
172184
[value, values] = processGermanIpa(value);
173185
} else {
174186
values = "";
@@ -296,7 +308,13 @@ async function transcribe(mode, translate = false, inputText = null) {
296308
resultDiv.className = "cell";
297309
let value, values;
298310

299-
if (lang === "German" || lang === "Czech") {
311+
if (
312+
lang === "German" ||
313+
lang === "Czech" ||
314+
lang === "Ukrainian" ||
315+
lang === "Lituanian" ||
316+
lang === "Russian"
317+
) {
300318
[value, values] = processGermanIpa(results[i]?.value || "");
301319
} else {
302320
value = results[i]?.value;
@@ -372,7 +390,13 @@ async function transcribe(mode, translate = false, inputText = null) {
372390
console.log(err);
373391
} finally {
374392
console.log("finally");
375-
if (lang === "German" || lang === "Czech" || lang === "Lituanian") {
393+
if (
394+
lang === "German" ||
395+
lang === "Czech" ||
396+
lang === "Lituanian" ||
397+
lang === "Russian" ||
398+
lang === "Ukrainian"
399+
) {
376400
Array.from(document.querySelectorAll(".ipa")).map((x) => {
377401
if (
378402
Boolean(x.getAttribute("all_values")) &&
@@ -387,21 +411,33 @@ async function transcribe(mode, translate = false, inputText = null) {
387411
if (all_values === "") {
388412
return;
389413
}
390-
const c = event.target.textContent;
414+
let c = "";
415+
if (mode === "line") {
416+
c = event.target.getAttribute("content");
417+
} else {
418+
c = event.target.textContent;
419+
}
391420

392421
/**
 * Picks the alternative that follows `current` in a newline-separated list,
 * wrapping around to the first entry after the last one.
 *
 * Entries are trimmed and blank lines are ignored. Matching treats an ASCII
 * colon and the IPA length mark "ː" as the same character on BOTH sides, so
 * a value still matches its entry whichever of the two spellings either
 * side uses.
 *
 * @param {string} all_values - Alternatives separated by "\n".
 * @param {?string} current - The value currently shown; may be null/undefined
 *   (e.g. when the attribute it was read from is missing).
 * @returns {?string} The next alternative (as written in `all_values`);
 *   the first alternative when `current` is not among them; `current` itself,
 *   untouched, when there are fewer than two alternatives.
 */
function cycle(all_values, current) {
  // Fold ":" into "ː" so both spellings of a long sound compare equal.
  // `?? ""` keeps a missing value from throwing on `.trim()`.
  const normalize = (text) =>
    String(text ?? "")
      .trim()
      .replace(/:/g, "ː");

  const options = String(all_values ?? "")
    .split("\n")
    .map((item) => item.trim())
    .filter((item) => item !== ""); // drop blank lines

  if (options.length <= 1) {
    return current; // nothing to cycle through
  }

  const target = normalize(current);
  const currentIndex = options.findIndex(
    (option) => normalize(option) === target,
  );

  // findIndex yields -1 for an unknown value, and (-1 + 1) % length === 0,
  // so an unknown value restarts the cycle at the first alternative.
  return options[(currentIndex + 1) % options.length];
}
403438

404439
const new_value = cycle(all_values, c);
440+
405441
if (mode === "line") {
406442
event.target.setAttribute("content", new_value);
407443
} else {
@@ -411,37 +447,79 @@ async function transcribe(mode, translate = false, inputText = null) {
411447
});
412448
});
413449
}
414-
if (lang === "Ukrainian" || lang == "Russian") {
415-
function getStressing(word) {
416-
let stressedText = word;
417-
if (globalThis.lexicon[lang]) {
418-
let dictRecord = globalThis.lexicon[lang].get(
419-
word.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
420-
);
421-
if (
422-
word.trim().length > 0 &&
423-
dictRecord &&
424-
dictRecord.length >= word.length
425-
) {
426-
console.log(`found [${word}], [${dictRecord}]`);
427-
stressedText = dictRecord;
428-
}
429-
}
430-
return stressedText;
431-
}
450+
if (lang === "Ukrainian" || lang === "Russian") {
451+
// Define language-specific vowel rules once
452+
const VOWELS = {
453+
Russian: /[аэиуеюяёоы]/gi,
454+
Ukrainian: /[аеиіоуєюяї]/gi,
455+
};
456+
const VOWELS_REPLACE = {
457+
Russian: /[аэиуеюяёоыАЭИУЕЮЯЁОЫ]/,
458+
Ukrainian: /[аеиіоуєюяїАЕИІОУЄЮЯЇ]/,
459+
};
460+
const STRESS_MARK = "\u0301";
461+
462+
/**
 * Returns `vowel` with the stress mark (`STRESS_MARK`) attached.
 *
 * Two letters are special-cased:
 *  - lowercase Cyrillic "і" is swapped for the Latin dotless "ı" before the
 *    mark is appended (only the lowercase form is special-cased; uppercase
 *    "І" takes the default path);
 *  - "ё" / "Ё" are returned unchanged, since "ё" is treated as already
 *    stressed.
 *
 * @param {string} vowel - A single vowel character.
 * @returns {string} The vowel in its stressed form.
 */
const applyStress = (vowel) => {
  switch (vowel) {
    case "і":
      // Latin dotless 'ı' stands in for the dotted Cyrillic letter.
      return "ı" + STRESS_MARK;
    case "ё":
    case "Ё":
      // 'ё' is already stressed; no mark is added.
      return vowel;
    default:
      return vowel + STRESS_MARK;
  }
};
471+
472+
document.querySelectorAll(".input_text").forEach((element) => {
473+
element.textContent = element.textContent.replace(
474+
/[\p{Letter}\p{Mark}-]+/gu, // Matches each word
475+
(word) => {
476+
// --- Stage 1: Check for a STRESSED entry in the dictionary ---
477+
if (globalThis.lexicon?.[lang] && word.trim().length > 0) {
478+
const dictRecord =
479+
globalThis.lexicon[lang].get(word) ||
480+
globalThis.lexicon[lang].get(word.toLowerCase());
481+
482+
if (
483+
dictRecord &&
484+
!dictRecord.includes(",") &&
485+
dictRecord.includes(STRESS_MARK)
486+
) {
487+
// The dictionary provides a stressed version. This is the highest authority.
488+
// We transfer its stress to the original word to preserve case.
489+
const stressIndex = dictRecord.indexOf(STRESS_MARK);
490+
const vowelIndex = stressIndex - 1;
491+
if (vowelIndex >= 0 && vowelIndex < word.length) {
492+
const vowelToStress = word[vowelIndex];
493+
const stressedVowel = applyStress(vowelToStress);
494+
const finalWord =
495+
word.slice(0, vowelIndex) +
496+
stressedVowel +
497+
word.slice(vowelIndex + 1);
498+
499+
console.log(
500+
`found stressed record for [${word}] -> [${finalWord}]`,
501+
);
502+
return finalWord;
503+
} // Return the authoritative stressed word and STOP.
504+
}
505+
}
432506

433-
const addStressIfOneSyllable = (word) =>
434-
word.match(/[аеиоуєюяэёы]/gi)?.length === 1
435-
? word.replace(/[аеиоуєюяїАЕИІОУЄЮЯЭЁЫ]/, (match) => match + "\u0301")
436-
: word;
437-
document
438-
.querySelectorAll(".input_text")
439-
.forEach(
440-
(x) =>
441-
(x.textContent = getStressing(
442-
addStressIfOneSyllable(x.textContent),
443-
)),
507+
// --- Stage 2: Fallback to one-syllable stressing rule ---
508+
// This code now runs if:
509+
// a) The word was not in the dictionary.
510+
// b) The word was in the dictionary but had NO stress mark.
511+
const vowelRegex = VOWELS[lang];
512+
const syllables = word.match(vowelRegex);
513+
if (syllables && syllables.length === 1) {
514+
return word.replace(VOWELS_REPLACE[lang], (vowel) =>
515+
applyStress(vowel),
516+
);
517+
}
518+
519+
return word;
520+
},
444521
);
522+
});
445523
}
446524
globalThis.transcriptionMode = mode;
447525
globalThis.transcriptionLang = lang;

0 commit comments

Comments
 (0)