@@ -96,7 +96,13 @@ async function transcribe(mode, translate = false, inputText = null) {
9696 console . log ( "processing" , word ) ;
9797 let { status, value } = await getIpa ( word , lang , langStyle , langForm ) ;
9898 let values = "" ;
// For these languages the IPA result is passed through processGermanIpa, which
// hands back the value to use plus a second result kept in `values`.
// Sibling checks in this file spell the language both "Lithuanian" and
// "Lituanian"; both are accepted here so this branch runs whichever spelling
// `lang` actually carries.
if (
  lang === "German" ||
  lang === "Ukrainian" ||
  lang === "Czech" ||
  lang === "Russian" ||
  lang === "Lithuanian" ||
  lang === "Lituanian"
) {
  [value, values] = processGermanIpa(value);
}
102108
@@ -168,7 +174,13 @@ async function transcribe(mode, translate = false, inputText = null) {
168174 let value ;
169175 console . log ( ipa . value ) ;
170176 value = ipa . value ;
171- if ( lang === "German" || lang === "Czech" || lang === "Lithuanian" ) {
177+ if (
178+ lang === "German" ||
179+ lang === "Czech" ||
180+ lang === "Lithuanian" ||
181+ lang === "Russian" ||
182+ lang === "Ukrainian"
183+ ) {
172184 [ value , values ] = processGermanIpa ( value ) ;
173185 } else {
174186 values = "" ;
@@ -296,7 +308,13 @@ async function transcribe(mode, translate = false, inputText = null) {
296308 resultDiv . className = "cell" ;
297309 let value , values ;
298310
299- if ( lang === "German" || lang === "Czech" ) {
311+ if (
312+ lang === "German" ||
313+ lang === "Czech" ||
314+ lang === "Ukrainian" ||
315+ lang === "Lituanian" ||
316+ lang === "Russian"
317+ ) {
300318 [ value , values ] = processGermanIpa ( results [ i ] ?. value || "" ) ;
301319 } else {
302320 value = results [ i ] ?. value ;
@@ -372,7 +390,13 @@ async function transcribe(mode, translate = false, inputText = null) {
372390 console . log ( err ) ;
373391 } finally {
374392 console . log ( "finally" ) ;
375- if ( lang === "German" || lang === "Czech" || lang === "Lituanian" ) {
393+ if (
394+ lang === "German" ||
395+ lang === "Czech" ||
396+ lang === "Lituanian" ||
397+ lang === "Russian" ||
398+ lang === "Ukrainian"
399+ ) {
376400 Array . from ( document . querySelectorAll ( ".ipa" ) ) . map ( ( x ) => {
377401 if (
378402 Boolean ( x . getAttribute ( "all_values" ) ) &&
@@ -387,21 +411,33 @@ async function transcribe(mode, translate = false, inputText = null) {
387411 if ( all_values === "" ) {
388412 return ;
389413 }
390- const c = event . target . textContent ;
414+ let c = "" ;
415+ if ( mode === "line" ) {
416+ c = event . target . getAttribute ( "content" ) ;
417+ } else {
418+ c = event . target . textContent ;
419+ }
391420
/**
 * Returns the alternative that follows `current` in a newline-separated list,
 * wrapping around to the first entry after the last one.
 *
 * @param {string} all_values - Newline-separated alternatives. Surrounding
 *   whitespace is trimmed and blank lines are dropped.
 * @param {?string} current - The value currently shown. May be null or
 *   undefined, e.g. when the attribute it was read from is not set.
 * @returns {?string} The next alternative; the first alternative when
 *   `current` is not in the list; `current` itself (unchanged) when there are
 *   fewer than two alternatives to cycle through.
 */
function cycle(all_values, current) {
  // ASCII ":" and the IPA length mark "ː" count as the same character when
  // looking for the current entry.
  const normalize = (text) => text.trim().replace(/:/g, "ː");

  const options = String(all_values ?? "")
    .split("\n")
    .map((item) => item.trim())
    .filter((item) => item !== "");
  if (options.length <= 1) {
    return current;
  }

  // A missing value must not throw; it simply matches nothing.
  const trimmedCurrent = String(current ?? "").trim();

  // Prefer an exact hit, then fall back to a ":"-insensitive comparison so
  // that options which themselves contain ":" can still be found.
  let currentIndex = options.indexOf(trimmedCurrent);
  if (currentIndex === -1) {
    const target = normalize(trimmedCurrent);
    currentIndex = options.findIndex((option) => normalize(option) === target);
  }
  if (currentIndex === -1) {
    return options[0];
  }

  return options[(currentIndex + 1) % options.length];
}
403438
404439 const new_value = cycle ( all_values , c ) ;
440+
405441 if ( mode === "line" ) {
406442 event . target . setAttribute ( "content" , new_value ) ;
407443 } else {
@@ -411,37 +447,79 @@ async function transcribe(mode, translate = false, inputText = null) {
411447 } ) ;
412448 } ) ;
413449 }
// Add stress marks to the words shown in every ".input_text" element when the
// language is Ukrainian or Russian.
if (lang === "Ukrainian" || lang === "Russian") {
  // Vowel letters per language. The global, case-insensitive form is used to
  // count vowels in a word; the non-global form targets the first vowel only.
  const VOWELS = {
    Russian: /[аэиуеюяёоы]/gi,
    Ukrainian: /[аеиіоуєюяї]/gi,
  };
  const VOWELS_REPLACE = {
    Russian: /[аэиуеюяёоыАЭИУЕЮЯЁОЫ]/,
    Ukrainian: /[аеиіоуєюяїАЕИІОУЄЮЯЇ]/,
  };
  // U+0301 COMBINING ACUTE ACCENT, appended after the stressed vowel.
  const STRESS_MARK = "\u0301";
  // A "word" is a run of letters, combining marks and hyphens.
  const WORD_PATTERN = /[\p{Letter}\p{Mark}-]+/gu;

  /**
   * Applies a stress mark to a vowel, with special handling for
   * Cyrillic letters like 'і' and 'ё' to ensure correct typography.
   *
   * @param {string} vowel - A single vowel character.
   * @returns {string} The vowel with the stress mark applied.
   */
  const applyStress = (vowel) => {
    // Lowercase 'і' is swapped for Latin dotless 'ı' before the accent is added.
    // NOTE(review): the result is no longer a Cyrillic letter, so copy/paste
    // and later lookups of this text will see a Latin character — confirm
    // this is acceptable.
    if (vowel === "і") return "ı" + STRESS_MARK;
    // 'ё' is already stressed, so it gets no extra mark.
    if (vowel === "ё" || vowel === "Ё") return vowel;
    return vowel + STRESS_MARK;
  };

  /**
   * Stage 1: copies the stress position from the lexicon record for `word`
   * onto `word` itself, so the original letter case is preserved.
   *
   * @param {string} word - One word matched by WORD_PATTERN.
   * @returns {?string} The stressed word, or null when the lexicon gives no
   *   usable stress position.
   */
  const stressFromLexicon = (word) => {
    const lexicon = globalThis.lexicon?.[lang];
    if (!lexicon) return null;

    const dictRecord = lexicon.get(word) || lexicon.get(word.toLowerCase());
    // Only records that carry a stress mark and contain no comma are used.
    // NOTE(review): a comma presumably separates several variants — confirm
    // against the lexicon format.
    if (
      !dictRecord ||
      dictRecord.includes(",") ||
      !dictRecord.includes(STRESS_MARK)
    ) {
      return null;
    }

    // The stress mark directly follows the stressed vowel in the record.
    const vowelIndex = dictRecord.indexOf(STRESS_MARK) - 1;
    if (vowelIndex < 0 || vowelIndex >= word.length) return null;

    const vowelToStress = word[vowelIndex];
    // If the record does not line up with the word letter-for-letter, the
    // index can land on a consonant; never put a stress mark there.
    if (!VOWELS_REPLACE[lang].test(vowelToStress)) return null;

    const finalWord =
      word.slice(0, vowelIndex) +
      applyStress(vowelToStress) +
      word.slice(vowelIndex + 1);
    console.log(`found stressed record for [${word}] -> [${finalWord}]`);
    return finalWord;
  };

  /**
   * Returns `word` with a stress mark added when one can be determined.
   *
   * @param {string} word - One word matched by WORD_PATTERN.
   * @returns {string} The stressed word, or `word` unchanged.
   */
  const stressWord = (word) => {
    // Already stressed (typed that way, or processed earlier): adding
    // another mark would stack a second accent on the same word.
    if (word.includes(STRESS_MARK)) return word;

    const fromLexicon = stressFromLexicon(word);
    if (fromLexicon !== null) return fromLexicon;

    // Stage 2: a word with exactly one vowel is stressed on that vowel.
    const syllables = word.match(VOWELS[lang]);
    if (syllables?.length === 1) {
      return word.replace(VOWELS_REPLACE[lang], (vowel) => applyStress(vowel));
    }

    return word;
  };

  document.querySelectorAll(".input_text").forEach((element) => {
    element.textContent = element.textContent.replace(WORD_PATTERN, (word) =>
      stressWord(word),
    );
  });
}
446524 globalThis . transcriptionMode = mode ;
447525 globalThis . transcriptionLang = lang ;
0 commit comments