@@ -9,8 +9,8 @@ const LEXICON_LANGUAGES = {
99 Czech : "czech_lexicon.zip" ,
1010 French : "french_lexicon_v3.zip" ,
1111 Lithuanian : "lt_lexicon.zip" ,
12- Ukrainian : "uk_lexicon .zip" ,
13- Russian : "ru_lexicon .zip" ,
12+ Ukrainian : "uk_lexicon_v4 .zip" ,
13+ Russian : "ru_lexicon_v4 .zip" ,
1414 Icelandic : "is_lexicon.zip" ,
1515} ;
1616
@@ -29,7 +29,7 @@ class OptimizedV3Lexicon {
2929 } ;
3030 }
3131
32- async loadFromBlob ( blob ) {
32+ async loadFromBlob ( blob , language ) {
3333 const startTime = performance . now ( ) ;
3434
3535 try {
@@ -40,7 +40,7 @@ class OptimizedV3Lexicon {
4040 updateLoadingText ( "" , "" , "Parsing lexicon data" ) ;
4141
4242 const parseStart = performance . now ( ) ;
43- await this . parseV3Data ( jsonStr ) ;
43+ await this . parseV3Data ( jsonStr , language ) ;
4444 this . stats . parseTime = performance . now ( ) - parseStart ;
4545
4646 this . calculateMemoryUsage ( ) ;
@@ -62,21 +62,49 @@ class OptimizedV3Lexicon {
6262 }
6363 }
6464
65- async parseV3Data ( jsonStr ) {
65+ async parseV3Data ( jsonStr , language ) {
6666 const data = JSON . parse ( jsonStr ) ;
6767
6868 if ( Array . isArray ( data ) ) {
6969 // V3 format with prefix compression: [[prefix_len, suffix, ipa], ...]
70- console . log ( "📂 Processing V3 prefix compression format" ) ;
70+ const isV4Format = language === "Russian" || language === "Ukrainian" ;
71+
72+ if ( isV4Format ) {
73+ console . log ( "📂 Processing V4 prefix/value compression format" ) ;
74+ } else {
75+ console . log ( "📂 Processing V3 prefix compression format" ) ;
76+ }
7177
7278 let currentKey = "" ;
7379 const totalEntries = data . length ;
7480 const progressInterval = Math . floor ( totalEntries / 50 ) ; // Update every 2%
81+ const STRESS_MARK = "\u0301" ;
7582
7683 for ( let i = 0 ; i < data . length ; i ++ ) {
77- const [ prefixLen , suffix , ipa ] = data [ i ] ;
78- currentKey = currentKey . substring ( 0 , prefixLen ) + suffix ;
79- this . entries . set ( currentKey , ipa ) ;
84+ if ( isV4Format ) {
85+ // V4 DECODING LOGIC
86+ const [ prefixLen , suffix , valueEncoding ] = data [ i ] ;
87+ currentKey = currentKey . substring ( 0 , prefixLen ) + suffix ;
88+
89+ let finalValue ;
90+ if ( typeof valueEncoding === "number" ) {
91+ // It's an integer: the index of the stressed vowel.
92+ const stressPos = valueEncoding ;
93+ finalValue =
94+ currentKey . slice ( 0 , stressPos + 1 ) +
95+ STRESS_MARK +
96+ currentKey . slice ( stressPos + 1 ) ;
97+ } else {
98+ // It's a string: an exception (e.g., multi-form). Use it directly.
99+ finalValue = valueEncoding ;
100+ }
101+ this . entries . set ( currentKey , finalValue ) ;
102+ } else {
103+ // V3 DECODING LOGIC (original code)
104+ const [ prefixLen , suffix , ipa ] = data [ i ] ;
105+ currentKey = currentKey . substring ( 0 , prefixLen ) + suffix ;
106+ this . entries . set ( currentKey , ipa ) ;
107+ }
80108
81109 // Progress update with yielding for responsiveness
82110 if ( i % progressInterval === 0 ) {
@@ -89,8 +117,8 @@ class OptimizedV3Lexicon {
89117 ) } %)`,
90118 ) ;
91119
92- // Yield control every 4th progress update to prevent blocking
93- if ( i % ( progressInterval * 4 ) === 0 ) {
120+ // Yield control every 2nd progress update to prevent blocking
121+ if ( i % ( progressInterval * 2 ) === 0 ) {
94122 await new Promise ( ( resolve ) => setTimeout ( resolve , 0 ) ) ;
95123 }
96124 }
@@ -280,8 +308,13 @@ async function loadLexicon(language) {
280308 let worker ;
281309
282310 try {
283- // Special handling for French optimized format
284- if ( language === "French" || language === "German" ) {
311+ // Special handling for optimized format
312+ if (
313+ language === "French" ||
314+ language === "German" ||
315+ language === "Ukrainian" ||
316+ language === "Russian"
317+ ) {
285318 return await loadOptimizedLexicon ( language ) ;
286319 }
287320
@@ -348,7 +381,7 @@ async function loadOptimizedLexicon(language) {
348381
349382 const optimizedLexicon = new OptimizedV3Lexicon ( ) ;
350383 optimizedLexicon . stats . downloadTime = downloadTime ;
351- const success = await optimizedLexicon . loadFromBlob ( blob ) ;
384+ const success = await optimizedLexicon . loadFromBlob ( blob , language ) ;
352385
353386 if ( ! success ) {
354387 throw new Error ( "Failed to load optimized lexicon" ) ;
0 commit comments