Skip to content

Commit 6318e90

Browse files
committed
Adding French lexicon
1 parent 712b179 commit 6318e90

4 files changed

Lines changed: 67 additions & 35 deletions

File tree

wiktionary_pron/scripts/lexicon.js

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ async function loadLexicon(language) {
44
const languages = {
55
German: "german_lexicon.zip",
66
Czech: "czech_lexicon.zip",
7+
French: "french_lexicon.zip",
78
};
89
const lexiconFolder = "./utils/";
910

wiktionary_pron/scripts/main.js

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,17 @@ import {
44
asyncMapStrict,
55
clearStorage,
66
createElementFromHTML,
7+
disableAll,
8+
enableAll,
79
get_ipa_no_cache,
810
memoizeLocalStorage,
911
wait,
10-
enableAll,
11-
disableAll,
1212
} from "./utils.js";
1313
import { tts } from "./tts.js";
1414
import { toPdf } from "./pdf_export.js";
1515
import { loadLexicon } from "./lexicon.js";
1616
import { macronize } from "./macronizer.js";
17+
1718
document.querySelector("#lang").disabled = false;
1819

1920
async function prepareTranscribe(lang) {
@@ -492,31 +493,44 @@ async function updateOptionsUponLanguageSelection(event) {
492493
const selectedLanguageElement = event.target;
493494
const selectedLanguage = selectedLanguageElement.value;
494495
const lang = languages[selectedLanguage];
496+
const urlParams = new URLSearchParams(window.location.search);
497+
let useDictionary = urlParams.get("dict");
498+
if (useDictionary === null) {
499+
useDictionary = "true";
500+
}
495501

496502
try {
497-
window.history.pushState({}, "", `?lang=${selectedLanguage}`);
503+
if (urlParams.get("lang") !== selectedLanguage) {
504+
window.history.pushState({}, "", `?lang=${selectedLanguage}`);
505+
}
498506
} catch (err) {
499507
console.log(err);
500508
}
501509
if (!(selectedLanguage in loadedLanguages)) {
502510
disableAll();
503511
await loadLanguage(lang.langCode);
512+
globalThis.lexicon = null;
504513
if (selectedLanguage === "Latin") {
505514
updateLoadingText("Macrons list", "");
506515
await macronize("");
507516
updateLoadingText("", "");
508517
}
509518

510-
if (selectedLanguage === "German") {
519+
if (selectedLanguage === "German" && useDictionary === "true") {
511520
updateLoadingText("German lexicon", "");
512521
globalThis.lexicon = await loadLexicon("German");
513522
updateLoadingText("", "");
514523
}
515-
if (selectedLanguage === "Czech") {
524+
if (selectedLanguage === "Czech" && useDictionary === "true") {
516525
updateLoadingText("Czech lexicon", "");
517526
globalThis.lexicon = await loadLexicon("Czech");
518527
updateLoadingText("", "");
519528
}
529+
if (selectedLanguage === "French" && useDictionary === "true") {
530+
updateLoadingText("French lexicon", "");
531+
globalThis.lexicon = await loadLexicon("French");
532+
updateLoadingText("", "");
533+
}
520534

521535
enableAll();
522536
loadedLanguages[selectedLanguage] = true;

wiktionary_pron/scripts/utils.js

Lines changed: 47 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,8 @@ async function asyncMapStrict(arr, fn) {
1313

1414
function sanitize(text) {
1515
return text
16-
.replace(
17-
/[^\p{L}\p{M}'pbtdʈɖcɟkɡqɢʔmɱnɳɲŋɴʙrʀɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟʘǀǃǂǁɓɗʄɠʛʼiyɨʉɯuɪʏʊeøɘɵɤoəɛœɜɞʌɔæɐaɶɑɒʍwɥʜʢʡɕʑɺɧ͜͡ˈˌːˑ̆|.̥̬ʰ̹̜̟̠̩̯̈̽˞̤̰̼ʷʲˠˤ̴̝̞̘̙̪̺̻̃ˡ̋̚˥̌˩́˦̂̄˧̀˨̏-]/gu,
18-
"",
19-
)
16+
.replace(/[^\p{L}\p{M}'-]/gu, "")
17+
.replaceAll("’", "'")
2018
.normalize("NFKC");
2119
}
2220

@@ -85,8 +83,8 @@ function clearStorage() {
8583
}
8684

8785
function get_ipa_no_cache(text, args) {
88-
console.log("doing actual IPA", text, args);
8986
const cleanText = sanitize(text);
87+
console.log("doing actual IPA", text, cleanText, args);
9088

9189
const [lang, langStyle, langForm] = args.split(";");
9290
let command = "";
@@ -116,18 +114,20 @@ function get_ipa_no_cache(text, args) {
116114
break;
117115
case "German":
118116
if (langForm === "Phonemic") {
119-
let dictRecord = globalThis.lexicon.get(
120-
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
121-
);
122-
if (!dictRecord) {
123-
dictRecord = globalThis.lexicon.get(
124-
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, "").toLowerCase(),
117+
if (globalThis.lexicon) {
118+
let dictRecord = globalThis.lexicon.get(
119+
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
125120
);
126-
}
127-
console.log(cleanText, dictRecord);
128-
if (dictRecord) {
129-
command = 'ipa="' + dictRecord + '";';
130-
break;
121+
if (!dictRecord) {
122+
dictRecord = globalThis.lexicon.get(
123+
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, "").toLowerCase(),
124+
);
125+
}
126+
console.log(cleanText, dictRecord);
127+
if (dictRecord) {
128+
command = 'ipa="' + dictRecord + '";';
129+
break;
130+
}
131131
}
132132
}
133133
command =
@@ -150,6 +150,21 @@ function get_ipa_no_cache(text, args) {
150150
break;
151151
case "French":
152152
if (langForm === "Phonemic") {
153+
if (globalThis.lexicon) {
154+
let dictRecord = globalThis.lexicon.get(
155+
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
156+
);
157+
if (!dictRecord) {
158+
dictRecord = globalThis.lexicon.get(
159+
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, "").toLowerCase(),
160+
);
161+
}
162+
console.log(cleanText, dictRecord);
163+
if (dictRecord) {
164+
command = 'ipa="' + dictRecord + '";';
165+
break;
166+
}
167+
}
153168
command = `(window.fr_ipa.show("${cleanText}")[0])`;
154169
}
155170

@@ -166,20 +181,21 @@ function get_ipa_no_cache(text, args) {
166181
break;
167182
case "Czech":
168183
if (langForm === "Phonemic") {
169-
let dictRecord = globalThis.lexicon.get(
170-
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
171-
);
172-
if (!dictRecord) {
173-
dictRecord = globalThis.lexicon.get(
174-
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, "").toLowerCase(),
184+
if (globalThis.lexicon) {
185+
let dictRecord = globalThis.lexicon.get(
186+
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, ""),
175187
);
188+
if (!dictRecord) {
189+
dictRecord = globalThis.lexicon.get(
190+
cleanText.replace(/[^\p{Letter}\p{Mark}-]+/gu, "").toLowerCase(),
191+
);
192+
}
193+
console.log(cleanText, dictRecord);
194+
if (dictRecord) {
195+
command = 'ipa="' + dictRecord + '";';
196+
break;
197+
}
176198
}
177-
console.log(cleanText, dictRecord);
178-
if (dictRecord) {
179-
command = 'ipa="' + dictRecord + '";';
180-
break;
181-
}
182-
183199
command = `(window.cs_ipa.toIPA("${cleanText}"))`;
184200
}
185201
break;
@@ -227,10 +243,11 @@ function get_ipa_no_cache(text, args) {
227243
if (!ipa) {
228244
return { value: text, status: "error" };
229245
}
230-
246+
console.log("before replace ipa ", ipa);
231247
if (langStyle === "Parisian (experimental)") {
232248
ipa = ipa
233-
.replace("ɔ̃̃̃̃̃", "õ")
249+
.replace("ɔ̃̃̃̃̃̃", "õ")
250+
.replace("ɔ̃", "õ")
234251
.replace("ɑ̃", "ɔ̃")
235252
.replace("œ̃", "ɑ̃")
236253
.replace("ɛ̃", "ɑ̃");
8.91 MB
Binary file not shown.

0 commit comments

Comments
 (0)