From 7a4eab57ff21fc886c43cb58b5ab94af2bbc7985 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Mon, 13 Oct 2025 09:54:20 -0700 Subject: [PATCH 1/6] Fix LT-22261: import word categories using flextext --- .../Interlinear/BIRDInterlinearImporter.cs | 100 +++++++++++++++--- 1 file changed, 86 insertions(+), 14 deletions(-) diff --git a/Src/LexText/Interlinear/BIRDInterlinearImporter.cs b/Src/LexText/Interlinear/BIRDInterlinearImporter.cs index 7bef0eba84..fb43a7fc4e 100644 --- a/Src/LexText/Interlinear/BIRDInterlinearImporter.cs +++ b/Src/LexText/Interlinear/BIRDInterlinearImporter.cs @@ -2,24 +2,23 @@ // This software is licensed under the LGPL, version 2.1 or later // (http://www.gnu.org/licenses/lgpl-2.1.html) -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using System.Text; -using System.Windows.Forms; -using SIL.LCModel.Core.Text; -using SIL.LCModel.Core.WritingSystems; -using SIL.LCModel.Core.KernelInterfaces; +using SIL.Extensions; using SIL.FieldWorks.Common.FwUtils; -using SIL.LCModel; -using SIL.LCModel.DomainServices; using SIL.FieldWorks.IText.FlexInterlinModel; +using SIL.LCModel; using SIL.LCModel.Application.ApplicationServices; using SIL.LCModel.Core.Cellar; +using SIL.LCModel.Core.KernelInterfaces; +using SIL.LCModel.Core.Text; +using SIL.LCModel.Core.WritingSystems; +using SIL.LCModel.DomainServices; using SIL.LCModel.Infrastructure; using SIL.LCModel.Utils; -using SIL.Extensions; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Windows.Forms; namespace SIL.FieldWorks.IText { @@ -807,6 +806,55 @@ private static IAnalysis CreateWordformWithWfiAnalysis(LcmCache cache, Word word } } + // Try to fill in category. + if (word.Items != null && wordForm.Analysis != null) + { + // Look for an existing category that matches a "pos". + bool catFound = false; + foreach (var item in word.Items) + { + if (wordForm.Analysis.CategoryRA != null) + { + // Category filled in. + break; + } + if (item.type == "pos") + { + ILgWritingSystem writingSystem = GetWsEngine(cache.WritingSystemFactory, item.lang); + if (writingSystem != null) + { + foreach (var cat in cache.LanguageProject.AllPartsOfSpeech) + { + if (MatchesCatNameOrAbbreviation(writingSystem.Handle, item.Value, cat)) + { + wordForm.Analysis.CategoryRA = cat; + catFound = true; + break; + } + } + } + } + } + if (catFound && wordForm.Analysis.CategoryRA == null) + { + // Create a new category. + IPartOfSpeech cat = cache.ServiceLocator.GetInstance().Create(); + cache.LanguageProject.PartsOfSpeechOA.PossibilitiesOS.Add(cat); + foreach (var item in word.Items) + { + if (item.type == "pos") + { + ILgWritingSystem writingSystem = GetWsEngine(cache.WritingSystemFactory, item.lang); + if (writingSystem != null) + { + cat.Name.set_String(writingSystem.Handle, item.Value); + } + } + } + wordForm.Analysis.CategoryRA = cat; + } + } + return wordForm; } @@ -819,6 +867,7 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word, // First, collect all expected forms and glosses from the Word var expectedForms = new Dictionary(); // wsHandle -> expected value var expectedGlosses = new Dictionary(); // wsHandle -> expected gloss + var expectedCats = new Dictionary(); // wsHandle -> expected cat IAnalysis candidateForm = null; ITsString wordForm = null; ITsString punctForm = null; @@ -870,6 +919,10 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word, expectedGlosses[ws.Handle] = wordItem.Value; break; + + case "pos": + expectedCats[ws.Handle] = wordItem.Value; + break; } } @@ -922,7 +975,7 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word, if (morphemeMatch) { - var matchingGloss = wfiAnalysis.MeaningsOC.FirstOrDefault(g => VerifyGlossesMatch(g, expectedGlosses)); + var matchingGloss = wfiAnalysis.MeaningsOC.FirstOrDefault(g => VerifyGlossesMatch(g, expectedGlosses, expectedCats)); if (matchingGloss != null) { analysis = matchingGloss; @@ -1030,7 +1083,8 @@ private static bool MatchPrimaryFormAndAddMissingAlternatives(IAnalysis wordForm // Helper method to verify that all expected glosses match the stored glosses private static bool VerifyGlossesMatch(IWfiGloss wfiGloss, - Dictionary expectedGlosses) + Dictionary expectedGlosses, + Dictionary expectedCats) { foreach (var expectedGloss in expectedGlosses) { @@ -1041,10 +1095,28 @@ private static bool VerifyGlossesMatch(IWfiGloss wfiGloss, if (storedGloss == null || storedGloss.Text != expectedValue) return false; // Mismatch found } + foreach (var expectedCat in expectedCats) + { + if (!MatchesCatNameOrAbbreviation(expectedCat.Key, expectedCat.Value, wfiGloss.Analysis?.CategoryRA)) + return false; + } return true; } + private static bool MatchesCatNameOrAbbreviation(int ws, string text, IPartOfSpeech cat) + { + if (cat == null) + return false; + ITsString name = cat.Name.get_String(ws); + if (name != null && name.Text == text) + return true; + ITsString abbr = cat.Abbreviation.get_String(ws); + if (abbr != null && abbr.Text == text) + return true; + return false; + } + /// /// /// The word Gloss. If multiple glosses, returns the last one created. From 83bb7d18c86833361db286ed3739d2f052760764 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Mon, 13 Oct 2025 12:10:39 -0700 Subject: [PATCH 2/6] Add unit tests for importing categories --- .../Interlinear/BIRDInterlinearImporter.cs | 7 +- .../ITextDllTests/BIRDFormatImportTests.cs | 72 +++++++++++++++++++ 2 files changed, 76 insertions(+), 3 deletions(-) diff --git a/Src/LexText/Interlinear/BIRDInterlinearImporter.cs b/Src/LexText/Interlinear/BIRDInterlinearImporter.cs index fb43a7fc4e..35d437d198 100644 --- a/Src/LexText/Interlinear/BIRDInterlinearImporter.cs +++ b/Src/LexText/Interlinear/BIRDInterlinearImporter.cs @@ -810,7 +810,7 @@ private static IAnalysis CreateWordformWithWfiAnalysis(LcmCache cache, Word word if (word.Items != null && wordForm.Analysis != null) { // Look for an existing category that matches a "pos". - bool catFound = false; + bool hasPOS = false; foreach (var item in word.Items) { if (wordForm.Analysis.CategoryRA != null) @@ -820,6 +820,7 @@ private static IAnalysis CreateWordformWithWfiAnalysis(LcmCache cache, Word word } if (item.type == "pos") { + hasPOS = true; ILgWritingSystem writingSystem = GetWsEngine(cache.WritingSystemFactory, item.lang); if (writingSystem != null) { @@ -828,14 +829,13 @@ private static IAnalysis CreateWordformWithWfiAnalysis(LcmCache cache, Word word if (MatchesCatNameOrAbbreviation(writingSystem.Handle, item.Value, cat)) { wordForm.Analysis.CategoryRA = cat; - catFound = true; break; } } } } } - if (catFound && wordForm.Analysis.CategoryRA == null) + if (hasPOS && wordForm.Analysis.CategoryRA == null) { // Create a new category. IPartOfSpeech cat = cache.ServiceLocator.GetInstance().Create(); @@ -848,6 +848,7 @@ private static IAnalysis CreateWordformWithWfiAnalysis(LcmCache cache, Word word if (writingSystem != null) { cat.Name.set_String(writingSystem.Handle, item.Value); + cat.Abbreviation.set_String(writingSystem.Handle, item.Value); } } } diff --git a/Src/LexText/Interlinear/ITextDllTests/BIRDFormatImportTests.cs b/Src/LexText/Interlinear/ITextDllTests/BIRDFormatImportTests.cs index dc0e205ba1..2b236b1e73 100644 --- a/Src/LexText/Interlinear/ITextDllTests/BIRDFormatImportTests.cs +++ b/Src/LexText/Interlinear/ITextDllTests/BIRDFormatImportTests.cs @@ -789,6 +789,78 @@ public void OneOfEachElementTypeTest() } } + [Test] + public void TestExistingWordCategory() + { + string title = "atrocious"; + string abbr = "atroc"; + //an interliner text example xml string + string xml = "" + + "" + + "1 Musical" + + "origem: mary poppins" + + "supercalifragilisticexpialidocious" + + "absurdo" + + "N" + + ""; + + // Create a category to find. + IPartOfSpeech cat = null; + NonUndoableUnitOfWorkHelper.Do(Cache.ActionHandlerAccessor, + () => + { + cat = Cache.ServiceLocator.GetInstance().Create(); + Cache.LanguageProject.PartsOfSpeechOA.PossibilitiesOS.Add(cat); + cat.Name.set_String(Cache.DefaultAnalWs, "N"); + }); + LinguaLinksImport li = new LinguaLinksImport(Cache, null, null); + LCModel.IText text = null; + using (var stream = new MemoryStream(Encoding.ASCII.GetBytes(xml.ToCharArray()))) + { + li.ImportInterlinear(new DummyProgressDlg(), stream, 0, ref text); + using (var firstEntry = Cache.LanguageProject.Texts.GetEnumerator()) + { + firstEntry.MoveNext(); + var imported = firstEntry.Current; + ISegment segment = imported.ContentsOA[0].SegmentsOS[0]; + // Verify that we found the category. + Assert.True(segment.AnalysesRS[0].Analysis.CategoryRA.Equals(cat)); + } + } + } + + [Test] + public void TestNewWordCategory() + { + string title = "atrocious"; + string abbr = "atroc"; + //an interliner text example xml string + string xml = "" + + "" + + "1 Musical" + + "origem: mary poppins" + + "supercalifragilisticexpialidocious" + + "absurdo" + + "X" + + ""; + + LinguaLinksImport li = new LinguaLinksImport(Cache, null, null); + LCModel.IText text = null; + using (var stream = new MemoryStream(Encoding.ASCII.GetBytes(xml.ToCharArray()))) + { + li.ImportInterlinear(new DummyProgressDlg(), stream, 0, ref text); + using (var firstEntry = Cache.LanguageProject.Texts.GetEnumerator()) + { + firstEntry.MoveNext(); + var imported = firstEntry.Current; + ISegment segment = imported.ContentsOA[0].SegmentsOS[0]; + // Verify that we created a category. + Assert.True(segment.AnalysesRS[0].Analysis.CategoryRA.Name.BestAnalysisAlternative.Text.Equals("X")); + Assert.True(segment.AnalysesRS[0].Analysis.CategoryRA.Abbreviation.BestAnalysisAlternative.Text.Equals("X")); + } + } + } + [Test] public void TestSpacesAroundPunct() { From 28cc18736f4cc279fd19c56ac63c0d2fa3a76539 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Wed, 15 Oct 2025 10:18:08 -0700 Subject: [PATCH 3/6] Fix bugs with matching analyses --- .../Interlinear/BIRDInterlinearImporter.cs | 113 +++++++++++++++--- 1 file changed, 97 insertions(+), 16 deletions(-) diff --git a/Src/LexText/Interlinear/BIRDInterlinearImporter.cs b/Src/LexText/Interlinear/BIRDInterlinearImporter.cs index 35d437d198..d2cdc2cbaf 100644 --- a/Src/LexText/Interlinear/BIRDInterlinearImporter.cs +++ b/Src/LexText/Interlinear/BIRDInterlinearImporter.cs @@ -769,8 +769,9 @@ private static IAnalysis CreateWordformWithWfiAnalysis(LcmCache cache, Word word { int ws_cf = GetWsEngine(wsFact, itemDict["cf"].Item1).Handle; ILexEntry entry = null; + // Match on lexeme, not citation form. var entries = lex_entry_repo.AllInstances().Where( - m => StringServices.CitationFormWithAffixTypeStaticForWs(m, ws_cf, string.Empty) == itemDict["cf"].Item2); + m => DecorateFormWithAffixMarkers(m.LexemeFormOA, LexemeFormStaticForWs(m, ws_cf, string.Empty)) == itemDict["cf"].Item2); if (entries.Count() == 1) { entry = entries.First(); @@ -786,21 +787,35 @@ private static IAnalysis CreateWordformWithWfiAnalysis(LcmCache cache, Word word if (itemDict.ContainsKey("gls")) // Lex. Gloss { int ws_gls = GetWsEngine(wsFact, itemDict["gls"].Item1).Handle; - ILexSense sense = entry.SensesOS.FirstOrDefault(s => s.Gloss.get_String(ws_gls).Text == itemDict["gls"].Item2); - if (sense != null) + IEnumerable senses = entry.SensesOS.Where(s => s.Gloss.get_String(ws_gls).Text == itemDict["gls"].Item2); + int sensesCount = senses.Count(); + if (senses.Count() > 1 && itemDict.ContainsKey("msa")) { - bundle.SenseRA = sense; + // Filter by MSA. + IEnumerable msaSenses = senses.Where(s => s.MorphoSyntaxAnalysisRA?.InterlinearAbbr == itemDict["msa"].Item2); + if (msaSenses.Count() > 0) + { + senses = msaSenses; + } } + bundle.SenseRA = senses.FirstOrDefault(); } } } if (itemDict.ContainsKey("msa")) // Lex. Gram. Info { - IMoMorphSynAnalysis match = msa_repo.AllInstances().FirstOrDefault(m => m.InterlinearAbbr == itemDict["msa"].Item2); - if (match != null) + if (bundle.SenseRA != null && bundle.SenseRA.MorphoSyntaxAnalysisRA?.InterlinearAbbr == itemDict["msa"].Item2) { - bundle.MsaRA = match; + bundle.MsaRA = bundle.SenseRA.MorphoSyntaxAnalysisRA; + } + else + { + IMoMorphSynAnalysis match = msa_repo.AllInstances().FirstOrDefault(m => m.InterlinearAbbr == itemDict["msa"].Item2); + if (match != null) + { + bundle.MsaRA = match; + } } } } @@ -859,6 +874,43 @@ private static IAnalysis CreateWordformWithWfiAnalysis(LcmCache cache, Word word return wordForm; } + // Based on StringServices.DecorateFormWithAffixMarkers. + private static string DecorateFormWithAffixMarkers(IMoForm mForm, string form) + { + // No type info...return simpler version of name. + if (mForm == null) + return form; + + // Add pre- post markers, if any. + var mmt = mForm.MorphTypeRA; + if (mmt == null) + return form; + if (!String.IsNullOrEmpty(mmt.Prefix)) + { + form = mmt.Prefix + form; + } + if (!String.IsNullOrEmpty(mmt.Postfix)) + { + form = form + mmt.Postfix; + } + return form; + } + + // Based on StringServices.LexemeFormStaticForWs. + private static string LexemeFormStaticForWs(ILexEntry entry, int wsVern, string defaultForm) + { + ITsString tss; + // try lexeme form + var form = entry.LexemeFormOA; + if (form != null) + { + tss = form.Form.get_String(wsVern); + if (tss != null && tss.Length > 0) + return tss.Text; + } + return defaultForm; + } + private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word, int mainWritingSystem, out IAnalysis analysis) @@ -949,6 +1001,38 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word, return true; } + analysis = FindMatchingAnalysis(cache, candidateWordform, word, expectedGlosses, expectedCats); + if (analysis != null) + { + return true; + } + + if (wordForm.Text.ToLower() != wordForm.Text) + { + // Try lowercase. + var lcCandidateForm = cache.ServiceLocator + .GetInstance() + .GetMatchingWordform(wordForm.get_WritingSystemAt(0), wordForm.Text.ToLower()); + if (lcCandidateForm is IWfiWordform lcCandidateWordform) + { + analysis = FindMatchingAnalysis(cache, lcCandidateWordform, word, expectedGlosses, expectedCats); + if (analysis != null) + { + return true; + } + } + } + + // No matching analysis found with all expected gloss and morpheme data + analysis = AddEmptyAnalysisToWordform(cache, candidateWordform); + return false; + } + + private static IAnalysis FindMatchingAnalysis(LcmCache cache, IWfiWordform candidateWordform, Word word, + Dictionary expectedGlosses, Dictionary expectedCats) + { + IAnalysis analysis = null; + var wsFact = cache.WritingSystemFactory; // Look for an analysis that has the correct morphemes and a matching gloss foreach (var wfiAnalysis in candidateWordform.AnalysesOC) { @@ -957,15 +1041,16 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word, if (word.morphemes != null && wfiAnalysis.MorphBundlesOS.Count == word.morphemes?.morphs.Length) { analysis = GetMostSpecificAnalysisForWordForm(wfiAnalysis); - for(var i = 0; i < wfiAnalysis.MorphBundlesOS.Count; ++i) + for (var i = 0; i < wfiAnalysis.MorphBundlesOS.Count; ++i) { - var extantMorphForm = wfiAnalysis.MorphBundlesOS[i].Form; + var morphBundle = wfiAnalysis.MorphBundlesOS[i]; + var extantMorphForm = morphBundle.Form; var importMorphForm = word.morphemes.morphs[i].items.FirstOrDefault(item => item.type == "txt"); var importFormWs = GetWsEngine(wsFact, importMorphForm?.lang); // compare the import item to the extant morph form if (importMorphForm == null || extantMorphForm == null || TsStringUtils.IsNullOrEmpty(extantMorphForm.get_String(importFormWs.Handle)) || - !extantMorphForm.get_String(importFormWs.Handle).Text.Normalize() + !DecorateFormWithAffixMarkers(morphBundle.MorphRA, extantMorphForm.get_String(importFormWs.Handle).Text).Normalize() .Equals(importMorphForm.Value?.Normalize())) { morphemeMatch = false; @@ -979,15 +1064,11 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word, var matchingGloss = wfiAnalysis.MeaningsOC.FirstOrDefault(g => VerifyGlossesMatch(g, expectedGlosses, expectedCats)); if (matchingGloss != null) { - analysis = matchingGloss; - return true; + return matchingGloss; } } } - - // No matching analysis found with all expected gloss and morpheme data - analysis = AddEmptyAnalysisToWordform(cache, candidateWordform); - return false; + return null; } private static IAnalysis GetMostSpecificAnalysisForWordForm(IAnalysis candidateWordform) From 77967a3cc4f4c3689787fbf7f57e6421c5a984f4 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Thu, 16 Oct 2025 10:30:51 -0700 Subject: [PATCH 4/6] Fix bugs in code used when FindOrCreateWfiAnalysis fails --- .../Interlinear/BIRDInterlinearImporter.cs | 131 ++++++++++-------- 1 file changed, 74 insertions(+), 57 deletions(-) diff --git a/Src/LexText/Interlinear/BIRDInterlinearImporter.cs b/Src/LexText/Interlinear/BIRDInterlinearImporter.cs index d2cdc2cbaf..e77292cd76 100644 --- a/Src/LexText/Interlinear/BIRDInterlinearImporter.cs +++ b/Src/LexText/Interlinear/BIRDInterlinearImporter.cs @@ -767,38 +767,72 @@ private static IAnalysis CreateWordformWithWfiAnalysis(LcmCache cache, Word word if (itemDict.ContainsKey("cf")) // Lex. Entries { + // NB: "cf" records the lexeme, not the headword/citation form (in spite of the name). int ws_cf = GetWsEngine(wsFact, itemDict["cf"].Item1).Handle; ILexEntry entry = null; - // Match on lexeme, not citation form. var entries = lex_entry_repo.AllInstances().Where( - m => DecorateFormWithAffixMarkers(m.LexemeFormOA, LexemeFormStaticForWs(m, ws_cf, string.Empty)) == itemDict["cf"].Item2); - if (entries.Count() == 1) + m => DecorateFormWithAffixMarkers(m.LexemeFormOA?.MorphTypeRA, m.LexemeFormOA?.Form?.get_String(ws_cf)?.Text) == itemDict["cf"].Item2); + + // Filter entries by homograph number. + // If the lexeme and the headword are different, + // then there may be more than one entry with the given homograph number. + // This is because homograph numbers distinguish headwords rather than lexemes. + // If there is no "hn" entry, then the hn is 0. + string hn = "0"; + if (itemDict.ContainsKey("hn")) // Homograph Number { - entry = entries.First(); + hn = itemDict["hn"].Item2; } - else if (itemDict.ContainsKey("hn")) // Homograph Number + var hnEntries = entries.Where(m => m.HomographNumber.ToString() == hn); + if (hnEntries.Count() > 0) { - entry = entries.FirstOrDefault(m => m.HomographNumber.ToString() == itemDict["hn"].Item2); + entries = hnEntries; } - if (entry != null) - { - bundle.MorphRA = entry.LexemeFormOA; - if (itemDict.ContainsKey("gls")) // Lex. Gloss + if (itemDict.ContainsKey("gls")) // Lex. Gloss + { + // Filter senses by gloss. + int ws_gls = GetWsEngine(wsFact, itemDict["gls"].Item1).Handle; + IList senses = new List(); + foreach (var e in entries) { - int ws_gls = GetWsEngine(wsFact, itemDict["gls"].Item1).Handle; - IEnumerable senses = entry.SensesOS.Where(s => s.Gloss.get_String(ws_gls).Text == itemDict["gls"].Item2); - int sensesCount = senses.Count(); - if (senses.Count() > 1 && itemDict.ContainsKey("msa")) + senses.AddRange(e.SensesOS.Where(s => s.Gloss.get_String(ws_gls).Text == itemDict["gls"].Item2)); + } + if (senses.Count() > 1 && itemDict.ContainsKey("msa")) + { + // Filter senses by MSA. + IList msaSenses = senses.Where(s => s.MorphoSyntaxAnalysisRA?.InterlinearAbbr == itemDict["msa"].Item2).ToList(); + if (msaSenses.Count() > 0) { - // Filter by MSA. - IEnumerable msaSenses = senses.Where(s => s.MorphoSyntaxAnalysisRA?.InterlinearAbbr == itemDict["msa"].Item2); - if (msaSenses.Count() > 0) - { - senses = msaSenses; - } + senses = msaSenses; } + } + // Record sense. + if (senses.Count() > 0) + { bundle.SenseRA = senses.FirstOrDefault(); + entry = bundle.SenseRA.Entry; + } + } + + if (entry == null && entries.Count() > 0) + { + entry = entries.First(); + } + + // Record morpheme. + if (entry != null) + { + if (itemDict.ContainsKey("txt")) + { + // Try allomorph first. + var ws_txt = GetWsEngine(wsFact, itemDict["txt"].Item1).Handle; + bundle.MorphRA = entry.AllAllomorphs.Where( + m => DecorateFormWithAffixMarkers(m.MorphTypeRA, m.Form.get_String(ws_txt).Text) == itemDict["txt"].Item2).FirstOrDefault(); + } + if (bundle.MorphRA == null) + { + bundle.MorphRA = entry.LexemeFormOA; } } } @@ -875,16 +909,11 @@ private static IAnalysis CreateWordformWithWfiAnalysis(LcmCache cache, Word word } // Based on StringServices.DecorateFormWithAffixMarkers. - private static string DecorateFormWithAffixMarkers(IMoForm mForm, string form) + private static string DecorateFormWithAffixMarkers(IMoMorphType mmt, string form) { - // No type info...return simpler version of name. - if (mForm == null) + if (mmt == null || form == null) return form; - // Add pre- post markers, if any. - var mmt = mForm.MorphTypeRA; - if (mmt == null) - return form; if (!String.IsNullOrEmpty(mmt.Prefix)) { form = mmt.Prefix + form; @@ -896,21 +925,6 @@ private static string DecorateFormWithAffixMarkers(IMoForm mForm, string form) return form; } - // Based on StringServices.LexemeFormStaticForWs. - private static string LexemeFormStaticForWs(ILexEntry entry, int wsVern, string defaultForm) - { - ITsString tss; - // try lexeme form - var form = entry.LexemeFormOA; - if (form != null) - { - tss = form.Form.get_String(wsVern); - if (tss != null && tss.Length > 0) - return tss.Text; - } - return defaultForm; - } - private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word, int mainWritingSystem, out IAnalysis analysis) @@ -1001,24 +1015,27 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word, return true; } - analysis = FindMatchingAnalysis(cache, candidateWordform, word, expectedGlosses, expectedCats); - if (analysis != null) + if (word.morphemes?.analysisStatus == analysisStatusTypes.humanApproved) { - return true; - } + analysis = FindMatchingAnalysis(cache, candidateWordform, word, expectedGlosses, expectedCats); + if (analysis != null) + { + return true; + } - if (wordForm.Text.ToLower() != wordForm.Text) - { - // Try lowercase. - var lcCandidateForm = cache.ServiceLocator - .GetInstance() - .GetMatchingWordform(wordForm.get_WritingSystemAt(0), wordForm.Text.ToLower()); - if (lcCandidateForm is IWfiWordform lcCandidateWordform) + if (wordForm.Text.ToLower() != wordForm.Text) { - analysis = FindMatchingAnalysis(cache, lcCandidateWordform, word, expectedGlosses, expectedCats); - if (analysis != null) + // Try lowercase. + var lcCandidateForm = cache.ServiceLocator + .GetInstance() + .GetMatchingWordform(wordForm.get_WritingSystemAt(0), wordForm.Text.ToLower()); + if (lcCandidateForm is IWfiWordform lcCandidateWordform) { - return true; + analysis = FindMatchingAnalysis(cache, lcCandidateWordform, word, expectedGlosses, expectedCats); + if (analysis != null) + { + return true; + } } } } @@ -1050,7 +1067,7 @@ private static IAnalysis FindMatchingAnalysis(LcmCache cache, IWfiWordform candi // compare the import item to the extant morph form if (importMorphForm == null || extantMorphForm == null || TsStringUtils.IsNullOrEmpty(extantMorphForm.get_String(importFormWs.Handle)) || - !DecorateFormWithAffixMarkers(morphBundle.MorphRA, extantMorphForm.get_String(importFormWs.Handle).Text).Normalize() + !DecorateFormWithAffixMarkers(morphBundle.MorphRA?.MorphTypeRA, extantMorphForm.get_String(importFormWs.Handle).Text).Normalize() .Equals(importMorphForm.Value?.Normalize())) { morphemeMatch = false; From db080c6ec52a3ea75e717d4461794752c1f37a68 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Fri, 17 Oct 2025 10:48:34 -0700 Subject: [PATCH 5/6] Fix unit tests --- Src/LexText/Interlinear/BIRDInterlinearImporter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/LexText/Interlinear/BIRDInterlinearImporter.cs b/Src/LexText/Interlinear/BIRDInterlinearImporter.cs index e77292cd76..0011ea62c6 100644 --- a/Src/LexText/Interlinear/BIRDInterlinearImporter.cs +++ b/Src/LexText/Interlinear/BIRDInterlinearImporter.cs @@ -1015,7 +1015,7 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word, return true; } - if (word.morphemes?.analysisStatus == analysisStatusTypes.humanApproved) + if (word.morphemes == null || word.morphemes.analysisStatus == analysisStatusTypes.humanApproved) { analysis = FindMatchingAnalysis(cache, candidateWordform, word, expectedGlosses, expectedCats); if (analysis != null) From a61c1388446c9f2403d4cec5fa00e0297ae70fb6 Mon Sep 17 00:00:00 2001 From: John Maxwell Date: Mon, 20 Oct 2025 07:53:30 -0700 Subject: [PATCH 6/6] Improve conditional on FindMatchingAnalysis --- .../Interlinear/BIRDInterlinearImporter.cs | 34 +++++++++---------- .../ITextDllTests/BIRDFormatImportTests.cs | 2 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/Src/LexText/Interlinear/BIRDInterlinearImporter.cs b/Src/LexText/Interlinear/BIRDInterlinearImporter.cs index 0011ea62c6..4798a77a83 100644 --- a/Src/LexText/Interlinear/BIRDInterlinearImporter.cs +++ b/Src/LexText/Interlinear/BIRDInterlinearImporter.cs @@ -1015,27 +1015,24 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word, return true; } - if (word.morphemes == null || word.morphemes.analysisStatus == analysisStatusTypes.humanApproved) + analysis = FindMatchingAnalysis(cache, candidateWordform, word, expectedGlosses, expectedCats); + if (analysis != null) { - analysis = FindMatchingAnalysis(cache, candidateWordform, word, expectedGlosses, expectedCats); - if (analysis != null) - { - return true; - } + return true; + } - if (wordForm.Text.ToLower() != wordForm.Text) + if (wordForm.Text.ToLower() != wordForm.Text) + { + // Try lowercase. + var lcCandidateForm = cache.ServiceLocator + .GetInstance() + .GetMatchingWordform(wordForm.get_WritingSystemAt(0), wordForm.Text.ToLower()); + if (lcCandidateForm is IWfiWordform lcCandidateWordform) { - // Try lowercase. - var lcCandidateForm = cache.ServiceLocator - .GetInstance() - .GetMatchingWordform(wordForm.get_WritingSystemAt(0), wordForm.Text.ToLower()); - if (lcCandidateForm is IWfiWordform lcCandidateWordform) + analysis = FindMatchingAnalysis(cache, lcCandidateWordform, word, expectedGlosses, expectedCats); + if (analysis != null) { - analysis = FindMatchingAnalysis(cache, lcCandidateWordform, word, expectedGlosses, expectedCats); - if (analysis != null) - { - return true; - } + return true; } } } @@ -1055,7 +1052,8 @@ private static IAnalysis FindMatchingAnalysis(LcmCache cache, IWfiWordform candi { var morphemeMatch = true; // verify that the analysis has a Morph Bundle with the expected morphemes from the import - if (word.morphemes != null && wfiAnalysis.MorphBundlesOS.Count == word.morphemes?.morphs.Length) + if (word.morphemes != null && wfiAnalysis.MorphBundlesOS.Count == word.morphemes?.morphs.Length && + word.morphemes.analysisStatus == analysisStatusTypes.humanApproved) { analysis = GetMostSpecificAnalysisForWordForm(wfiAnalysis); for (var i = 0; i < wfiAnalysis.MorphBundlesOS.Count; ++i) diff --git a/Src/LexText/Interlinear/ITextDllTests/BIRDFormatImportTests.cs b/Src/LexText/Interlinear/ITextDllTests/BIRDFormatImportTests.cs index 2b236b1e73..3d719dea05 100644 --- a/Src/LexText/Interlinear/ITextDllTests/BIRDFormatImportTests.cs +++ b/Src/LexText/Interlinear/ITextDllTests/BIRDFormatImportTests.cs @@ -824,7 +824,7 @@ public void TestExistingWordCategory() var imported = firstEntry.Current; ISegment segment = imported.ContentsOA[0].SegmentsOS[0]; // Verify that we found the category. - Assert.True(segment.AnalysesRS[0].Analysis.CategoryRA.Equals(cat)); + Assert.That(segment.AnalysesRS[0].Analysis.CategoryRA, Is.EqualTo(cat)); } } }