Skip to content

Commit d810da0

Browse files
authored
Fix problem with LT-22261 fix reported by Daney (#499)
* Fix problem with LT-22261 fix reported by Daney * Added unit tests requested by Jason * Clean up code to detect guesses
1 parent 691f8eb commit d810da0

2 files changed

Lines changed: 90 additions & 5 deletions

File tree

Src/LexText/Interlinear/BIRDInterlinearImporter.cs

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,14 @@ private static void UpdatePhraseTextForWordItems(ILgWritingSystemFactory wsFacto
394394
}
395395
}
396396

397+
/// <summary>
/// Reports whether the given morphemes should be treated as a guess, i.e. whether an
/// analysis status was explicitly supplied and it is anything other than humanApproved.
/// </summary>
/// <param name="item">The morphemes to inspect; may be null.</param>
/// <returns>
/// False when <paramref name="item"/> is null, when its analysisStatusSpecified flag is
/// not set, or when its status is humanApproved; true otherwise.
/// </returns>
private static bool IsGuess(Morphemes item)
{
	if (item == null || !item.analysisStatusSpecified)
	{
		// No morphemes, or no explicit status: not considered a guess.
		return false;
	}
	return item.analysisStatus != analysisStatusTypes.humanApproved;
}
404+
397405
private static ITsString GetItemValue(item item, ILgWritingSystemFactory wsFactory)
398406
{
399407
if (item.run != null)
@@ -754,7 +762,8 @@ private static IAnalysis CreateWordformWithWfiAnalysis(LcmCache cache, Word word
754762
return null;
755763

756764
// Fill in morphemes, lex. entries, lex. gloss, and lex.gram.info
757-
if (word.morphemes != null && word.morphemes.morphs.Length > 0)
765+
if (word.morphemes != null && word.morphemes.morphs.Length > 0 &&
766+
word.morphemes.analysisStatus == analysisStatusTypes.humanApproved)
758767
{
759768
var lex_entry_repo = cache.ServiceLocator.GetInstance<ILexEntryRepository>();
760769
var msa_repo = cache.ServiceLocator.GetInstance<IMoMorphSynAnalysisRepository>();
@@ -1007,6 +1016,12 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word,
10071016
break;
10081017
}
10091018
}
1019+
if (word.morphemes != null && word.morphemes.analysisStatus != analysisStatusTypes.humanApproved)
1020+
{
1021+
// If the morphemes were guessed then the glosses and cats were also guessed.
1022+
expectedGlosses.Clear();
1023+
expectedCats.Clear();
1024+
}
10101025

10111026
if (candidateForm == null || !MatchPrimaryFormAndAddMissingAlternatives(candidateForm, expectedForms, mainWritingSystem))
10121027
{
@@ -1024,7 +1039,8 @@ private static bool FindOrCreateWfiAnalysis(LcmCache cache, Word word,
10241039
analysis = candidateWordform;
10251040
// If no glosses or morphemes are expected the wordform itself is the match
10261041
if (expectedGlosses.Count == 0
1027-
&& (word.morphemes == null || word.morphemes.morphs.Length == 0))
1042+
&& (word.morphemes == null || word.morphemes.morphs.Length == 0 ||
1043+
word.morphemes.analysisStatus != analysisStatusTypes.humanApproved))
10281044
{
10291045
analysis = GetMostSpecificAnalysisForWordForm(candidateWordform);
10301046
return true;
@@ -1068,7 +1084,7 @@ private static IAnalysis FindMatchingAnalysis(LcmCache cache, IWfiWordform candi
10681084
var morphemeMatch = true;
10691085
// verify that the analysis has a Morph Bundle with the expected morphemes from the import
10701086
if (word.morphemes != null && wfiAnalysis.MorphBundlesOS.Count == word.morphemes?.morphs.Length &&
1071-
word.morphemes.analysisStatus == analysisStatusTypes.humanApproved)
1087+
!IsGuess(word.morphemes))
10721088
{
10731089
analysis = GetMostSpecificAnalysisForWordForm(wfiAnalysis);
10741090
for (var i = 0; i < wfiAnalysis.MorphBundlesOS.Count; ++i)
@@ -1284,7 +1300,7 @@ private static void UpgradeToWordGloss(Word word, ref IAnalysis wordForm)
12841300
analysisTree = new AnalysisTree(wfiGloss);
12851301
}
12861302
analysisTree.Gloss.Form.set_String(wsNewGloss, wordGlossItem.Value);
1287-
if (word.morphemes?.analysisStatus != analysisStatusTypes.guess)
1303+
if (!IsGuess(word.morphemes))
12881304
// Make sure this analysis is marked as user-approved (green check mark)
12891305
cache.LangProject.DefaultUserAgent.SetEvaluation(
12901306
analysisTree.WfiAnalysis, Opinions.approves);
@@ -1305,7 +1321,7 @@ private static void UpgradeToWordGloss(Word word, ref IAnalysis wordForm)
13051321
}
13061322
}
13071323

1308-
if (wordForm != null && word.morphemes?.analysisStatus == analysisStatusTypes.guess)
1324+
if (wordForm != null && IsGuess(word.morphemes))
13091325
// Ignore gloss if morphological analysis was only a guess.
13101326
wordForm = wordForm.Wordform;
13111327
}

Src/LexText/Interlinear/ITextDllTests/BIRDFormatImportTests.cs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -943,6 +943,75 @@ public void TestExistingWordCategory()
943943
}
944944
}
945945

946+
/// <summary>
/// Imports a single word whose morphemes element is marked analysisStatus='humanApproved'
/// and verifies that the imported segment has one analysis containing one morph bundle.
/// </summary>
[Test]
public void TestApprovedMorphemes()
{
	// An interlinear text example xml string with human-approved morphemes.
	string xml = "<document><interlinear-text>" +
		"<paragraphs><paragraph><phrases><phrase>" +
		"<item type=\"reference-number\" lang=\"en\">1 Musical</item>" +
		"<item type=\"note\" lang=\"pt\">origem: mary poppins</item>" +
		"<words><word><item type=\"txt\" lang=\"en\">supercalifragilisticexpialidocious</item>" +
		"<morphemes analysisStatus='humanApproved'>" +
		"<morph><item type=\"txt\" lang=\"en\">supercali-</item>" +
		"<item type=\"gls\" lang=\"pt\">superlative</item></morph>" +
		"</morphemes>" +
		"<item type=\"gls\" lang=\"pt\">absurdo</item></word>" +
		"</words></phrase></phrases></paragraph></paragraphs></interlinear-text></document>";

	LinguaLinksImport li = new LinguaLinksImport(Cache, null, null);
	LCModel.IText text = null;
	using (var stream = new MemoryStream(Encoding.ASCII.GetBytes(xml)))
	{
		li.ImportInterlinear(new DummyProgressDlg(), stream, 0, ref text);
		using (var firstEntry = Cache.LanguageProject.Texts.GetEnumerator())
		{
			// Fail with a clear message instead of reading Current from an unpositioned enumerator.
			Assert.That(firstEntry.MoveNext(), Is.True, "The import should have created a text.");
			var imported = firstEntry.Current;
			ISegment segment = imported.ContentsOA[0].SegmentsOS[0];
			// Verify that we found the morphemes.
			Assert.That(segment.AnalysesRS.Count, Is.EqualTo(1));
			Assert.That(segment.AnalysesRS[0].Analysis.MorphBundlesOS.Count, Is.EqualTo(1));
		}
	}
}
980+
981+
/// <summary>
/// Imports a single word whose morphemes element is marked analysisStatus='guess'
/// and verifies that the imported segment ends up with no analyses.
/// </summary>
[Test]
public void TestGuessedMorphemes()
{
	// An interlinear text example xml string with guessed morphemes.
	string xml = "<document><interlinear-text>" +
		"<paragraphs><paragraph><phrases><phrase>" +
		"<item type=\"reference-number\" lang=\"en\">1 Musical</item>" +
		"<item type=\"note\" lang=\"pt\">origem: mary poppins</item>" +
		"<words><word><item type=\"txt\" lang=\"en\">supercalifragilisticexpialidocious2</item>" +
		"<morphemes analysisStatus='guess'>" +
		"<morph><item type=\"txt\" lang=\"en\">supercali2-</item>" +
		"<item type=\"gls\" lang=\"pt\">superlative</item></morph>" +
		"</morphemes></word>" +
		"</words></phrase></phrases></paragraph></paragraphs></interlinear-text></document>";

	LinguaLinksImport li = new LinguaLinksImport(Cache, null, null);
	LCModel.IText text = null;
	using (var stream = new MemoryStream(Encoding.ASCII.GetBytes(xml)))
	{
		li.ImportInterlinear(new DummyProgressDlg(), stream, 0, ref text);
		using (var firstEntry = Cache.LanguageProject.Texts.GetEnumerator())
		{
			// Fail with a clear message instead of reading Current from an unpositioned enumerator.
			Assert.That(firstEntry.MoveNext(), Is.True, "The import should have created a text.");
			var imported = firstEntry.Current;
			ISegment segment = imported.ContentsOA[0].SegmentsOS[0];
			// Verify that we ignored the guessed morphemes.
			Assert.That(segment.AnalysesRS.Count, Is.EqualTo(0));
		}
	}
}
1013+
1014+
9461015
[Test]
9471016
public void TestNewWordCategory()
9481017
{

0 commit comments

Comments
 (0)