Skip to content

Commit 4abf849

Browse files
committed
Seed canonical morph types and regenerate search index (#2219)
* Seed canonical morph-types into CRDT projects
  - Add CanonicalMorphTypes with all 19 morph-type definitions (GUIDs from LibLCM)
  - Seed morph-types for new projects via PreDefinedData.PredefinedMorphTypes
  - Seed morph-types for existing projects in MigrateDb (before FTS refresh)
  - Add EF migration to clear FTS table so headwords are rebuilt with morph tokens
  - Patch legacy snapshots (empty MorphTypes) in sync layer to prevent duplicates
* Stop creating morph-types in tests. They're now prepopulated
* Stop printing verify diff content. It's too much.
* Seed morph types before API testing
* Add descriptions to canonical morph types
* Sync morph-types when importing, because they already exist in CRDT
* Verify our canonical morph-types match new fwdata projects
* Fix non-FTS relevance order with morph-tokens in query
1 parent ec450bd commit 4abf849

32 files changed

Lines changed: 1602 additions & 171 deletions
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
using FwDataMiniLcmBridge.Api;
2+
using FwDataMiniLcmBridge.LcmUtils;
3+
using FwDataMiniLcmBridge.Tests.Fixtures;
4+
using Microsoft.Extensions.DependencyInjection;
5+
using Microsoft.Extensions.Options;
6+
using MiniLcm.Models;
7+
8+
namespace FwDataMiniLcmBridge.Tests;
9+
10+
/// <summary>
/// Creates a brand-new FwData project and checks that the morph types it contains
/// are equivalent to the definitions in <c>CanonicalMorphTypes.All</c>.
/// </summary>
public class CanonicalMorphTypeTests : IDisposable
{
    private readonly ServiceProvider _serviceProvider;
    private readonly FwDataMiniLcmApi _api;
    private readonly FwDataProject _project;

    /// <summary>
    /// Builds the service provider, creates a uniquely named project on disk
    /// and opens an API instance against it.
    /// </summary>
    public CanonicalMorphTypeTests()
    {
        // The project loader is not mocked here; the templates folder is resolved to an absolute path.
        _serviceProvider = new ServiceCollection()
            .AddTestFwDataBridge(mockProjectLoader: false)
            .PostConfigure<FwDataBridgeConfig>(bridgeConfig =>
                bridgeConfig.TemplatesFolder = Path.GetFullPath("Templates"))
            .BuildServiceProvider();

        var bridgeOptions = _serviceProvider.GetRequiredService<IOptions<FwDataBridgeConfig>>();
        var projectsFolder = bridgeOptions.Value.ProjectsFolder;
        Directory.CreateDirectory(projectsFolder);

        // A GUID in the name keeps each test instance's project separate from the others.
        var uniqueName = $"canonical-morph-types-test_{Guid.NewGuid()}";
        _project = new FwDataProject(uniqueName, projectsFolder);

        var loader = _serviceProvider.GetRequiredService<IProjectLoader>();
        loader.NewProject(_project, "en", "en");

        var apiFactory = _serviceProvider.GetRequiredService<FwDataFactory>();
        _api = apiFactory.GetFwDataMiniLcmApi(_project, false);
    }

    /// <summary>
    /// Disposes the API and the service provider, then removes the project folder if it still exists.
    /// </summary>
    public void Dispose()
    {
        _api.Dispose();
        _serviceProvider.Dispose();

        var projectFolder = _project.ProjectFolder;
        if (!Directory.Exists(projectFolder))
        {
            return;
        }

        Directory.Delete(projectFolder, true);
    }

    [Fact]
    public async Task CanonicalMorphTypes_MatchNewLangProjMorphTypes()
    {
        var projectMorphTypes = await _api.GetMorphTypes().ToArrayAsync();

        // Guard against a vacuous comparison: the new project must actually contain morph types.
        projectMorphTypes.Should().NotBeEmpty();
        CanonicalMorphTypes.All.Values.Should().BeEquivalentTo(projectMorphTypes);
    }
}

backend/FwLite/FwDataMiniLcmBridge.Tests/FwDataMiniLcmBridge.Tests.csproj

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,9 @@
3535
</ItemGroup>
3636
<ItemGroup>
3737
<Folder Include="TestData\" />
38+
<PackageReference Include="SIL.LCModel" GeneratePathProperty="true" />
39+
<Content Include="$(PkgSIL_LCModel)/contentFiles/Templates/*.*"
40+
Link="Templates/%(Filename)%(Extension)"
41+
CopyToOutputDirectory="PreserveNewest" />
3842
</ItemGroup>
39-
</Project>
43+
</Project>

backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ public async Task SecondaryOrder_DefaultsToStem(string query, SortField sortFiel
2222
{
2323
var unknownMorphTypeEntryId = Guid.NewGuid();
2424
Entry[] expected = [
25-
new() { Id = unknownMorphTypeEntryId, LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Unknown }, // SecondaryOrder defaults to Stem = 1
26-
new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.BoundStem }, // SecondaryOrder = 2
27-
new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Suffix }, // SecondaryOrder = 6
25+
new() { Id = unknownMorphTypeEntryId, LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Unknown }, // SecondaryOrder defaults to Stem = 0
26+
new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.BoundStem }, // SecondaryOrder = 10
27+
new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Suffix }, // SecondaryOrder = 70
2828
];
2929

3030
var ids = expected.Select(e => e.Id).ToHashSet();

backend/FwLite/FwDataMiniLcmBridge/Api/Sorting.cs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,16 @@ public static IEnumerable<ILexEntry> ApplyHeadwordOrder(this IEnumerable<ILexEnt
3333
/// </summary>
3434
public static IEnumerable<ILexEntry> ApplyRoughBestMatchOrder(this IEnumerable<ILexEntry> entries, SortOptions order, int sortWsHandle, int stemSecondaryOrder, string? query = null)
3535
{
36-
var projected = entries.Select(e => (Entry: e, Headword: e.LexEntryHeadword(sortWsHandle, applyMorphTokens: false)));
36+
var projected = entries.Select(e => (
37+
Entry: e,
38+
Headword: e.LexEntryHeadword(sortWsHandle, applyMorphTokens: false),
39+
HeadwordWithTokens: e.LexEntryHeadword(sortWsHandle, applyMorphTokens: true)
40+
));
3741
if (order.Ascending)
3842
{
3943
return projected
40-
.OrderByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false))
41-
.ThenByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false))
44+
.OrderByDescending(x => !string.IsNullOrEmpty(query) && (x.HeadwordWithTokens?.StartsWithDiacriticMatch(query!) ?? false))
45+
.ThenByDescending(x => !string.IsNullOrEmpty(query) && (x.HeadwordWithTokens?.ContainsDiacriticMatch(query!) ?? false))
4246
.ThenBy(x => x.Headword?.Length ?? 0)
4347
.ThenBy(x => x.Headword)
4448
.ThenBy(x => x.Entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder)
@@ -49,8 +53,8 @@ public static IEnumerable<ILexEntry> ApplyRoughBestMatchOrder(this IEnumerable<I
4953
else
5054
{
5155
return projected
52-
.OrderBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false))
53-
.ThenBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false))
56+
.OrderBy(x => !string.IsNullOrEmpty(query) && (x.HeadwordWithTokens?.StartsWithDiacriticMatch(query!) ?? false))
57+
.ThenBy(x => !string.IsNullOrEmpty(query) && (x.HeadwordWithTokens?.ContainsDiacriticMatch(query!) ?? false))
5458
.ThenByDescending(x => x.Headword?.Length ?? 0)
5559
.ThenByDescending(x => x.Headword)
5660
.ThenByDescending(x => x.Entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder)

backend/FwLite/FwLiteProjectSync.Tests/FluentAssertGlobalConfig.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
using System.Runtime.CompilerServices;
12
using FluentAssertions.Extensibility;
23
using FwLiteProjectSync.Tests;
34

@@ -7,6 +8,12 @@ namespace FwLiteProjectSync.Tests;
78

89
public static class FluentAssertGlobalConfig
910
{
11+
/// <summary>
/// Module initializer that tells Verify to omit content from the exceptions it throws,
/// which keeps failure output short.
/// </summary>
[ModuleInitializer]
internal static void InitVerify() => VerifierSettings.OmitContentFromException();
16+
1017
public static void Initialize()
1118
{
1219
MiniLcm.Tests.FluentAssertGlobalConfig.Initialize();
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
using FwDataMiniLcmBridge;
2+
using FwDataMiniLcmBridge.LcmUtils;
3+
using FwLiteProjectSync.Tests.Fixtures;
4+
using LcmCrdt;
5+
using Microsoft.Extensions.DependencyInjection;
6+
using Microsoft.Extensions.Options;
7+
using MiniLcm.Models;
8+
9+
namespace FwLiteProjectSync.Tests.Import;
10+
11+
/// <summary>
/// Exercises the complete MiniLcmImport.Import(IProjectIdentifier) production path,
/// in which Import itself creates the CRDT project (with SeedNewProjectData: false).
/// This differs from <see cref="ImportTests"/>, which calls ImportProject() on a CRDT API
/// that was initialized beforehand.
/// </summary>
public class FullImportTests : IAsyncLifetime
{
    private const string ProjectFolder = "FullImportTests";
    private readonly ServiceProvider _rootServiceProvider;
    private readonly AsyncServiceScope _scope;
    private IServiceProvider Services => _scope.ServiceProvider;

    /// <summary>
    /// Builds the root service provider and opens one async scope from which the tests resolve services.
    /// </summary>
    public FullImportTests()
    {
        var serviceCollection = new ServiceCollection().AddSyncServices(ProjectFolder);
        _rootServiceProvider = serviceCollection.BuildServiceProvider();
        _scope = _rootServiceProvider.CreateAsyncScope();
    }

    /// <summary>
    /// Removes any existing test folder, then creates the FwData and CRDT project directories.
    /// </summary>
    public Task InitializeAsync()
    {
        if (Directory.Exists(ProjectFolder))
        {
            Directory.Delete(ProjectFolder, true);
        }

        var fwDataProjectsFolder = Services.GetRequiredService<IOptions<FwDataBridgeConfig>>().Value.ProjectsFolder;
        Directory.CreateDirectory(fwDataProjectsFolder);

        var crdtProjectPath = Services.GetRequiredService<IOptions<LcmCrdtConfig>>().Value.ProjectPath;
        Directory.CreateDirectory(crdtProjectPath);

        return Task.CompletedTask;
    }

    /// <summary>
    /// Disposes the scope first, then the root provider it was created from.
    /// </summary>
    public async Task DisposeAsync()
    {
        await _scope.DisposeAsync();
        await _rootServiceProvider.DisposeAsync();
    }

    /// <summary>
    /// Regression test: Import creates its CRDT project with SeedNewProjectData: false.
    /// Morph types have to be seeded regardless, otherwise MorphTypeSync.Sync throws
    /// when it sees the FwData morph types as "new".
    /// </summary>
    [Fact]
    public async Task Import_FullPath_SeedsMorphTypesBeforeImport()
    {
        // Arrange: an FwData project containing a single entry
        var uniqueSuffix = Guid.NewGuid().ToString("N")[..8];
        var projectName = "import-morph-types-" + uniqueSuffix;
        var fwDataOptions = Services.GetRequiredService<IOptions<FwDataBridgeConfig>>();
        var fwDataProject = new FwDataProject(projectName, fwDataOptions.Value.ProjectsFolder);
        var projectLoader = Services.GetRequiredService<IProjectLoader>();
        projectLoader.NewProject(fwDataProject, "en", "en");

        var fwDataFactory = Services.GetRequiredService<FwDataFactory>();
        using var fwDataApi = fwDataFactory.GetFwDataMiniLcmApi(fwDataProject, false);
        var testEntry = new Entry
        {
            Id = Guid.NewGuid(),
            LexemeForm = { ["en"] = "test" },
            Senses = [new Sense { Gloss = { ["en"] = "a test" } }]
        };
        await fwDataApi.CreateEntry(testEntry);

        // Act: the production import path, which creates the CRDT project internally
        var importer = Services.GetRequiredService<MiniLcmImport>();
        var crdtProject = await importer.Import(fwDataProject);

        // Assert: morph types are present and the entry came across
        var crdtApi = await Services.OpenCrdtProject((CrdtProject)crdtProject);

        var seededMorphTypes = await crdtApi.GetMorphTypes().ToArrayAsync();
        seededMorphTypes.Should().NotBeEmpty("morph types should be seeded during project creation");

        var importedEntries = await crdtApi.GetEntries().ToArrayAsync();
        importedEntries.Should().ContainSingle(e => e.LexemeForm["en"] == "test");
    }
}

backend/FwLite/FwLiteProjectSync.Tests/Import/ResumableTests.cs

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,7 @@ public async Task ImportProject_IsResumable_AcrossRandomFailures()
3838
}).ToList();
3939
var expectedPartsOfSpeech = Enumerable.Range(1, 10)
4040
.Select(i => new PartOfSpeech { Id = Guid.NewGuid(), Name = { ["en"] = $"pos{i}" } }).ToList();
41-
var expectedMorphTypes = Enum.GetValues<MorphTypeKind>()
42-
.Select(typ => new MorphType()
43-
{
44-
Id = Guid.NewGuid(),
45-
Name = new() { ["en"] = $"Test Morph Type {(int)typ} {typ}" },
46-
Abbreviation = new() { ["en"] = $"Tst MrphTyp{(int)typ}" },
47-
Description = new() { { "en", new RichString($"test desc for {typ}") } },
48-
Prefix = null,
49-
Postfix = null,
50-
Kind = typ,
51-
SecondaryOrder = 0
52-
}).ToList();
41+
var expectedMorphTypes = CanonicalMorphTypes.All.Values;
5342

5443
var mockFrom = new Mock<IMiniLcmApi>();
5544
IMiniLcmApi mockTo = new UnreliableApi(
@@ -132,7 +121,6 @@ public async Task ImportProject_IsResumable_AcrossRandomFailures()
132121
createdEntries.Select(e => e.LexemeForm["en"]).Should().BeEquivalentTo(expectedEntries.Select(e => e.LexemeForm["en"]));
133122
createdMorphTypes.Select(e => e.Name["en"]).Should().BeEquivalentTo(expectedMorphTypes.Select(e => e.Name["en"]));
134123
createdMorphTypes.Select(e => e.Kind).Should().BeEquivalentTo(expectedMorphTypes.Select(e => e.Kind));
135-
136124
}
137125

138126

backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,18 @@ private async Task<SyncResult> SyncOrImportInternal(IMiniLcmApi crdtApi, IMiniLc
7676
{
7777
// Repair any missing translation IDs before doing the full sync, so the sync doesn't have to deal with them
7878
var syncedIdCount = await CrdtRepairs.SyncMissingTranslationIds(projectSnapshot.Entries, fwdata, crdt, dryRun);
79+
80+
// Patch legacy snapshots that were created before morph-type support.
81+
// After seeding, the CRDT has morph-types but the snapshot still has [].
82+
// Without this patch, the diff would see all morph-types as "new" and try to re-add them.
83+
if (projectSnapshot.MorphTypes is null or [])
84+
{
85+
var currentCrdtMorphTypes = await crdt.GetMorphTypes().ToArrayAsync();
86+
if (currentCrdtMorphTypes.Length > 0)
87+
{
88+
projectSnapshot = projectSnapshot with { MorphTypes = currentCrdtMorphTypes };
89+
}
90+
}
7991
}
8092

8193
var syncResult = projectSnapshot is null

backend/FwLite/FwLiteProjectSync/MiniLcmImport.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using MiniLcm;
99
using MiniLcm.Models;
1010
using MiniLcm.Project;
11+
using MiniLcm.SyncHelpers;
1112

1213
namespace FwLiteProjectSync;
1314

@@ -70,11 +71,10 @@ public async Task ImportProject(IMiniLcmApi importTo, IMiniLcmApi importFrom, in
7071
logger.LogInformation("Imported complex form type {Id}", complexFormType.Id);
7172
}
7273

73-
await foreach (var morphType in importFrom.GetMorphTypes())
74-
{
75-
await importTo.CreateMorphType(morphType);
76-
logger.LogInformation("Imported morph type {Id} ({typ})", morphType.Id, morphType.Kind);
77-
}
74+
// Morph types are created automatically for CRDT projects, so we update them instead of creating them
75+
var importFromMorphTypes = await importFrom.GetMorphTypes().ToArrayAsync();
76+
var existingMorphTypes = await importTo.GetMorphTypes().ToArrayAsync();
77+
await MorphTypeSync.Sync(existingMorphTypes, importFromMorphTypes, importTo);
7878

7979
logger.LogInformation("Importing semantic domains");
8080
await importTo.BulkImportSemanticDomains(importFrom.GetSemanticDomains());

backend/FwLite/LcmCrdt.Tests/Changes/UseChangesTests.cs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -195,10 +195,7 @@ private static IEnumerable<ChangeWithDependencies> GetAllChanges()
195195
var createComplexFormComponentChange = new AddEntryComponentChange(complexFormComponent);
196196
yield return new ChangeWithDependencies(createComplexFormComponentChange, [createComplexFormEntryChange, createEntryChange, createSenseChange]);
197197

198-
var morphTypeName = new MultiString { { "en", "test morphtype" } };
199-
var morphTypeAbbreviation = new MultiString { { "en", "test mph" } };
200-
var morphTypeDescription = new RichMultiString { { "en", new RichString("test desc") } };
201-
var morphType = new MorphType { Id = Guid.NewGuid(), Name = morphTypeName, Abbreviation = morphTypeAbbreviation, Description = morphTypeDescription, SecondaryOrder = 0, Kind = MorphTypeKind.Root };
198+
var morphType = CanonicalMorphTypes.All[MorphTypeKind.Root].Copy();
202199
var createMorphTypeChange = new CreateMorphTypeChange(morphType);
203200
yield return new ChangeWithDependencies(createMorphTypeChange);
204201

0 commit comments

Comments
 (0)