Skip to content

Commit 0e2df10

Browse files
committed
Combine per-chapter remarks into one remark per chapter
1 parent 2d7a013 commit 0e2df10

3 files changed

Lines changed: 72 additions & 42 deletions

File tree

src/Serval/src/Serval.Translation/Services/UsfmGenerationService.cs

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ ContractMapper contractMapper
1717
private readonly IRepository<Build> _builds = builds;
1818
private readonly ContractMapper _contractMapper = contractMapper;
1919
private const string AIDisclaimerRemark =
20-
"This draft of {0} was generated using AI on {1}. It should be reviewed and edited carefully.";
20+
"This draft of {0} was generated using AI from {1} on {2}. It should be reviewed and edited carefully. {3}";
2121

2222
public async Task<string> GetUsfmAsync(
2323
string engineId,
@@ -125,10 +125,11 @@ public async Task<string> GetUsfmAsync(
125125
CultureInfo.InvariantCulture,
126126
AIDisclaimerRemark,
127127
$"{textId} {chapterNum}",
128-
build.DateFinished.Value.ToUniversalTime().ToString("u")
128+
sourceSettings?.Name ?? "Unknown",
129+
build.DateFinished.Value.ToUniversalTime().ToString("u"),
130+
markerPlacementRemark
129131
);
130132
remarks.Add((chapterNum, disclaimerRemark));
131-
remarks.Add((chapterNum, markerPlacementRemark));
132133
}
133134

134135
IReadOnlyList<Pretranslation> pretranslations = await _pretranslations.GetAllAsync(
@@ -206,7 +207,7 @@ private static string UpdateSourceUsfm(
206207
UpdateUsfmMarkerBehavior embedBehavior,
207208
UpdateUsfmMarkerBehavior styleBehavior,
208209
bool placeParagraphMarkers,
209-
IEnumerable<(int, string)>? remarks,
210+
IEnumerable<(int, string)> remarks,
210211
string? targetQuoteConvention
211212
)
212213
{
@@ -237,7 +238,7 @@ private static string UpdateTargetUsfm(
237238
UpdateUsfmMarkerBehavior paragraphBehavior,
238239
UpdateUsfmMarkerBehavior embedBehavior,
239240
UpdateUsfmMarkerBehavior styleBehavior,
240-
IEnumerable<(int, string)>? remarks,
241+
IEnumerable<(int, string)> remarks,
241242
string? targetQuoteConvention
242243
)
243244
{
@@ -269,7 +270,7 @@ private static string UpdateUsfm(
269270
UpdateUsfmMarkerBehavior embedBehavior,
270271
UpdateUsfmMarkerBehavior styleBehavior,
271272
IEnumerable<IUsfmUpdateBlockHandler>? updateBlockHandlers,
272-
IEnumerable<(int, string)>? remarks,
273+
IEnumerable<(int, string)> remarks,
273274
string? targetQuoteConvention,
274275
bool isSource
275276
)
@@ -309,13 +310,13 @@ bool isSource
309310
styleBehavior,
310311
preserveParagraphStyles: null,
311312
updateBlockHandlers,
312-
remarks,
313+
!string.IsNullOrEmpty(targetQuoteConvention) ? null : remarks, // When a quote convention is given, defer the remarks to DenormalizeQuotationMarks so they are only added once
313314
errorHandler: (_) => true,
314315
compareSegments: isSource
315316
) ?? "";
316317

317318
if (!string.IsNullOrEmpty(targetQuoteConvention))
318-
usfm = DenormalizeQuotationMarks(usfm, targetQuoteConvention);
319+
usfm = DenormalizeQuotationMarks(usfm, targetQuoteConvention, remarks);
319320
return usfm;
320321
}
321322

@@ -396,7 +397,11 @@ pretranslation.Alignment is null
396397
return matrix;
397398
}
398399

399-
private static string DenormalizeQuotationMarks(string usfm, string quoteConvention)
400+
private static string DenormalizeQuotationMarks(
401+
string usfm,
402+
string quoteConvention,
403+
IEnumerable<(int, string)> remarks
404+
)
400405
{
401406
QuoteConvention targetQuoteConvention = QuoteConventions.Standard.GetQuoteConventionByName(quoteConvention);
402407
if (targetQuoteConvention is null)
@@ -417,13 +422,29 @@ private static string DenormalizeQuotationMarks(string usfm, string quoteConvent
417422
int denormalizableChapterCount = bestChapterStrategies.Count(tup =>
418423
tup.Strategy != QuotationMarkUpdateStrategy.Skip
419424
);
420-
List<(int, string)> remarks = [];
421425
const string QuotationDenormalizationRemark =
422426
"The quote style of this chapter has been automatically adjusted to match the rest of the project.";
427+
List<(int Chapter, string Remark)> combinedRemarks = [.. remarks];
423428
for (int i = 1; i <= denormalizableChapterCount; i++)
424-
remarks.Add((i, QuotationDenormalizationRemark));
429+
{
430+
int index = combinedRemarks.FindLastIndex(r => r.Chapter == i);
431+
if (index > -1)
432+
{
433+
combinedRemarks[index] = combinedRemarks[index] with
434+
{
435+
Remark = $"{combinedRemarks[index].Remark} {QuotationDenormalizationRemark}",
436+
};
437+
}
438+
else
439+
{
440+
combinedRemarks.Add((i, QuotationDenormalizationRemark));
441+
}
442+
}
425443

426-
var updater = new UpdateUsfmParserHandler(updateBlockHandlers: [quotationMarkDenormalizer], remarks: remarks);
444+
var updater = new UpdateUsfmParserHandler(
445+
updateBlockHandlers: [quotationMarkDenormalizer],
446+
remarks: combinedRemarks
447+
);
427448
UsfmParser.Parse(usfm, updater);
428449

429450
usfm = updater.GetUsfm();

src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2293,8 +2293,7 @@ await _env.Builds.InsertAsync(
22932293
@"\id MAT - Test1
22942294
\h
22952295
\c 1
2296-
\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
2297-
\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
2296+
\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
22982297
\p
22992298
\v 1 translation
23002299
\v 2

src/Serval/test/Serval.Translation.Tests/Services/UsfmGenerationServiceTests.cs

Lines changed: 38 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
namespace Serval.Translation.Services;
22

33
[TestFixture]
4-
public class UsfmGenerationServiceTests
4+
public partial class UsfmGenerationServiceTests
55
{
6+
[GeneratedRegex(@"\\rem.+")]
7+
private static partial Regex RemarkRegex();
8+
69
[Test]
710
public async Task GetUsfmAsync_Source_PreferExisting()
811
{
@@ -18,8 +21,7 @@ public async Task GetUsfmAsync_Source_PreferExisting()
1821
Is.EqualTo(
1922
@"\id MAT - Test1
2023
\c 1
21-
\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
22-
\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
24+
\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
2325
\v 1 Chapter 1, verse 1. ""Translated new paragraph""
2426
\p
2527
\v 2 Chapter 1, verse 2.
@@ -45,8 +47,7 @@ public async Task GetUsfmAsync_Source_PreferPretranslated()
4547
Is.EqualTo(
4648
@"\id MAT - Test1
4749
\c 1
48-
\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
49-
\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
50+
\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
5051
\v 1 Chapter 1, verse 1. ""Translated new paragraph""
5152
\p
5253
\v 2 Chapter 1, verse 2.
@@ -72,8 +73,7 @@ public async Task GetUsfmAsync_Source_OnlyExisting()
7273
Is.EqualTo(
7374
@"\id MAT - Test1
7475
\c 1
75-
\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
76-
\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
76+
\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
7777
\v 1
7878
\p
7979
\v 2
@@ -99,8 +99,7 @@ public async Task GetUsfmAsync_Source_OnlyPretranslated()
9999
Is.EqualTo(
100100
@"\id MAT - Test1
101101
\c 1
102-
\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
103-
\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
102+
\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
104103
\v 1 Chapter 1, verse 1. ""Translated new paragraph""
105104
\p
106105
\v 2 Chapter 1, verse 2.
@@ -127,8 +126,7 @@ public async Task GetUsfmAsync_Source_PlaceMarkers()
127126
Is.EqualTo(
128127
@"\id MAT - Test1
129128
\c 1
130-
\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
131-
\rem Embed markers were moved to the end of the verse. Paragraph breaks have positions preserved. Style markers were removed.
129+
\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. Embed markers were moved to the end of the verse. Paragraph breaks have positions preserved. Style markers were removed.
132130
\v 1 Chapter 1, verse 1.
133131
\p ""Translated new paragraph""
134132
\v 2 Chapter 1, verse 2.
@@ -154,8 +152,7 @@ public async Task GetUsfmAsync_Target_PreferExisting()
154152
Is.EqualTo(
155153
@"\id MAT - TRG
156154
\c 1
157-
\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
158-
\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
155+
\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
159156
\v 1 TRG - Chapter one, verse one.
160157
\v 2 Chapter 1, verse 2.
161158
\v 3 TRG - Chapter one, verse three.
@@ -180,8 +177,7 @@ public async Task GetUsfmAsync_Target_PreferPretranslated()
180177
Is.EqualTo(
181178
@"\id MAT - Test3
182179
\c 1
183-
\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
184-
\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
180+
\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
185181
\v 1 Chapter 1, verse 1. ""Translated new paragraph""
186182
\v 2 Chapter 1, verse 2.
187183
\v 3 TRG - Chapter one, verse three.
@@ -219,8 +215,7 @@ public async Task GetUsfmAsync_Auto_TargetBookDoesNotExist()
219215
Is.EqualTo(
220216
@"\id MAT - Test1
221217
\c 1
222-
\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
223-
\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
218+
\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
224219
\v 1 Chapter 1, verse 1. ""Translated new paragraph""
225220
\p
226221
\v 2 Chapter 1, verse 2.
@@ -246,8 +241,7 @@ public async Task GetUsfmAsync_Auto_TargetBookExists()
246241
Is.EqualTo(
247242
@"\id MAT - Test3
248243
\c 1
249-
\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
250-
\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
244+
\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
251245
\v 1 Chapter 1, verse 1. ""Translated new paragraph""
252246
\v 2 Chapter 1, verse 2.
253247
\v 3 TRG - Chapter one, verse three.
@@ -279,11 +273,7 @@ public async Task GetUsfmAsync_Target_OnlyExisting()
279273

280274
lines.Insert(
281275
2,
282-
@"\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully."
283-
);
284-
lines.Insert(
285-
3,
286-
@"\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed."
276+
@"\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed."
287277
);
288278
Assert.That(usfm, Is.EqualTo(string.Join('\n', lines)).IgnoreLineEndings());
289279
}
@@ -303,8 +293,7 @@ public async Task GetUsfmAsync_Target_OnlyPretranslated()
303293
Is.EqualTo(
304294
@"\id MAT - Test3
305295
\c 1
306-
\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
307-
\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
296+
\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
308297
\v 1 Chapter 1, verse 1. ""Translated new paragraph""
309298
\v 2 Chapter 1, verse 2.
310299
\v 3
@@ -337,15 +326,36 @@ public async Task GetUsfmAsync_DenormalizeQuotationMarks()
337326
quotationMarkBehavior: PretranslationNormalizationBehavior.Denormalized
338327
);
339328
Assert.That(usfm, Does.Contain("“Translated new paragraph”"));
340-
Assert.That(Regex.Matches(usfm, @"\\rem"), Has.Count.EqualTo(3));
329+
MatchCollection remarks = RemarkRegex().Matches(usfm);
330+
Assert.That(remarks, Has.Count.EqualTo(1));
331+
Assert.That(
332+
remarks.First().Value,
333+
Is.EqualTo(
334+
@"\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. "
335+
+ "It should be reviewed and edited carefully. "
336+
+ "Paragraph breaks and embed markers were moved to the end of the verse. "
337+
+ "Style markers were removed. "
338+
+ "The quote style of this chapter has been automatically adjusted to match the rest of the project."
339+
)
340+
);
341341

342342
usfm = await env.GetUsfmAsync(
343343
PretranslationUsfmTextOrigin.PreferExisting,
344344
PretranslationUsfmTemplate.Source,
345345
quotationMarkBehavior: PretranslationNormalizationBehavior.Normalized
346346
);
347347
Assert.That(usfm, Does.Contain("\"Translated new paragraph\""));
348-
Assert.That(Regex.Matches(usfm, @"\\rem"), Has.Count.EqualTo(2));
348+
remarks = RemarkRegex().Matches(usfm);
349+
Assert.That(remarks, Has.Count.EqualTo(1));
350+
Assert.That(
351+
remarks.First().Value,
352+
Is.EqualTo(
353+
@"\rem This draft of MAT 1 was generated using AI from Te1 on 1970-01-01 00:00:00Z. "
354+
+ "It should be reviewed and edited carefully. "
355+
+ "Paragraph breaks and embed markers were moved to the end of the verse. "
356+
+ "Style markers were removed."
357+
)
358+
);
349359
}
350360

351361
[Test]

0 commit comments

Comments
 (0)