Skip to content

Commit f055f9e

Browse files
committed
Add support for per-chapter remarks
1 parent 69cf4f9 commit f055f9e

3 files changed

Lines changed: 90 additions & 114 deletions

File tree

src/Serval/src/Serval.Translation/Services/UsfmGenerationService.cs

Lines changed: 65 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -76,24 +76,60 @@ public async Task<string> GetUsfmAsync(
7676
Build? build = (await _builds.GetAllAsync(b => b.EngineRef == engineId, cancellationToken))
7777
.OrderByDescending(b => b.DateFinished)
7878
.FirstOrDefault();
79-
if (build is null || build.DateFinished is null)
79+
if (build?.DateFinished is null)
8080
throw new InvalidOperationException($"Could not find any completed builds for engine '{engineId}'.");
8181

82-
string disclaimerRemark = string.Format(
83-
CultureInfo.InvariantCulture,
84-
AIDisclaimerRemark,
85-
textId,
86-
build.DateFinished.Value.ToUniversalTime().ToString("u")
87-
);
8882
string markerPlacementRemark = GenerateMarkerPlacementRemark(
8983
paragraphMarkerBehavior,
9084
embedBehavior,
9185
styleMarkerBehavior
9286
);
9387

94-
List<string> remarks = [disclaimerRemark, markerPlacementRemark];
88+
ParallelCorpusContract[] parallelCorpora = [.. _contractMapper.Map(build, engine)];
89+
90+
// Get the versification for the project
91+
CorpusBundle corpusBundle = new(parallelCorpora);
92+
ParallelCorpusContract corpusContract = corpusBundle.ParallelCorpora.Single(c => c.Id == corpusId);
93+
CorpusFileContract sourceFile = corpusContract.SourceCorpora[0].Files[0];
94+
ParatextProjectSettings? sourceSettings = corpusBundle.GetSettings(sourceFile.Location);
95+
ScrVers versification = sourceSettings?.Versification ?? ScrVers.Original;
96+
var scriptureRangeParser = new ScriptureRangeParser(versification);
97+
98+
// Generate remarks for each chapter of the book selected by the build's source filters (or for every chapter in the book if none are selected)
99+
List<(int, string)> remarks = [];
100+
List<int>? chapters =
101+
build
102+
.Pretranslate?.SelectMany(p => p.SourceFilters ?? [])
103+
.SelectMany(s =>
104+
scriptureRangeParser
105+
.GetChapters(s.ScriptureRange)
106+
.TryGetValue(textId, out List<int>? filterChapters)
107+
? filterChapters
108+
: []
109+
)
110+
.ToList()
111+
?? [];
95112

96-
ParallelCorpusContract[] parallelCorpora = _contractMapper.Map(build, engine).ToArray();
113+
// If there are no chapters, we need to set it to null so that the USFM updater receives null rather than an empty list, and every chapter in the book gets remarks
114+
if (chapters.Count == 0)
115+
chapters = null;
116+
117+
// Get all the chapters needing remarks
118+
IEnumerable<int> chaptersNeedingRemarks =
119+
chapters ?? Enumerable.Range(1, versification.GetLastChapter(Canon.BookIdToNumber(textId)));
120+
121+
// Add remarks to each chapter
122+
foreach (int chapterNum in chaptersNeedingRemarks)
123+
{
124+
string disclaimerRemark = string.Format(
125+
CultureInfo.InvariantCulture,
126+
AIDisclaimerRemark,
127+
$"{textId} {chapterNum}",
128+
build.DateFinished.Value.ToUniversalTime().ToString("u")
129+
);
130+
remarks.Add((chapterNum, disclaimerRemark));
131+
remarks.Add((chapterNum, markerPlacementRemark));
132+
}
97133

98134
IReadOnlyList<Pretranslation> pretranslations = await _pretranslations.GetAllAsync(
99135
pt =>
@@ -126,6 +162,7 @@ public async Task<string> GetUsfmAsync(
126162
corpusId,
127163
textId,
128164
textOrigin == PretranslationUsfmTextOrigin.OnlyExisting ? [] : pretranslations,
165+
chapters,
129166
textBehavior,
130167
Map(paragraphMarkerBehavior),
131168
Map(embedBehavior),
@@ -146,6 +183,7 @@ public async Task<string> GetUsfmAsync(
146183
corpusId,
147184
textId,
148185
textOrigin == PretranslationUsfmTextOrigin.OnlyExisting ? [] : pretranslations,
186+
chapters,
149187
Map(paragraphMarkerBehavior),
150188
Map(embedBehavior),
151189
Map(styleMarkerBehavior),
@@ -163,11 +201,12 @@ private static string UpdateSourceUsfm(
163201
string corpusId,
164202
string bookId,
165203
IReadOnlyList<Pretranslation> pretranslations,
204+
IReadOnlyList<int>? chapters,
166205
UpdateUsfmMarkerBehavior paragraphBehavior,
167206
UpdateUsfmMarkerBehavior embedBehavior,
168207
UpdateUsfmMarkerBehavior styleBehavior,
169208
bool placeParagraphMarkers,
170-
IEnumerable<string>? remarks,
209+
IEnumerable<(int, string)>? remarks,
171210
string? targetQuoteConvention
172211
)
173212
{
@@ -176,6 +215,7 @@ private static string UpdateSourceUsfm(
176215
corpusId,
177216
bookId,
178217
pretranslations,
218+
chapters,
179219
UpdateUsfmTextBehavior.StripExisting,
180220
paragraphBehavior,
181221
embedBehavior,
@@ -192,11 +232,12 @@ private static string UpdateTargetUsfm(
192232
string corpusId,
193233
string bookId,
194234
IReadOnlyList<Pretranslation> pretranslations,
235+
IReadOnlyList<int>? chapters,
195236
UpdateUsfmTextBehavior textBehavior,
196237
UpdateUsfmMarkerBehavior paragraphBehavior,
197238
UpdateUsfmMarkerBehavior embedBehavior,
198239
UpdateUsfmMarkerBehavior styleBehavior,
199-
IEnumerable<string>? remarks,
240+
IEnumerable<(int, string)>? remarks,
200241
string? targetQuoteConvention
201242
)
202243
{
@@ -205,6 +246,7 @@ private static string UpdateTargetUsfm(
205246
corpusId,
206247
bookId,
207248
pretranslations,
249+
chapters,
208250
textBehavior,
209251
paragraphBehavior,
210252
embedBehavior,
@@ -221,12 +263,13 @@ private static string UpdateUsfm(
221263
string corpusId,
222264
string bookId,
223265
IEnumerable<Pretranslation> pretranslations,
266+
IReadOnlyList<int>? chapters,
224267
UpdateUsfmTextBehavior textBehavior,
225268
UpdateUsfmMarkerBehavior paragraphBehavior,
226269
UpdateUsfmMarkerBehavior embedBehavior,
227270
UpdateUsfmMarkerBehavior styleBehavior,
228271
IEnumerable<IUsfmUpdateBlockHandler>? updateBlockHandlers,
229-
IEnumerable<string>? remarks,
272+
IEnumerable<(int, string)>? remarks,
230273
string? targetQuoteConvention,
231274
bool isSource
232275
)
@@ -258,13 +301,15 @@ bool isSource
258301
.Where(row => row.Refs.Any())
259302
.OrderBy(row => row.Refs[0])
260303
.ToArray(),
261-
isSource ? sourceSettings?.FullName : targetSettings?.FullName,
304+
chapters,
305+
fullName: isSource ? sourceSettings?.FullName : targetSettings?.FullName,
262306
textBehavior,
263307
paragraphBehavior,
264308
embedBehavior,
265309
styleBehavior,
266-
updateBlockHandlers: updateBlockHandlers,
267-
remarks: remarks,
310+
preserveParagraphStyles: null,
311+
updateBlockHandlers,
312+
remarks,
268313
errorHandler: (_) => true,
269314
compareSegments: isSource
270315
) ?? "";
@@ -372,31 +417,11 @@ private static string DenormalizeQuotationMarks(string usfm, string quoteConvent
372417
int denormalizableChapterCount = bestChapterStrategies.Count(tup =>
373418
tup.Strategy != QuotationMarkUpdateStrategy.Skip
374419
);
375-
List<string> remarks = [];
376-
string quotationDenormalizationRemark;
377-
if (denormalizableChapterCount == bestChapterStrategies.Count)
378-
{
379-
quotationDenormalizationRemark =
380-
"The quote style in all chapters has been automatically adjusted to match the rest of the project.";
381-
}
382-
else if (denormalizableChapterCount > 0)
383-
{
384-
quotationDenormalizationRemark =
385-
"The quote style in the following chapters has been automatically adjusted to match the rest of the project: "
386-
+ GetChapterRangesString(
387-
bestChapterStrategies
388-
.Where(tuple => tuple.Strategy != QuotationMarkUpdateStrategy.Skip)
389-
.Select(tuple => tuple.ChapterNumber)
390-
.ToList()
391-
)
392-
+ ".";
393-
}
394-
else
395-
{
396-
quotationDenormalizationRemark =
397-
"The quote style was not automatically adjusted to match the rest of your project in any chapters.";
398-
}
399-
remarks.Add(quotationDenormalizationRemark);
420+
List<(int, string)> remarks = [];
421+
const string QuotationDenormalizationRemark =
422+
"The quote style of this chapter has been automatically adjusted to match the rest of the project.";
423+
// Attach the remark to the chapters that were actually denormalized (strategy other than Skip), using their real chapter numbers rather than assuming they are chapters 1..denormalizableChapterCount.
foreach (int i in bestChapterStrategies.Where(tup => tup.Strategy != QuotationMarkUpdateStrategy.Skip).Select(tup => tup.ChapterNumber))
424+
remarks.Add((i, QuotationDenormalizationRemark));
400425

401426
var updater = new UpdateUsfmParserHandler(updateBlockHandlers: [quotationMarkDenormalizer], remarks: remarks);
402427
UsfmParser.Parse(usfm, updater);
@@ -405,43 +430,6 @@ private static string DenormalizeQuotationMarks(string usfm, string quoteConvent
405430
return usfm;
406431
}
407432

408-
internal static string GetChapterRangesString(List<int> chapterNumbers)
409-
{
410-
chapterNumbers = chapterNumbers.Order().ToList();
411-
int start = chapterNumbers[0];
412-
int end = chapterNumbers[0];
413-
List<string> chapterRangeStrings = [];
414-
foreach (int chapterNumber in chapterNumbers[1..])
415-
{
416-
if (chapterNumber == end + 1)
417-
{
418-
end = chapterNumber;
419-
}
420-
else
421-
{
422-
if (start == end)
423-
{
424-
chapterRangeStrings.Add(start.ToString(CultureInfo.InvariantCulture));
425-
}
426-
else
427-
{
428-
chapterRangeStrings.Add($"{start}-{end}");
429-
}
430-
start = chapterNumber;
431-
end = chapterNumber;
432-
}
433-
}
434-
if (start == end)
435-
{
436-
chapterRangeStrings.Add(start.ToString(CultureInfo.InvariantCulture));
437-
}
438-
else
439-
{
440-
chapterRangeStrings.Add($"{start}-{end}");
441-
}
442-
return string.Join(", ", chapterRangeStrings);
443-
}
444-
445433
/// <summary>
446434
/// Generate a natural sounding remark/comment describing marker placement.
447435
/// </summary>

src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2291,10 +2291,10 @@ await _env.Builds.InsertAsync(
22912291
usfm.Replace("\r\n", "\n"),
22922292
Is.EqualTo(
22932293
@"\id MAT - Test1
2294-
\rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
2295-
\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
22962294
\h
22972295
\c 1
2296+
\rem This draft of MAT 1 was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
2297+
\rem Paragraph breaks and embed markers were moved to the end of the verse. Style markers were removed.
22982298
\p
22992299
\v 1 translation
23002300
\v 2

0 commit comments

Comments
 (0)