From fc18cf3abf6ad774d2175423cd6e4074fb84d983 Mon Sep 17 00:00:00 2001
From: Peter Chapman
Date: Thu, 16 Apr 2026 14:15:12 +1200
Subject: [PATCH] Add support for per-chapter remarks

---
 .../Corpora/ParatextProjectTextUpdaterBase.cs |  2 +-
 .../Corpora/UpdateUsfmParserHandler.cs        | 71 +++++++++++++----
 .../Corpora/UpdateUsfmParserHandlerTests.cs   | 77 ++++++++++++++++++-
 3 files changed, 130 insertions(+), 20 deletions(-)

diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs
index 5b0731c4..13b71b84 100644
--- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs
+++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs
@@ -29,7 +29,7 @@ public string UpdateUsfm(
             UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip,
             IEnumerable preserveParagraphStyles = null,
             IEnumerable updateBlockHandlers = null,
-            IEnumerable remarks = null,
+            IEnumerable<(int, string)> remarks = null,
             Func errorHandler = null,
             bool compareSegments = false
         )
diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
index 76a59336..d1e067cf 100644
--- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
+++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
@@ -60,7 +60,7 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase
         private readonly HashSet _preserveParagraphStyles;
         private readonly Stack _updateBlocks;
         private readonly Stack _updateBlockHandlers;
-        private readonly List _remarks;
+        private readonly List<(int, string)> _remarks;
         private readonly Stack _replace;
         private int _tokenIndex;
         private readonly Func _errorHandler;
@@ -76,7 +76,7 @@ public UpdateUsfmParserHandler(
             UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip,
             IEnumerable preserveParagraphStyles = null,
             IEnumerable updateBlockHandlers = null,
-            IEnumerable remarks = null,
+            IEnumerable<(int, string)> remarks = null,
             Func errorHandler = null,
             bool compareSegments = false
         )
@@ -107,7 +107,7 @@ public UpdateUsfmParserHandler(
                 preserveParagraphStyles == null
                     ? new HashSet { "r", "rem" }
                     : new HashSet(preserveParagraphStyles);
-            _remarks = remarks?.ToList() ?? new List();
+            _remarks = remarks?.ToList() ?? new List<(int, string)>();
             _errorHandler = errorHandler;
             if (_errorHandler == null)
                 _errorHandler = (error) => false;
@@ -433,26 +433,67 @@ public string GetUsfm(string stylesheetFileName = "usfm.sty")
         public string GetUsfm(UsfmStylesheet stylesheet)
         {
             var tokenizer = new UsfmTokenizer(stylesheet);
-            List tokens = new List(_tokens);
-            if (_remarks.Count() > 0)
+            var tokens = new List(_tokens);
+            if (_remarks.Count > 0)
             {
-                var remarkTokens = new List();
-                foreach (string remark in _remarks)
+                var remarkTokensByChapter = new Dictionary>();
+                foreach ((int chapterNum, string remark) in _remarks)
                 {
-                    remarkTokens.Add(new UsfmToken(UsfmTokenType.Paragraph, "rem", null, null));
-                    remarkTokens.Add(new UsfmToken(remark));
+                    // Add the remark tokens for each chapter that is to have remarks
+                    if (!remarkTokensByChapter.TryGetValue(chapterNum, out List chapterTokens))
+                    {
+                        chapterTokens = new List();
+                        remarkTokensByChapter.Add(chapterNum, chapterTokens);
+                    }
+
+                    chapterTokens.Add(new UsfmToken(UsfmTokenType.Paragraph, "rem", null, null));
+                    chapterTokens.Add(new UsfmToken(remark));
                 }
                 if (tokens.Count > 0)
                 {
-                    int index = 0;
-                    HashSet markersToSkip = new HashSet() { "id", "ide", "rem" };
-                    while (markersToSkip.Contains(tokens[index].Marker))
+                    foreach (KeyValuePair> remarkTokens in remarkTokensByChapter)
                     {
-                        index++;
-                        if (tokens.Count > index && tokens[index].Type == UsfmTokenType.Text)
+                        int index;
+                        HashSet markersToSkip;
+                        if (remarkTokens.Key == 0)
+                        {
+                            // Add the remarks at the top level of the USFM,
+                            // after the book id, encode, and any initial comments
+                            index = 0;
+                            markersToSkip = new HashSet { "id", "ide", "rem" };
+                        }
+                        else
+                        {
+                            // Add the remarks directly after the specified chapter marker.
+                            // NOTE(review): \ca and \cp are not skipped yet, as markersToSkip is empty
+                            index = tokens.FindIndex(t =>
+                                t.Type == UsfmTokenType.Chapter
+                                && int.TryParse(t.Data, out int chapterNumber)
+                                && chapterNumber == remarkTokens.Key
+                            );
+                            if (index == -1)
+                                continue;
                             index++;
+                            markersToSkip = new HashSet();
+                        }
+
+                        if (index >= tokens.Count)
+                        {
+                            // The remark insertion point is at the very end
+                            tokens.AddRange(remarkTokens.Value);
+                        }
+                        else
+                        {
+                            while (index < tokens.Count && markersToSkip.Contains(tokens[index].Marker))
+                            {
+                                index++;
+                                if (tokens.Count > index && tokens[index].Type == UsfmTokenType.Text)
+                                    index++;
+                            }
+
+                            tokens.InsertRange(index, remarkTokens.Value);
+                        }
                     }
-                    tokens.InsertRange(index, remarkTokens);
                 }
             }
 
diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
index 9b5219c3..e6f6ac65 100644
--- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
+++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
@@ -1380,7 +1380,7 @@ public void GetUsfm_IdTags()
     }
 
     [Test]
-    public void GetUsfm_PreferExisting_AddRemark()
+    public void GetUsfm_PreferExisting_AddRemarkToStart()
     {
         var rows = new List
         {
@@ -1400,7 +1400,7 @@ public void GetUsfm_PreferExisting_AddRemark()
             rows,
             usfm,
             textBehavior: UpdateUsfmTextBehavior.PreferExisting,
-            remarks: ["New remark"]
+            remarks: [(0, "New remark")]
         );
         string result =
             @"\id MAT - Test
@@ -1419,7 +1419,7 @@ public void GetUsfm_PreferExisting_AddRemark()
             rows,
             target,
             textBehavior: UpdateUsfmTextBehavior.PreferExisting,
-            remarks: ["New remark 2"]
+            remarks: [(0, "New remark 2")]
         );
         result =
             @"\id MAT - Test
@@ -1436,6 +1436,75 @@ public void GetUsfm_PreferExisting_AddRemark()
         AssertUsfmEquals(target, result);
     }
 
+    [Test]
+    public void GetUsfm_PreferExisting_AddRemarkToChapter()
+    {
+        var rows = new List
+        {
+            new UpdateUsfmRow(ScrRef("MAT 2:1"), "Update 1"),
+            new UpdateUsfmRow(ScrRef("MAT 2:2"), "Update 2"),
+        };
+        string usfm =
+            @"\id MAT - Test
+\ide UTF-8
+\c 1
+\v 1 Chapter 1, Verse 1
+\c 2
+\rem Existing remark
+\v 1 Some text
+\v 2
+\v 3 Other text
+\c 3
+";
+        string target = UpdateUsfm(
+            rows,
+            usfm,
+            textBehavior: UpdateUsfmTextBehavior.PreferExisting,
+            remarks: [(2, "New remark"), (3, "Last remark"), (4, "Remark for missing chapter")]
+        );
+        string result =
+            @"\id MAT - Test
+\ide UTF-8
+\c 1
+\v 1 Chapter 1, Verse 1
+\c 2
+\rem New remark
+\rem Existing remark
+\v 1 Some text
+\v 2 Update 2
+\v 3 Other text
+\c 3
+\rem Last remark
+";
+
+        AssertUsfmEquals(target, result);
+
+        target = UpdateUsfm(
+            rows,
+            target,
+            textBehavior: UpdateUsfmTextBehavior.PreferExisting,
+            remarks: [(1, "New remark 2"), (2, "New remark 3")]
+        );
+        result =
+            @"\id MAT - Test
+\ide UTF-8
+\c 1
+\rem New remark 2
+\v 1 Chapter 1, Verse 1
+\c 2
+\rem New remark 3
+\rem New remark
+\rem Existing remark
+\v 1 Some text
+\v 2 Update 2
+\v 3 Other text
+\c 3
+\rem Last remark
+";
+
+        AssertUsfmEquals(target, result);
+    }
+
     [Test]
     public void UpdateBlock_FootnoteInPublishedChapterNumber()
     {
@@ -1536,7 +1605,7 @@ private static string UpdateUsfm(
         UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip,
         IEnumerable? preserveParagraphStyles = null,
         IEnumerable? usfmUpdateBlockHandlers = null,
-        IEnumerable<string>? remarks = null,
+        IEnumerable<(int, string)>? remarks = null,
         bool compareSegments = false
     )
     {