Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public string UpdateUsfm(
UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip,
IEnumerable<string> preserveParagraphStyles = null,
IEnumerable<IUsfmUpdateBlockHandler> updateBlockHandlers = null,
IEnumerable<string> remarks = null,
IEnumerable<(int, string)> remarks = null,
Func<UsfmUpdateBlockHandlerException, bool> errorHandler = null,
bool compareSegments = false
)
Expand Down
71 changes: 56 additions & 15 deletions src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase
private readonly HashSet<string> _preserveParagraphStyles;
private readonly Stack<UsfmUpdateBlock> _updateBlocks;
private readonly Stack<IUsfmUpdateBlockHandler> _updateBlockHandlers;
private readonly List<string> _remarks;
private readonly List<(int, string)> _remarks;
private readonly Stack<bool> _replace;
private int _tokenIndex;
private readonly Func<UsfmUpdateBlockHandlerException, bool> _errorHandler;
Expand All @@ -76,7 +76,7 @@ public UpdateUsfmParserHandler(
UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip,
IEnumerable<string> preserveParagraphStyles = null,
IEnumerable<IUsfmUpdateBlockHandler> updateBlockHandlers = null,
IEnumerable<string> remarks = null,
IEnumerable<(int, string)> remarks = null,
Func<UsfmUpdateBlockHandlerException, bool> errorHandler = null,
bool compareSegments = false
)
Expand Down Expand Up @@ -107,7 +107,7 @@ public UpdateUsfmParserHandler(
preserveParagraphStyles == null
? new HashSet<string> { "r", "rem" }
: new HashSet<string>(preserveParagraphStyles);
_remarks = remarks?.ToList() ?? new List<string>();
_remarks = remarks?.ToList() ?? new List<(int, string)>();
_errorHandler = errorHandler;
if (_errorHandler == null)
_errorHandler = (error) => false;
Expand Down Expand Up @@ -433,26 +433,67 @@ public string GetUsfm(string stylesheetFileName = "usfm.sty")
public string GetUsfm(UsfmStylesheet stylesheet)
{
var tokenizer = new UsfmTokenizer(stylesheet);
List<UsfmToken> tokens = new List<UsfmToken>(_tokens);
if (_remarks.Count() > 0)
var tokens = new List<UsfmToken>(_tokens);
if (_remarks.Count > 0)
{
var remarkTokens = new List<UsfmToken>();
foreach (string remark in _remarks)
var remarkTokensByChapter = new Dictionary<int, List<UsfmToken>>();
foreach ((int chapterNum, string remark) in _remarks)
{
remarkTokens.Add(new UsfmToken(UsfmTokenType.Paragraph, "rem", null, null));
remarkTokens.Add(new UsfmToken(remark));
// Add the remark tokens for each chapter that is to have remarks
if (!remarkTokensByChapter.TryGetValue(chapterNum, out List<UsfmToken> chapterTokens))
{
chapterTokens = new List<UsfmToken>();
remarkTokensByChapter.Add(chapterNum, chapterTokens);
}

chapterTokens.Add(new UsfmToken(UsfmTokenType.Paragraph, "rem", null, null));
chapterTokens.Add(new UsfmToken(remark));
}
if (tokens.Count > 0)
{
int index = 0;
HashSet<string> markersToSkip = new HashSet<string>() { "id", "ide", "rem" };
while (markersToSkip.Contains(tokens[index].Marker))
foreach (KeyValuePair<int, List<UsfmToken>> remarkTokens in remarkTokensByChapter)
{
index++;
if (tokens.Count > index && tokens[index].Type == UsfmTokenType.Text)
int index;
HashSet<string> markersToSkip;
if (remarkTokens.Key == 0)
{
// Add the remarks at the top level of the USFM,
// after the book id, encode, and any initial comments
index = 0;
markersToSkip = new HashSet<string> { "id", "ide", "rem" };
}
else
{
// Add the remarks just after the specified chapter,
// skipping any alternate and published chapter numbers
index = tokens.FindIndex(t =>
t.Type == UsfmTokenType.Chapter
&& int.TryParse(t.Data, out int chapterNumber)
&& chapterNumber == remarkTokens.Key
);
if (index == -1)
continue;
index++;
markersToSkip = new HashSet<string>();
}

if (index >= tokens.Count)
{
// The remark insertion point is at the very end
tokens.AddRange(remarkTokens.Value);
}
else
{
while (markersToSkip.Contains(tokens[index].Marker))
{
index++;
if (tokens.Count > index && tokens[index].Type == UsfmTokenType.Text)
index++;
}

tokens.InsertRange(index, remarkTokens.Value);
}
}
tokens.InsertRange(index, remarkTokens);
}
}

Expand Down
77 changes: 73 additions & 4 deletions tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1380,7 +1380,7 @@ public void GetUsfm_IdTags()
}

[Test]
public void GetUsfm_PreferExisting_AddRemark()
public void GetUsfm_PreferExisting_AddRemarkToStart()
{
var rows = new List<UpdateUsfmRow>
{
Expand All @@ -1400,7 +1400,7 @@ public void GetUsfm_PreferExisting_AddRemark()
rows,
usfm,
textBehavior: UpdateUsfmTextBehavior.PreferExisting,
remarks: ["New remark"]
remarks: [(0, "New remark")]
);
string result =
@"\id MAT - Test
Expand All @@ -1419,7 +1419,7 @@ public void GetUsfm_PreferExisting_AddRemark()
rows,
target,
textBehavior: UpdateUsfmTextBehavior.PreferExisting,
remarks: ["New remark 2"]
remarks: [(0, "New remark 2")]
);
result =
@"\id MAT - Test
Expand All @@ -1436,6 +1436,75 @@ public void GetUsfm_PreferExisting_AddRemark()
AssertUsfmEquals(target, result);
}

[Test]
public void GetUsfm_PreferExisting_AddRemarkToChapter()
{
    // Remarks are supplied as (chapter number, text) pairs. This test checks that each remark
    // is written directly after its chapter marker, ahead of any remark already in that chapter,
    // and that a remark addressed to a chapter absent from the USFM does not appear in the output.
    List<UpdateUsfmRow> updates =
    [
        new UpdateUsfmRow(ScrRef("MAT 2:1"), "Update 1"),
        new UpdateUsfmRow(ScrRef("MAT 2:2"), "Update 2"),
    ];
    string source =
        @"\id MAT - Test
\ide UTF-8
\c 1
\v 1 Chapter 1, Verse 1
\c 2
\rem Existing remark
\v 1 Some text
\v 2
\v 3 Other text
\c 3
";

    // First pass: chapter 2 already has a remark, chapter 3 is the final token in the input,
    // and chapter 4 is not present in the input at all.
    (int, string)[] firstPassRemarks =
    [
        (2, "New remark"),
        (3, "Last remark"),
        (4, "Remark for missing chapter"),
    ];
    string firstPass = UpdateUsfm(
        updates,
        source,
        textBehavior: UpdateUsfmTextBehavior.PreferExisting,
        remarks: firstPassRemarks
    );
    string expectedFirstPass =
        @"\id MAT - Test
\ide UTF-8
\c 1
\v 1 Chapter 1, Verse 1
\c 2
\rem New remark
\rem Existing remark
\v 1 Some text
\v 2 Update 2
\v 3 Other text
\c 3
\rem Last remark
";

    AssertUsfmEquals(firstPass, expectedFirstPass);

    // Second pass: feed the first pass's output back in. The new remarks are expected
    // to land in front of the remarks written during the first pass.
    (int, string)[] secondPassRemarks = [(1, "New remark 2"), (2, "New remark 3")];
    string secondPass = UpdateUsfm(
        updates,
        firstPass,
        textBehavior: UpdateUsfmTextBehavior.PreferExisting,
        remarks: secondPassRemarks
    );
    string expectedSecondPass =
        @"\id MAT - Test
\ide UTF-8
\c 1
\rem New remark 2
\v 1 Chapter 1, Verse 1
\c 2
\rem New remark 3
\rem New remark
\rem Existing remark
\v 1 Some text
\v 2 Update 2
\v 3 Other text
\c 3
\rem Last remark
";

    AssertUsfmEquals(secondPass, expectedSecondPass);
}

[Test]
public void UpdateBlock_FootnoteInPublishedChapterNumber()
{
Expand Down Expand Up @@ -1536,7 +1605,7 @@ private static string UpdateUsfm(
UpdateUsfmMarkerBehavior styleBehavior = UpdateUsfmMarkerBehavior.Strip,
IEnumerable<string>? preserveParagraphStyles = null,
IEnumerable<IUsfmUpdateBlockHandler>? usfmUpdateBlockHandlers = null,
IEnumerable<string>? remarks = null,
IEnumerable<(int, string)>? remarks = null,
bool compareSegments = false
)
{
Expand Down
Loading