Skip to content

Commit

Permalink
This is to fix: sillsdev/serval#424.
Browse files Browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Jul 10, 2024
1 parent 6979680 commit f648aa4
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
11 changes: 10 additions & 1 deletion src/SIL.Machine/Corpora/UsfmTokenizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,16 @@ public IReadOnlyList<UsfmToken> Tokenize(string usfm, bool preserveWhitespace =
else if (tokens[i - 1].Type == UsfmTokenType.End)
{
// Insert space token after * of end marker
int colNum = usfm.Length + 1 - Math.Max(usfm.LastIndexOf('\n', index), 0);
int colNum;
if (index >= usfm.Length)
{
colNum = usfm.Length + 1;
}
else
{
colNum = usfm.Length + 1 - Math.Max(usfm.LastIndexOf('\n', index), 0);
}

tokens.Insert(
i,
new UsfmToken(UsfmTokenType.Text, null, " ", null)
Expand Down
17 changes: 17 additions & 0 deletions tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,23 @@ public void GetRows_LastSegment()
});
}

[Test]
public void GetRows_Ending_ParagraphMarker()
{
    // Regression check: input that finishes with a paragraph marker placed right
    // after an end marker (\x*\p) must be parsed without crashing, and the single
    // verse in the input must still produce exactly one row.
    const string usfm =
        @"\id MAT - Test
\c 1
\v 1 Descriptive title\x - \xo 18:16 \xt hello world\x*\p
";

    TextRow[] rows = GetRows(usfm);

    Assert.Multiple(() => Assert.That(rows, Has.Length.EqualTo(1)));
}

private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false)
{
UsfmMemoryText text =
Expand Down

0 comments on commit f648aa4

Please sign in to comment.