From e49bd29b79410f1880fcf29b07e1b7e05ef493ac Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 13 Aug 2024 12:47:42 -0400 Subject: [PATCH] Fixes https://github.com/sillsdev/serval/issues/449 --- .../ScriptureRefUsfmParserHandlerBase.cs | 6 +++ src/SIL.Machine/Corpora/UsfmTextBase.cs | 12 +++-- .../Corpora/UsfmMemoryTextTests.cs | 54 +++++++++++++++++++ 3 files changed, 68 insertions(+), 4 deletions(-) diff --git a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs index f7e9d5b7..28c1d2ea 100644 --- a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs +++ b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs @@ -187,6 +187,12 @@ IReadOnlyList attributes CheckConvertVerseParaToNonVerse(state); } + public override void OptBreak(UsfmParserState state) + { + if (_curTextType.Count == 0) + _curTextType.Push(ScriptureTextType.NonVerse); + } + protected virtual void StartVerseText(UsfmParserState state, IReadOnlyList scriptureRefs) { } protected virtual void EndVerseText(UsfmParserState state, IReadOnlyList scriptureRefs) { } diff --git a/src/SIL.Machine/Corpora/UsfmTextBase.cs b/src/SIL.Machine/Corpora/UsfmTextBase.cs index ee3d84f8..367e160b 100644 --- a/src/SIL.Machine/Corpora/UsfmTextBase.cs +++ b/src/SIL.Machine/Corpora/UsfmTextBase.cs @@ -216,16 +216,20 @@ public override void EndNote(UsfmParserState state, string marker, bool closed) public override void OptBreak(UsfmParserState state) { - base.OptBreak(state); - if (_text._includeMarkers) { - _rowTexts.Peek().Append("//"); + if (_rowTexts.Count > 0) + _rowTexts.Peek().Append("//"); + else + _rowTexts.Push(new StringBuilder("//")); } - else if (CurrentTextType != ScriptureTextType.Verse || state.IsVerseText) + else if (CurrentTextType != ScriptureTextType.Verse || state.IsVerseText) //When is this not true? { + if (_rowTexts.Count == 0) + return; _rowTexts.Peek().TrimEnd(); } + base.OptBreak(state); } public override void Text(UsfmParserState state, string text) diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs index 3cb84df1..0029f5ae 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs @@ -111,6 +111,27 @@ public void GetRows_TriplicateVerse() }); } + [Test] + public void GetRows_OptBreak() + { + // a verse paragraph that begins with a non-verse segment followed by a verse segment + TextRow[] rows = GetRows( + @"\id MAT - Test +\c 1 +\v 1 First verse in line // More text +\c 2 +\v 1 +", + includeAllText: true, + includeMarkers: true + ); + Assert.Multiple(() => + { + Assert.That(rows, Has.Length.EqualTo(2), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows[0].Text, Is.EqualTo(@"First verse in line // More text")); + }); + } + [Test] public void GetRows_VersePara_BeginningNonVerseSegment() { @@ -132,6 +153,39 @@ public void GetRows_VersePara_BeginningNonVerseSegment() Assert.That(rows, Has.Length.EqualTo(4), string.Join(",", rows.ToList().Select(tr => tr.Text))); } + [Test] + public void GetRows_OptBreakAtBeginning() + { + TextRow[] rows = GetRows( + @"\id MAT - Test +\li // +", + includeAllText: true + ); + Assert.Multiple(() => + { + Assert.That(rows, Has.Length.EqualTo(1), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows[0].Text, Is.EqualTo("")); + }); + } + + [Test] + public void GetRows_OptBreakAtBeginningIncludeMarkers() + { + TextRow[] rows = GetRows( + @"\id MAT - Test +\li // +", + includeAllText: true, + includeMarkers: true + ); + Assert.Multiple(() => + { + Assert.That(rows, Has.Length.EqualTo(1), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows[0].Text, Is.EqualTo("//")); + }); + } + private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false) { UsfmMemoryText text =