From e49bd29b79410f1880fcf29b07e1b7e05ef493ac Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 13 Aug 2024 12:47:42 -0400 Subject: [PATCH 1/3] Fixes https://github.com/sillsdev/serval/issues/449 --- .../ScriptureRefUsfmParserHandlerBase.cs | 6 +++ src/SIL.Machine/Corpora/UsfmTextBase.cs | 12 +++-- .../Corpora/UsfmMemoryTextTests.cs | 54 +++++++++++++++++++ 3 files changed, 68 insertions(+), 4 deletions(-) diff --git a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs index f7e9d5b7..28c1d2ea 100644 --- a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs +++ b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs @@ -187,6 +187,12 @@ IReadOnlyList attributes CheckConvertVerseParaToNonVerse(state); } + public override void OptBreak(UsfmParserState state) + { + if (_curTextType.Count == 0) + _curTextType.Push(ScriptureTextType.NonVerse); + } + protected virtual void StartVerseText(UsfmParserState state, IReadOnlyList scriptureRefs) { } protected virtual void EndVerseText(UsfmParserState state, IReadOnlyList scriptureRefs) { } diff --git a/src/SIL.Machine/Corpora/UsfmTextBase.cs b/src/SIL.Machine/Corpora/UsfmTextBase.cs index ee3d84f8..367e160b 100644 --- a/src/SIL.Machine/Corpora/UsfmTextBase.cs +++ b/src/SIL.Machine/Corpora/UsfmTextBase.cs @@ -216,16 +216,20 @@ public override void EndNote(UsfmParserState state, string marker, bool closed) public override void OptBreak(UsfmParserState state) { - base.OptBreak(state); - if (_text._includeMarkers) { - _rowTexts.Peek().Append("//"); + if (_rowTexts.Count > 0) + _rowTexts.Peek().Append("//"); + else + _rowTexts.Push(new StringBuilder("//")); } - else if (CurrentTextType != ScriptureTextType.Verse || state.IsVerseText) + else if (CurrentTextType != ScriptureTextType.Verse || state.IsVerseText) //When is this not true? { + if (_rowTexts.Count == 0) + return; _rowTexts.Peek().TrimEnd(); } + base.OptBreak(state); } public override void Text(UsfmParserState state, string text) diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs index 3cb84df1..0029f5ae 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs @@ -111,6 +111,27 @@ public void GetRows_TriplicateVerse() }); } + [Test] + public void GetRows_OptBreak() + { + // a verse paragraph that begins with a non-verse segment followed by a verse segment + TextRow[] rows = GetRows( + @"\id MAT - Test +\c 1 +\v 1 First verse in line // More text +\c 2 +\v 1 +", + includeAllText: true, + includeMarkers: true + ); + Assert.Multiple(() => + { + Assert.That(rows, Has.Length.EqualTo(2), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows[0].Text, Is.EqualTo(@"First verse in line // More text")); + }); + } + [Test] public void GetRows_VersePara_BeginningNonVerseSegment() { @@ -132,6 +153,39 @@ public void GetRows_VersePara_BeginningNonVerseSegment() Assert.That(rows, Has.Length.EqualTo(4), string.Join(",", rows.ToList().Select(tr => tr.Text))); } + [Test] + public void GetRows_OptBreakAtBeginning() + { + TextRow[] rows = GetRows( + @"\id MAT - Test +\li // +", + includeAllText: true + ); + Assert.Multiple(() => + { + Assert.That(rows, Has.Length.EqualTo(1), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows[0].Text, Is.EqualTo("")); + }); + } + + [Test] + public void GetRows_OptBreakAtBeginningIncludeMarkers() + { + TextRow[] rows = GetRows( + @"\id MAT - Test +\li // +", + includeAllText: true, + includeMarkers: true + ); + Assert.Multiple(() => + { + Assert.That(rows, Has.Length.EqualTo(1), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows[0].Text, Is.EqualTo("//")); + }); + } + private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false) { UsfmMemoryText text = From a032d937435c9f429f44b5536bcc89cedc3850b2 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Tue, 13 Aug 2024 14:20:25 -0400 Subject: [PATCH 2/3] Fix copy-paste error --- tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs index 0029f5ae..708d5823 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs @@ -114,7 +114,6 @@ public void GetRows_TriplicateVerse() [Test] public void GetRows_OptBreak() { - // a verse paragraph that begins with a non-verse segment followed by a verse segment TextRow[] rows = GetRows( @"\id MAT - Test \c 1 @@ -135,7 +134,6 @@ public void GetRows_OptBreak() [Test] public void GetRows_VersePara_BeginningNonVerseSegment() { - // a verse paragraph that begins with a non-verse segment followed by a verse segment TextRow[] rows = GetRows( @"\id MAT - Test \c 1 From 6f34aa003efc6564bcb348c4114eeaca4ac12603 Mon Sep 17 00:00:00 2001 From: Damien Daspit Date: Tue, 13 Aug 2024 13:33:44 -0500 Subject: [PATCH 3/3] Use "CheckConvertVerseParaToNonVerse" for opt breaks --- .../ScriptureRefUsfmParserHandlerBase.cs | 11 +++++------ src/SIL.Machine/Corpora/UsfmTextBase.cs | 12 ++++-------- .../Corpora/UsfmMemoryTextTests.cs | 18 +++++++++--------- 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs index 28c1d2ea..363209ed 100644 --- a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs +++ b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs @@ -175,6 +175,11 @@ public override void Text(UsfmParserState state, string text) CheckConvertVerseParaToNonVerse(state); } + public override void OptBreak(UsfmParserState state) + { + CheckConvertVerseParaToNonVerse(state); + } + public override void StartChar( UsfmParserState state, string markerWithoutPlus, @@ -187,12 +192,6 @@ IReadOnlyList attributes CheckConvertVerseParaToNonVerse(state); } - public override void OptBreak(UsfmParserState state) - { - if (_curTextType.Count == 0) - _curTextType.Push(ScriptureTextType.NonVerse); - } - protected virtual void StartVerseText(UsfmParserState state, IReadOnlyList scriptureRefs) { } protected virtual void EndVerseText(UsfmParserState state, IReadOnlyList scriptureRefs) { } diff --git a/src/SIL.Machine/Corpora/UsfmTextBase.cs b/src/SIL.Machine/Corpora/UsfmTextBase.cs index 367e160b..ee3d84f8 100644 --- a/src/SIL.Machine/Corpora/UsfmTextBase.cs +++ b/src/SIL.Machine/Corpora/UsfmTextBase.cs @@ -216,20 +216,16 @@ public override void EndNote(UsfmParserState state, string marker, bool closed) public override void OptBreak(UsfmParserState state) { + base.OptBreak(state); + if (_text._includeMarkers) { - if (_rowTexts.Count > 0) - _rowTexts.Peek().Append("//"); - else - _rowTexts.Push(new StringBuilder("//")); + _rowTexts.Peek().Append("//"); } - else if (CurrentTextType != ScriptureTextType.Verse || state.IsVerseText) //When is this not true? + else if (CurrentTextType != ScriptureTextType.Verse || state.IsVerseText) { - if (_rowTexts.Count == 0) - return; _rowTexts.Peek().TrimEnd(); } - base.OptBreak(state); } public override void Text(UsfmParserState state, string text) diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs index 708d5823..e0712e45 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs @@ -83,7 +83,7 @@ public void GetRows_DuplicateVerseWithTable() includeAllText: true ); - Assert.That(rows, Has.Length.EqualTo(5), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows, Has.Length.EqualTo(5), string.Join(",", rows.Select(tr => tr.Text))); } [Test] @@ -104,7 +104,7 @@ public void GetRows_TriplicateVerse() ); Assert.Multiple(() => { - Assert.That(rows, Has.Length.EqualTo(5), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows, Has.Length.EqualTo(5), string.Join(",", rows.Select(tr => tr.Text))); Assert.That(rows[0].Text, Is.EqualTo("First verse 1")); Assert.That(rows[3].Text, Is.EqualTo("non verse 3")); Assert.That(rows[4].Text, Is.EqualTo("Second verse")); @@ -112,7 +112,7 @@ public void GetRows_TriplicateVerse() } [Test] - public void GetRows_OptBreak() + public void GetRows_OptBreak_MiddleIncludeMarkers() { TextRow[] rows = GetRows( @"\id MAT - Test @@ -126,7 +126,7 @@ public void GetRows_OptBreak() ); Assert.Multiple(() => { - Assert.That(rows, Has.Length.EqualTo(2), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows, Has.Length.EqualTo(2), string.Join(",", rows.Select(tr => tr.Text))); Assert.That(rows[0].Text, Is.EqualTo(@"First verse in line // More text")); }); } @@ -148,11 +148,11 @@ public void GetRows_VersePara_BeginningNonVerseSegment() includeAllText: true ); - Assert.That(rows, Has.Length.EqualTo(4), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows, Has.Length.EqualTo(4), string.Join(",", rows.Select(tr => tr.Text))); } [Test] - public void GetRows_OptBreakAtBeginning() + public void GetRows_OptBreak_Beginning() { TextRow[] rows = GetRows( @"\id MAT - Test @@ -162,13 +162,13 @@ public void GetRows_OptBreakAtBeginning() ); Assert.Multiple(() => { - Assert.That(rows, Has.Length.EqualTo(1), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows, Has.Length.EqualTo(1), string.Join(",", rows.Select(tr => tr.Text))); Assert.That(rows[0].Text, Is.EqualTo("")); }); } [Test] - public void GetRows_OptBreakAtBeginningIncludeMarkers() + public void GetRows_OptBreak_BeginningIncludeMarkers() { TextRow[] rows = GetRows( @"\id MAT - Test @@ -179,7 +179,7 @@ public void GetRows_OptBreakAtBeginningIncludeMarkers() ); Assert.Multiple(() => { - Assert.That(rows, Has.Length.EqualTo(1), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows, Has.Length.EqualTo(1), string.Join(",", rows.Select(tr => tr.Text))); Assert.That(rows[0].Text, Is.EqualTo("//")); }); }