From 7f026091c19339559fb58517218ed649b620e4b7 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Fri, 23 Aug 2024 12:51:45 -0400 Subject: [PATCH 1/2] Ignore freestanding ellipses --- .../ScriptureRefUsfmParserHandlerBase.cs | 2 +- src/SIL.Machine/Corpora/UsfmTextBase.cs | 5 +++ .../Corpora/UsfmMemoryTextTests.cs | 41 +++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs index 363209ed..3fd2eb62 100644 --- a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs +++ b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs @@ -171,7 +171,7 @@ public override void EndNote(UsfmParserState state, string marker, bool closed) public override void Text(UsfmParserState state, string text) { // if we hit text in a verse paragraph and we aren't in a verse, then start a non-verse segment - if (text.Trim().Length > 0) + if (text.Trim().Length > 0 && text.Trim() != "...") CheckConvertVerseParaToNonVerse(state); } diff --git a/src/SIL.Machine/Corpora/UsfmTextBase.cs b/src/SIL.Machine/Corpora/UsfmTextBase.cs index ee3d84f8..f530fd9a 100644 --- a/src/SIL.Machine/Corpora/UsfmTextBase.cs +++ b/src/SIL.Machine/Corpora/UsfmTextBase.cs @@ -235,6 +235,11 @@ public override void Text(UsfmParserState state, string text) if (_rowTexts.Count == 0) return; + if (text.Trim() == "...") + { + text = ""; + } + StringBuilder rowText = _rowTexts.Peek(); if (_text._includeMarkers) { diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs index e0712e45..71b7f395 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs @@ -184,6 +184,47 @@ public void GetRows_OptBreak_BeginningIncludeMarkers() }); } + [Test] + public void GetRows_IgnoreLoneEllipsis() + { + TextRow[] rows = GetRows( + @"\id MAT - Test +\c 1 +\q1 +\f \fr 119 \ft World \f* +\v 1 ... +\v 2 Text +\c 2 +\d +description +\b +", + includeAllText: true + ); + Assert.Multiple(() => + { + Assert.That(rows, Has.Length.EqualTo(5), string.Join(",", rows.Select(tr => tr.Text))); + Assert.That(rows[1].Text, Is.EqualTo("")); + }); + } + + [Test] + public void GetRows_Ellipsis() + { + TextRow[] rows = GetRows( + @"\id MAT - Test +\c 1 +\v 1 Verse text ... More text +", + includeAllText: true + ); + Assert.Multiple(() => + { + Assert.That(rows, Has.Length.EqualTo(1), string.Join(",", rows.Select(tr => tr.Text))); + Assert.That(rows[0].Text, Is.EqualTo("Verse text ... More text")); + }); + } + private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false) { UsfmMemoryText text = From 0d743a7c13a9c17bb99bd1626d5c434a70158b57 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Mon, 26 Aug 2024 12:42:45 -0400 Subject: [PATCH 2/2] Move check to text generation --- .../Corpora/ScriptureRefUsfmParserHandlerBase.cs | 2 +- src/SIL.Machine/Corpora/UsfmTextBase.cs | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs index 3fd2eb62..363209ed 100644 --- a/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs +++ b/src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs @@ -171,7 +171,7 @@ public override void EndNote(UsfmParserState state, string marker, bool closed) public override void Text(UsfmParserState state, string text) { // if we hit text in a verse paragraph and we aren't in a verse, then start a non-verse segment - if (text.Trim().Length > 0 && text.Trim() != "...") + if (text.Trim().Length > 0) CheckConvertVerseParaToNonVerse(state); } diff --git a/src/SIL.Machine/Corpora/UsfmTextBase.cs b/src/SIL.Machine/Corpora/UsfmTextBase.cs index f530fd9a..e3777f08 100644 --- a/src/SIL.Machine/Corpora/UsfmTextBase.cs +++ b/src/SIL.Machine/Corpora/UsfmTextBase.cs @@ -235,11 +235,6 @@ public override void Text(UsfmParserState state, string text) if (_rowTexts.Count == 0) return; - if (text.Trim() == "...") - { - text = ""; - } - StringBuilder rowText = _rowTexts.Peek(); if (_text._includeMarkers) { @@ -279,6 +274,10 @@ protected override void StartVerseText(UsfmParserState state, IReadOnlyList scriptureRefs) { string text = _rowTexts.Pop().ToString(); + if (text.Trim() == "...") + { + text = ""; + } _rows.AddRange(_text.CreateRows(scriptureRefs, text, _sentenceStart)); _sentenceStart = state.Token.Marker == "c" || text.HasSentenceEnding(); }