diff --git a/src/SIL.Machine/Corpora/UsfmParser.cs b/src/SIL.Machine/Corpora/UsfmParser.cs index 1047eb24..108f12be 100644 --- a/src/SIL.Machine/Corpora/UsfmParser.cs +++ b/src/SIL.Machine/Corpora/UsfmParser.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Text; using System.Text.RegularExpressions; using SIL.Scripture; @@ -42,7 +43,19 @@ public static void Parse( versification, preserveWhitespace ); - parser.ProcessTokens(); + try + { + parser.ProcessTokens(); + } + catch (Exception ex) + { + var sb = new StringBuilder(); + sb.Append( + $"An error occurred while parsing the USFM text in Verse: {parser.State.VerseRef}, line: {parser.State.LineNumber}, " + ); + sb.Append($"column: {parser.State.ColumnNumber}, error: '{ex.Message}'"); + throw new InvalidOperationException(sb.ToString(), ex); + } } private static readonly Regex OptBreakSplitter = new Regex("(//)", RegexOptions.Compiled); @@ -130,18 +143,7 @@ bool preserveWhitespace /// public void ProcessTokens() { - bool continueProcessing = true; - while (continueProcessing) - { - try - { - continueProcessing = ProcessToken(); - } - catch (Exception e) - { - throw new UsfmParsingException(State, e); - } - } + while (ProcessToken()) { } } /// diff --git a/src/SIL.Machine/Corpora/UsfmParsingException.cs b/src/SIL.Machine/Corpora/UsfmParsingException.cs deleted file mode 100644 index b3bcbbd5..00000000 --- a/src/SIL.Machine/Corpora/UsfmParsingException.cs +++ /dev/null @@ -1,14 +0,0 @@ -using System; -using System.Linq; - -namespace SIL.Machine.Corpora -{ - public class UsfmParsingException : Exception - { - public UsfmParsingException(UsfmParserState state, Exception exception) - : base( - $"Failed to parse at line {state.LineNumber} column {state.ColumnNumber} verse ref {state.VerseRef} with surrounding tokens [{string.Join(",", state.Tokens.ToList().GetRange(Math.Max(state.Index - 3, 0), Math.Min(7, state.Tokens.Count - (state.Index - 3))).Select(t => $"{t.Text} (TokenType={t.Type})"))}]", - exception - ) { } - } -} diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs index 6c022a8d..3cb84df1 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs @@ -92,19 +92,22 @@ public void GetRows_TriplicateVerse() TextRow[] rows = GetRows( @"\id MAT - Test \c 1 -\v 1 First verse -\rem non verse -\v 1 First verse -\rem non verse -\v 1 First verse +\v 1 First verse 1 +\rem non verse 1 +\v 1 First verse 2 +\rem non verse 2 +\v 1 First verse 3 +\rem non verse 3 \v 2 Second verse ", includeAllText: true ); Assert.Multiple(() => { - Assert.That(rows[0].Text, Is.EqualTo("First verse"), string.Join(",", rows.ToList().Select(tr => tr.Text))); - Assert.That(rows, Has.Length.EqualTo(4), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows, Has.Length.EqualTo(5), string.Join(",", rows.ToList().Select(tr => tr.Text))); + Assert.That(rows[0].Text, Is.EqualTo("First verse 1")); + Assert.That(rows[3].Text, Is.EqualTo("non verse 3")); + Assert.That(rows[4].Text, Is.EqualTo("Second verse")); }); }