From 156cbecd90000c45dfd25988c02c8c06e97eaf35 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Wed, 10 Jul 2024 15:45:44 -0400 Subject: [PATCH] Move to tokenization tests --- .../Corpora/UsfmMemoryTextTests.cs | 17 ---------- .../Corpora/UsfmTokenizerTests.cs | 34 ++++++++++++++++++- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs index af3ed0e5..db67e9a6 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs @@ -49,23 +49,6 @@ public void GetRows_LastSegment() }); } - [Test] - public void GetRows_Ending_ParagraphMarker() - { - //The ending paragraph marker should not crash the parser. - TextRow[] rows = GetRows( - @"\id MAT - Test -\c 1 -\v 1 Descriptive title\x - \xo 18:16 \xt hello world\x*\p -" - ); - - Assert.Multiple(() => - { - Assert.That(rows, Has.Length.EqualTo(1)); - }); - } - private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false) { UsfmMemoryText text = diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmTokenizerTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmTokenizerTests.cs index 6137246a..b2dbb609 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmTokenizerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmTokenizerTests.cs @@ -1,4 +1,5 @@ -using NUnit.Framework; +using System.Text; +using NUnit.Framework; namespace SIL.Machine.Corpora; @@ -47,8 +48,39 @@ public void Detokenize() Assert.That(result, Is.EqualTo(usfm)); } + [Test] + public void Tokenize_Ending_ParagraphMarker() + { + //The ending paragraph marker should not crash the parser. + TextRow[] rows = GetRows( + @"\id MAT - Test +\c 1 +\v 1 Descriptive title\x - \xo 18:16 \xt hello world\x*\p +" + ); + + Assert.Multiple(() => + { + Assert.That(rows, Has.Length.EqualTo(1)); + }); + } + private static string ReadUsfm() { return File.ReadAllText(Path.Combine(CorporaTestHelpers.UsfmTestProjectPath, "41MATTes.SFM")); } + + private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false) + { + UsfmMemoryText text = + new( + new UsfmStylesheet("usfm.sty"), + Encoding.UTF8, + "MAT", + usfm.Trim().ReplaceLineEndings("\r\n") + "\r\n", + includeMarkers: includeMarkers, + includeAllText: includeAllText + ); + return text.GetRows().ToArray(); + } }