Skip to content

Commit

Permalink
Merge branch 'master' into fix_usfm_parsing_bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 authored Aug 1, 2024
2 parents b140176 + 3911dd6 commit d2eddcd
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 31 deletions.
5 changes: 5 additions & 0 deletions src/SIL.Machine/Scripture/ScriptureRangeParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ public Dictionary<string, List<int>> GetChapters(string chapterSelections)
Dictionary<string, List<int>> chaptersPerBook = new Dictionary<string, List<int>>();
chapterSelections = chapterSelections.Trim();

if (chapterSelections.Length == 0)
{
return chaptersPerBook;
}

char delimiter = ';';
if (chapterSelections.Contains(';'))
{
Expand Down
112 changes: 82 additions & 30 deletions tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Text.Json;
using System.IO.Compression;
using System.Text.Json;
using NUnit.Framework;

namespace SIL.Machine.Corpora;
Expand Down Expand Up @@ -66,40 +67,91 @@ public record PretranslationDto

[Test]
[Ignore("This is for manual testing only. Remove this tag to run the test.")]
/*
To run this test on specific projects, place the Paratext project folders or Paratext project zip files in the Corpora/TestData/project/ folder.
If you are testing only one project, you can instead place that project in the Corpora/TestData/ folder and rename it to "project".
*/
public async Task CreateUsfmFile()
{
FileParatextProjectSettingsParser parser = new(ParatextProjectPath);
ParatextProjectSettings settings = parser.Parse();
async Task GetUsfmAsync(string projectPath)
{
ParatextProjectSettingsParserBase parser;
ZipArchive? projectArchive = null;
try
{
projectArchive = ZipFile.Open(projectPath, ZipArchiveMode.Read);
parser = new ZipParatextProjectSettingsParser(projectArchive);
}
catch (UnauthorizedAccessException)
{
parser = new FileParatextProjectSettingsParser(projectPath);
}
ParatextProjectSettings settings = parser.Parse();

// Read text from pretranslations file
using Stream pretranslationStream = File.OpenRead(PretranslationPath);
(IReadOnlyList<ScriptureRef>, string)[] pretranslations = await JsonSerializer
.DeserializeAsyncEnumerable<PretranslationDto>(
pretranslationStream,
new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }
)
.Select(p =>
(
(IReadOnlyList<ScriptureRef>)(
p?.Refs.Select(r => ScriptureRef.Parse(r, settings.Versification).ToRelaxed()).ToArray() ?? []
),
p?.Translation ?? ""
// Read text from pretranslations file
using Stream pretranslationStream = File.OpenRead(PretranslationPath);
(IReadOnlyList<ScriptureRef>, string)[] pretranslations = await JsonSerializer
.DeserializeAsyncEnumerable<PretranslationDto>(
pretranslationStream,
new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }
)
)
.ToArrayAsync();

foreach (
string sfmFileName in Directory.EnumerateFiles(
ParatextProjectPath,
$"{settings.FileNamePrefix}*{settings.FileNameSuffix}"
)
)
.Select(p =>
(
(IReadOnlyList<ScriptureRef>)(
p?.Refs.Select(r => ScriptureRef.Parse(r, settings.Versification).ToRelaxed()).ToArray()
?? []
),
p?.Translation ?? ""
)
)
.ToArrayAsync();
List<string> sfmTexts = [];
if (projectArchive == null)
{
sfmTexts = (
await Task.WhenAll(
Directory
.EnumerateFiles(projectPath, $"{settings.FileNamePrefix}*{settings.FileNameSuffix}")
.Select(async sfmFileName => await File.ReadAllTextAsync(sfmFileName))
)
).ToList();
}
else
{
sfmTexts = projectArchive
.Entries.Where(e =>
e.Name.StartsWith(settings.FileNamePrefix) && e.Name.EndsWith(settings.FileNameSuffix)
)
.Select(e =>
{
string contents;
using (var sr = new StreamReader(e.Open()))
{
contents = sr.ReadToEnd();
}
return contents;
})
.ToList();
}
foreach (string usfm in sfmTexts)
{
var updater = new UsfmTextUpdater(pretranslations, stripAllText: true, preferExistingText: true);

Check failure on line 138 in tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'UsfmTextUpdater' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 138 in tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'UsfmTextUpdater' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 138 in tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'UsfmTextUpdater' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 138 in tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'UsfmTextUpdater' could not be found (are you missing a using directive or an assembly reference?)
UsfmParser.Parse(usfm, updater, settings.Stylesheet, settings.Versification);
string newUsfm = updater.GetUsfm(settings.Stylesheet);
Assert.That(newUsfm, Is.Not.Null);
}
}
if (!File.Exists(Path.Combine(ParatextProjectPath, "Settings.xml")))
{
var updater = new UpdateUsfmParserHandler(pretranslations, stripAllText: true, preferExistingText: true);
string usfm = await File.ReadAllTextAsync(sfmFileName);
UsfmParser.Parse(usfm, updater, settings.Stylesheet, settings.Versification);
string newUsfm = updater.GetUsfm(settings.Stylesheet);
Assert.That(newUsfm, Is.Not.Null);
Assert.Multiple(() =>
{
foreach (string subdir in Directory.EnumerateFiles(ParatextProjectPath))
Assert.DoesNotThrowAsync(async () => await GetUsfmAsync(subdir), $"Failed to parse {subdir}");
});
}
else
{
await GetUsfmAsync(ParatextProjectPath);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ public static IEnumerable<TestCaseData> GetCases()
new Dictionary<string, List<int>> { { "JHN", new List<int>() } },
false
);
yield return new TestCaseData("", new Dictionary<string, List<int>>(), false);

//*Throw exceptions
yield return new TestCaseData("MAT3-1", new Dictionary<string, List<int>>(), true);
Expand All @@ -185,7 +186,6 @@ public static IEnumerable<TestCaseData> GetCases()
yield return new TestCaseData("MAT0-10", new Dictionary<string, List<int>>(), true);
yield return new TestCaseData("MAT-FLUM", new Dictionary<string, List<int>>(), true);
yield return new TestCaseData("-MAT-FLUM", new Dictionary<string, List<int>>(), true);
yield return new TestCaseData("", new Dictionary<string, List<int>>(), true);
yield return new TestCaseData("ABC", new Dictionary<string, List<int>>(), true);
yield return new TestCaseData("MAT-ABC", new Dictionary<string, List<int>>(), true);
yield return new TestCaseData("NT;-ABC-LUK", new Dictionary<string, List<int>>(), true);
Expand Down

0 comments on commit d2eddcd

Please sign in to comment.