Skip to content

Commit

Permalink
Test incorrect versification
Browse files Browse the repository at this point in the history
  • Loading branch information
ddaspit committed Aug 28, 2024
1 parent 1220953 commit 509cbe9
Showing 1 changed file with 76 additions and 0 deletions.
76 changes: 76 additions & 0 deletions tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.IO.Compression;
using System.Text;
using System.Text.Json;
using NUnit.Framework;

Expand Down Expand Up @@ -169,4 +170,79 @@ async Task GetUsfmAsync(string projectPath)
await GetUsfmAsync(ParatextProjectPath);
}
}

// Manual scenario added by the "Test incorrect versification" commit: aligns the "SUS" text of the
// source and target Paratext projects, then feeds the aligned rows into a USFM updater while
// re-parsing the source project's USFM for that book.
// NOTE(review): this method makes no assertions; `newUsfm` is computed but never checked.
[Test]
public async Task Test()
{
// NOTE(review): `targetSettings` is parsed here but not referenced again within this method.
FileParatextProjectSettingsParser targetSettingsParser = new(CorporaTestHelpers.UsfmTargetProjectPath);
ParatextProjectSettings targetSettings = targetSettingsParser.Parse();

// The source settings supply the book file name, stylesheet and versification used below.
FileParatextProjectSettingsParser sourceSettingsParser = new(CorporaTestHelpers.UsfmSourceProjectPath);
ParatextProjectSettings sourceSettings = sourceSettingsParser.Parse();

var sourceCorpus = new ParatextTextCorpus(CorporaTestHelpers.UsfmSourceProjectPath);
var targetCorpus = new ParatextTextCorpus(CorporaTestHelpers.UsfmTargetProjectPath);

// Both corpora are filtered with the single id "SUS" before alignment; ToList() forces the lazy
// iterator returned by AlignPretranslateCorpus to run here.
var rows = AlignPretranslateCorpus(sourceCorpus.FilterTexts(["SUS"]), targetCorpus.FilterTexts(["SUS"]))
.ToList();

// NOTE(review): the CI annotations captured below report that 'UsfmTextUpdater' cannot be resolved
// on either build agent, so this statement does not compile as committed. The intended type is
// not visible from here -- TODO confirm the correct class name or missing using directive.
var updater = new UsfmTextUpdater(rows, stripAllText: true, preferExistingText: true);

Check failure on line 189 in tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'UsfmTextUpdater' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 189 in tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'UsfmTextUpdater' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 189 in tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'UsfmTextUpdater' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 189 in tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'UsfmTextUpdater' could not be found (are you missing a using directive or an assembly reference?)
// Read the source project's file for "SUS" and run the parser over it with `updater` as the handler,
// using the source project's stylesheet and versification.
string usfm = await File.ReadAllTextAsync(
Path.Combine(CorporaTestHelpers.UsfmSourceProjectPath, sourceSettings.GetBookFileName("SUS"))
);
UsfmParser.Parse(usfm, updater, sourceSettings.Stylesheet, sourceSettings.Versification);
// The regenerated USFM is the end result of the scenario; nothing inspects it afterwards.
string newUsfm = updater.GetUsfm(sourceSettings.Stylesheet);
}

/// <summary>
/// Aligns <paramref name="srcCorpus"/> with <paramref name="trgCorpus"/> and groups the aligned rows so
/// that rows continuing a target range are merged into the entry that started the group.
/// </summary>
/// <param name="srcCorpus">The source corpus; the alignment is requested with all source rows.</param>
/// <param name="trgCorpus">
/// The target corpus. Its <c>Versification</c> is passed to <c>ScriptureRef.Parse</c> when the collected
/// references are re-parsed from their string form.
/// </param>
/// <returns>
/// A lazily evaluated sequence of (references, source text) pairs, one per group of rows. Within a
/// group, non-empty source texts of continuation rows are joined with a single space.
/// </returns>
private static IEnumerable<(IReadOnlyList<ScriptureRef>, string)> AlignPretranslateCorpus(
    ITextCorpus srcCorpus,
    ITextCorpus trgCorpus
)
{
    int rowCount = 0;
    StringBuilder srcSegBuffer = new();
    List<ScriptureRef> refs = [];

    // Builds the entry for the rows gathered so far and resets the accumulators for the next group.
    // The references are materialized with ToArray() before the list is cleared.
    (IReadOnlyList<ScriptureRef>, string) Flush()
    {
        (IReadOnlyList<ScriptureRef>, string) entry = (
            refs.Select(r => ScriptureRef.Parse(r.ToString(), trgCorpus.Versification)).ToArray(),
            srcSegBuffer.ToString()
        );
        srcSegBuffer.Clear();
        refs.Clear();
        rowCount = 0;
        return entry;
    }

    foreach (ParallelTextRow row in srcCorpus.AlignRows(trgCorpus, allSourceRows: true))
    {
        bool continuesTargetRange = !row.IsTargetRangeStart && row.IsTargetInRange;

        // Any row that does not continue a target range closes the group collected so far.
        if (!continuesTargetRange && rowCount > 0)
            yield return Flush();

        refs.AddRange(row.Refs.Cast<ScriptureRef>());
        if (continuesTargetRange)
        {
            // Continuation rows only contribute non-empty text, separated by a single space.
            if (row.SourceText.Length > 0)
            {
                if (srcSegBuffer.Length > 0)
                    srcSegBuffer.Append(' ');
                srcSegBuffer.Append(row.SourceText);
            }
        }
        else
        {
            // The buffer is empty here unless earlier continuation rows arrived before any group
            // was started (rowCount was 0), in which case the text is appended without a separator.
            srcSegBuffer.Append(row.SourceText);
        }
        rowCount++;
    }

    // Emit whatever is left once the alignment is exhausted.
    if (rowCount > 0)
        yield return Flush();
}
}

0 comments on commit 509cbe9

Please sign in to comment.