Skip to content

Commit

Permalink
Incorporate review changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Aug 22, 2024
1 parent 42a416f commit 45c7682
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 76 deletions.
11 changes: 5 additions & 6 deletions src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,16 @@ public ParatextBackupTermsCorpus(
using (var archive = ZipFile.OpenRead(fileName))
{
ParatextProjectSettings settings = new ZipParatextProjectSettingsParser(archive).Parse();
IEnumerable<(string, IEnumerable<string>)> glosses = new ZipParatextTermsParser(archive).Parse(
settings,
termCategories,
useTermGlosses
);
IEnumerable<(string, IReadOnlyList<string>)> glosses = new ZipParatextProjectTermsParser(
archive,
settings
).Parse(termCategories, useTermGlosses);
string textId =
$"{settings.BiblicalTermsListType}:{settings.BiblicalTermsProjectName}:{settings.BiblicalTermsFileName}";

IText text = new MemoryText(
textId,
glosses.Select(kvp => new TextRow(textId, kvp.Item1) { Segment = kvp.Item2.ToList() })
glosses.Select(kvp => new TextRow(textId, kvp.Item1) { Segment = kvp.Item2 })
);
AddText(text);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using System.Reflection;
Expand All @@ -9,7 +10,7 @@

namespace SIL.Machine.Corpora
{
public abstract class ParatextTermsParserBase
public abstract class ParatextProjectTermsParserBase
{
private static readonly List<string> PredefinedTermsListTypes = new List<string>()
{
Expand All @@ -34,19 +35,30 @@ public abstract class ParatextTermsParserBase
private static readonly Regex ContentInBracketsRegex = new Regex(@"^\[(.+?)\]$", RegexOptions.Compiled);
private static readonly Regex NumericalInformationRegex = new Regex(@"\s+\d+(\.\d+)*$", RegexOptions.Compiled);

public IEnumerable<(string, IEnumerable<string>)> Parse(
ParatextProjectSettings settings,
private readonly ParatextProjectSettings _settings;

/// <summary>
/// Creates a terms parser that uses the supplied, already-parsed project settings.
/// </summary>
/// <param name="settings">The project settings stored on this instance and read later by <c>Parse</c>.</param>
protected ParatextProjectTermsParserBase(ParatextProjectSettings settings) => _settings = settings;

/// <summary>
/// Creates a terms parser whose settings are obtained by running the given settings parser.
/// </summary>
/// <param name="settingsParser">Parser whose <c>Parse()</c> result becomes this instance's settings.</param>
/// <exception cref="ArgumentNullException"><paramref name="settingsParser"/> is null.</exception>
protected ParatextProjectTermsParserBase(ParatextProjectSettingsParserBase settingsParser)
{
    // Fail with a descriptive argument error instead of a NullReferenceException on the call below.
    if (settingsParser == null)
    {
        throw new ArgumentNullException(nameof(settingsParser));
    }

    _settings = settingsParser.Parse();
}

public IEnumerable<(string TermId, IReadOnlyList<string> Glosses)> Parse(
IEnumerable<string> termCategories,
bool useTermGlosses = true
)
{
XDocument biblicalTermsDoc;
IDictionary<string, string> termIdToCategoryDictionary;
if (settings.BiblicalTermsListType == "Project")
if (_settings.BiblicalTermsListType == "Project")
{
if (Exists(settings.BiblicalTermsFileName))
if (Exists(_settings.BiblicalTermsFileName))
{
using (Stream keyTermsFile = Open(settings.BiblicalTermsFileName))
using (Stream keyTermsFile = Open(_settings.BiblicalTermsFileName))
{
biblicalTermsDoc = XDocument.Load(keyTermsFile);
termIdToCategoryDictionary = GetCategoryPerId(biblicalTermsDoc);
Expand All @@ -65,12 +77,12 @@ public abstract class ParatextTermsParserBase
}
}
}
else if (PredefinedTermsListTypes.Contains(settings.BiblicalTermsListType))
else if (PredefinedTermsListTypes.Contains(_settings.BiblicalTermsListType))
{
using (
Stream keyTermsFile = Assembly
.GetExecutingAssembly()
.GetManifestResourceStream("SIL.Machine.Corpora." + settings.BiblicalTermsFileName)
.GetManifestResourceStream("SIL.Machine.Corpora." + _settings.BiblicalTermsFileName)
)
{
biblicalTermsDoc = XDocument.Load(keyTermsFile);
Expand All @@ -84,9 +96,9 @@ public abstract class ParatextTermsParserBase

XDocument termsGlossesDoc = null;
if (
settings.LanguageCode != null
&& settings.BiblicalTermsListType == "Major"
&& SupportedLanguageTermsLocalizationXmls.TryGetValue(settings.LanguageCode, out string resourceName)
_settings.LanguageCode != null
&& _settings.BiblicalTermsListType == "Major"
&& SupportedLanguageTermsLocalizationXmls.TryGetValue(_settings.LanguageCode, out string resourceName)
)
{
using (Stream keyTermsFile = Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName))
Expand Down Expand Up @@ -147,9 +159,9 @@ public abstract class ParatextTermsParserBase
{
return termsRenderings
.Concat(termsGlosses.Where(kvp => !termsRenderings.ContainsKey(kvp.Key)))
.Select(kvp => (kvp.Key, kvp.Value));
.Select(kvp => (kvp.Key, (IReadOnlyList<string>)kvp.Value.ToList()));
}
return new List<(string, IEnumerable<string>)>();
return new List<(string, IReadOnlyList<string>)>();
}

private static bool IsInCategory(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@

namespace SIL.Machine.Corpora
{
public class ZipParatextTermsParser : ParatextTermsParserBase
public class ZipParatextProjectTermsParser : ParatextProjectTermsParserBase
{
private readonly ZipArchive _archive;

public ZipParatextTermsParser(ZipArchive archive)
/// <summary>
/// Creates a terms parser over the given zip archive.
/// </summary>
/// <param name="archive">The archive retained by this parser.</param>
/// <param name="settings">
/// Project settings to use; when null, they are parsed from <paramref name="archive"/> with a
/// <see cref="ZipParatextProjectSettingsParser"/>.
/// </param>
public ZipParatextProjectTermsParser(ZipArchive archive, ParatextProjectSettings settings = null)
    : base(settings ?? new ZipParatextProjectSettingsParser(archive).Parse()) => _archive = archive;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

namespace SIL.Machine.Corpora;

public class MemoryParatextTermsParser(IDictionary<string, string> files) : ParatextTermsParserBase
public class MemoryParatextProjectTermsParser(ParatextProjectSettings settings, IDictionary<string, string> files)
: ParatextProjectTermsParserBase(settings)
{
public IDictionary<string, string> Files { get; } = files;

Expand Down
17 changes: 17 additions & 0 deletions tests/SIL.Machine.Tests/Corpora/ParatextBackupTermsCorpus.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
using NUnit.Framework;

namespace SIL.Machine.Corpora;

[TestFixture]
public class ParatextBackupTermsCorpusTests
{
    /// <summary>
    /// Builds a terms corpus from the backup produced by
    /// <c>CorporaTestHelpers.CreateTestParatextBackup()</c> for the "PN" category and checks that it
    /// yields exactly one row whose text is "Xerxes".
    /// </summary>
    [Test]
    public void CreateCorpus()
    {
        string backupPath = CorporaTestHelpers.CreateTestParatextBackup();
        string[] categories = { "PN" };
        var termsCorpus = new ParatextBackupTermsCorpus(backupPath, categories, true);

        IList<TextRow> textRows = termsCorpus.GetRows().ToList();

        Assert.That(textRows.Count, Is.EqualTo(1));
        Assert.That(textRows[0].Text, Is.EqualTo("Xerxes"));
    }
}
26 changes: 0 additions & 26 deletions tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsCorpus.cs

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
namespace SIL.Machine.Corpora;

[TestFixture]
public class ParatextTermsCorpusTests
public class ParatextProjectTermsParserTests
{
[Test]
public void TestGetKeyTermsFromTermsRenderings()
Expand Down Expand Up @@ -38,9 +38,9 @@ public void TestGetKeyTermsFromTermsRenderings()
}
}
);
IList<TextRow> rows = env.Corpus.GetRows().ToList();
Assert.That(rows.Count, Is.EqualTo(1));
Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Xerxes"));
IEnumerable<(string TermId, IReadOnlyList<string> Glosses)> terms = env.GetGlosses();
Assert.That(terms.Count, Is.EqualTo(1));
Assert.That(string.Join(" ", terms.First().Glosses), Is.EqualTo("Xerxes"));
}

[Test]
Expand All @@ -53,9 +53,9 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings()
),
useTermGlosses: true
);
IList<TextRow> rows = env.Corpus.GetRows().ToList();
Assert.That(rows.Count, Is.EqualTo(5726));
Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Abagtha"));
IEnumerable<(string TermId, IReadOnlyList<string> Glosses)> terms = env.GetGlosses();
Assert.That(terms.Count, Is.EqualTo(5726));
Assert.That(string.Join(" ", terms.First().Glosses), Is.EqualTo("Abagtha"));
}

[Test]
Expand All @@ -68,8 +68,8 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_DoNotUseTermG
),
useTermGlosses: false
);
IList<TextRow> rows = env.Corpus.GetRows().ToList();
Assert.That(rows.Count, Is.EqualTo(0));
IEnumerable<(string TermId, IReadOnlyList<string> Glosses)> terms = env.GetGlosses();
Assert.That(terms.Count, Is.EqualTo(0));
}

[Test]
Expand All @@ -82,9 +82,9 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_PreferLocaliz
),
useTermGlosses: true
);
IList<TextRow> rows = env.Corpus.GetRows().ToList();
Assert.That(rows.Count, Is.EqualTo(5726));
Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Abagtha"));
IEnumerable<(string TermId, IReadOnlyList<string> Glosses)> terms = env.GetGlosses();
Assert.That(terms.Count, Is.EqualTo(5726));
Assert.That(string.Join(" ", terms.First().Glosses), Is.EqualTo("Abagtha"));
}

[Test]
Expand All @@ -98,9 +98,9 @@ public void TestGetKeyTermsFromTermsLocalizations_()
),
useTermGlosses: true
);
IList<TextRow> rows = env.Corpus.GetRows().ToList();
Assert.That(rows.Count, Is.EqualTo(5715));
Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Aaron"));
IEnumerable<(string TermId, IReadOnlyList<string> Glosses)> terms = env.GetGlosses();
Assert.That(terms.Count, Is.EqualTo(5715));
Assert.That(string.Join(" ", terms.First().Glosses), Is.EqualTo("Aaron"));
}

[Test]
Expand Down Expand Up @@ -129,10 +129,10 @@ public void TestGetKeyTermsFromTermsLocalizations_TermRenderingsExists_PreferLoc
},
useTermGlosses: true
);
IList<TextRow> rows = env.Corpus.GetRows().ToList();
Assert.That(rows.Count, Is.EqualTo(5726));
Assert.That(string.Join(" ", rows.First().Segment), Is.EqualTo("Xerxes"));
Assert.That(string.Join(" ", rows[2].Segment), Is.EqualTo("Abi"));
IReadOnlyList<(string TermId, IReadOnlyList<string> Glosses)> terms = env.GetGlosses().ToList();
Assert.That(terms.Count, Is.EqualTo(5726));
Assert.That(string.Join(" ", terms[1].Glosses), Is.EqualTo("Abagtha"));
Assert.That(string.Join(" ", terms[2].Glosses), Is.EqualTo("Abi"));
}

[Test]
Expand All @@ -144,7 +144,7 @@ public void TestGetKeyTermsFromTermsLocalizations_TermRenderingsExists_PreferLoc
public void TestStripParens(string testString, string expectedOutput, char left = '(', char right = ')')
{
Assert.That(
ParatextTermsParserBase.StripParens(testString, left: left, right: right),
ParatextProjectTermsParserBase.StripParens(testString, left: left, right: right),
Is.EqualTo(expectedOutput)
);
}
Expand All @@ -159,7 +159,7 @@ public void TestStripParens(string testString, string expectedOutput, char left
[TestCase("Ahasuerus, Xerxes; Assuerus", new string[] { "Ahasuerus", "Xerxes", "Assuerus" })]
public void TestGetGlosses(string glossString, IReadOnlyList<string> expectedOutput)
{
Assert.That(ParatextTermsParserBase.GetGlosses(glossString), Is.EqualTo(expectedOutput));
Assert.That(ParatextProjectTermsParserBase.GetGlosses(glossString), Is.EqualTo(expectedOutput));
}

private class TestEnvironment(
Expand All @@ -168,13 +168,14 @@ private class TestEnvironment(
bool useTermGlosses = true
)
{
public ParatextProjectTermsCorpus Corpus { get; } =
new ParatextProjectTermsCorpus(
files ?? new(),
settings ?? new DefaultParatextProjectSettings(),
new string[] { "PN" },
useTermGlosses
);
private readonly bool _useTermGlosses = useTermGlosses;
public ParatextProjectTermsParserBase Parser { get; } =
new MemoryParatextProjectTermsParser(settings ?? new DefaultParatextProjectSettings(), files ?? new());

// Runs the parser for the "PN" term category, forwarding the gloss flag captured at construction.
public IEnumerable<(string TermId, IReadOnlyList<string> Glosses)> GetGlosses() =>
    Parser.Parse(new[] { "PN" }, _useTermGlosses);
}

private class DefaultParatextProjectSettings(
Expand Down

0 comments on commit 45c7682

Please sign in to comment.