Skip to content

Commit

Permalink
Use language code from settings file
Browse files (browse the repository at this point in the history)
  • Loading branch information
Enkidu93 committed Aug 16, 2024
1 parent 056c9fd commit df2e9ac
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 46 deletions.
83 changes: 42 additions & 41 deletions src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,48 +32,17 @@ public class ParatextBackupTermsCorpus : DictionaryTextCorpus
{ "pt", "SIL.Machine.Corpora.BiblicalTermsPt.xml" }
};

private static readonly Regex ContentInBracketsRegex = new Regex(@"^\[(.+?)\]$", RegexOptions.Compiled);
private static readonly Regex NumericalInformationRegex = new Regex(@"\s+\d+(\.\d+)*$", RegexOptions.Compiled);

public ParatextBackupTermsCorpus(
string fileName,
IEnumerable<string> termCategories,
string languageCode = null,
bool preferTermsLocalization = false
)
{
using (var archive = ZipFile.OpenRead(fileName))
{
ZipArchiveEntry termsFileEntry = archive.GetEntry("TermRenderings.xml");
XDocument doc;
bool useTermsRenderingXml = !preferTermsLocalization && termsFileEntry != null;

if (!SupportedLanguageTermsLocalizationXmls.TryGetValue(languageCode, out string resourceName))
{
if (termsFileEntry != null)
{
useTermsRenderingXml = true;
}
else
{
return;
}
}

if (useTermsRenderingXml)
{
using (Stream keyTermsFile = termsFileEntry.Open())
{
doc = XDocument.Load(keyTermsFile);
}
}
else
{
using (
Stream keyTermsFile = Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName)
)
{
doc = XDocument.Load(keyTermsFile);
}
}

var settingsParser = new ZipParatextProjectSettingsParser(archive);
ParatextProjectSettings settings = settingsParser.Parse();

Expand Down Expand Up @@ -121,6 +90,40 @@ public ParatextBackupTermsCorpus(
{
termIdToCategoryDictionary = new Dictionary<string, string>();
}
ZipArchiveEntry termsFileEntry = archive.GetEntry("TermRenderings.xml");
XDocument doc;
bool useTermsRenderingXml =
(!preferTermsLocalization || settings.BiblicalTermsListType != "Major") && termsFileEntry != null;

if (!SupportedLanguageTermsLocalizationXmls.TryGetValue(settings.LanguageCode, out string resourceName))
{
if (termsFileEntry != null)
{
useTermsRenderingXml = true;
}
else
{
return;
}
}

if (useTermsRenderingXml)
{
using (Stream keyTermsFile = termsFileEntry.Open())
{
doc = XDocument.Load(keyTermsFile);
}
}
else
{
using (
Stream keyTermsFile = Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName)
)
{
doc = XDocument.Load(keyTermsFile);
}
}

AddTexts(doc, settings, termCategories, termIdToCategoryDictionary);
}
}
Expand Down Expand Up @@ -168,19 +171,17 @@ IDictionary<string, string> termIdToCategoryDictionary
public static IReadOnlyList<string> GetGlosses(string gloss)
{
//If entire term rendering is surrounded in square brackets, remove them
Regex rx = new Regex(@"^\[(.+?)\]$", RegexOptions.Compiled);
Match rx_match = rx.Match(gloss);
if (rx_match.Success)
gloss = rx_match.Groups[0].Value;
Match match = ContentInBracketsRegex.Match(gloss);
if (match.Success)
gloss = match.Groups[0].Value;
gloss = gloss.Replace("?", "");
gloss = gloss.Replace("*", "");
gloss = gloss.Replace("/", " ");
gloss = gloss.Trim();
gloss = StripParens(gloss);
gloss = StripParens(gloss, left: '[', right: ']');
// gloss = gloss.Trim();
Regex rx2 = new Regex(@"\s+\d+(\.\d+)*$", RegexOptions.Compiled);
foreach (Match m in rx2.Matches(gloss))
gloss = gloss.Trim();
foreach (Match m in NumericalInformationRegex.Matches(gloss))
{
gloss.Replace(m.Value, "");
}
Expand Down
6 changes: 5 additions & 1 deletion src/SIL.Machine/Corpora/ParatextProjectSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ public ParatextProjectSettings(
string fileNameSuffix,
string biblicalTermsListType,
string biblicalTermsProjectName,
string biblicalTermsFileName
string biblicalTermsFileName,
string languageCode
)
{
Name = name;
Expand All @@ -31,6 +32,7 @@ string biblicalTermsFileName
BiblicalTermsListType = biblicalTermsListType;
BiblicalTermsProjectName = biblicalTermsProjectName;
BiblicalTermsFileName = biblicalTermsFileName;
LanguageCode = languageCode;
}

public string Name { get; }
Expand All @@ -45,6 +47,8 @@ string biblicalTermsFileName
public string BiblicalTermsProjectName { get; }
public string BiblicalTermsFileName { get; }

public string LanguageCode { get; }

public bool IsBookFileName(string fileName, out string bookId)
{
bookId = null;
Expand Down
21 changes: 19 additions & 2 deletions src/SIL.Machine/Corpora/ParatextProjectSettingsParserBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,25 @@ public ParatextProjectSettings Parse()
{
throw new InvalidOperationException(
$"The BiblicalTermsListSetting element in Settings.xml in project {fullName}"
+ $" is not in the expected format (i.e., Major::BiblicalTerms.xml) but is {biblicalTermsListSetting}."
+ $" is not in the expected format (e.g., Major::BiblicalTerms.xml) but is {biblicalTermsListSetting}."
);
}
string languageIsoCodeSetting = settingsDoc.Root.Element("LanguageIsoCode")?.Value;
if (languageIsoCodeSetting == null)
{
throw new InvalidOperationException(
$"The LanguageIsoCode element in Settings.xml in project {fullName} does not exist."
);
}
string[] languageIsoCodeSettingParts = settingsDoc.Root.Element("LanguageIsoCode").Value.Split(':');
if (languageIsoCodeSettingParts.Length != 4)
{
throw new InvalidOperationException(
$"The LanguageIsoCode element in Settings.xml in project {fullName}"
+ $" is not in the expected format (e.g., en:::) but is {languageIsoCodeSetting}."
);
}
string languageCode = languageIsoCodeSettingParts[0];

return new ParatextProjectSettings(
name,
Expand All @@ -107,7 +123,8 @@ public ParatextProjectSettings Parse()
suffix,
parts[0],
parts[1],
parts[2]
parts[2],
languageCode
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ private class TestEnvironment : DisposableBase
public TestEnvironment(bool preferTermsLocalization = false)
{
_backupPath = CorporaTestHelpers.CreateTestParatextBackup();
Corpus = new ParatextBackupTermsCorpus(_backupPath, new string[] { "PN" }, "en", preferTermsLocalization);
Corpus = new ParatextBackupTermsCorpus(_backupPath, new string[] { "PN" }, preferTermsLocalization);
}

public ParatextBackupTermsCorpus Corpus { get; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ private static ParatextProjectSettings CreateSettings(string fileNameForm)
".SFM",
"Major",
"",
"BiblicalTerms.xml"
"BiblicalTerms.xml",
"en"
);
}
}

0 comments on commit df2e9ac

Please sign in to comment.