Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Fixes Paratext project documentation #139

Fixes Paratext zipped file - E2E "missing" test  #125

Note: I had difficulty making a large enough paratext backup zip, so I've added separate paratext project E2E and a >5MB file test.
  • Loading branch information
Enkidu93 committed Sep 27, 2023
1 parent 1055c05 commit 3f84743
Show file tree
Hide file tree
Showing 8 changed files with 167,689 additions and 28 deletions.
38 changes: 20 additions & 18 deletions src/Serval.Client/Client.g.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public partial interface IDataFilesClient
/// <br/>* **Text**: One translation unit (a.k.a., verse) per line
/// <br/> * If there is a tab, the content before the tab is the unique identifier for the line
/// <br/> * Otherwise, no tabs should be used in the file.
/// <br/>* **Paratext**: A complete, zipped Paratext project</param>
/// <br/>* **Paratext**: A complete, zipped Paratext project backup: that is, a .zip archive of files including the USFM files and "Settings.xml" file. To generate a zipped backup for a project in Paratext, navigate to "Paratext/Advanced/Backup project to file..." and follow the dialogue.</param>
/// <param name="name">A name to help identify and distinguish the file.
/// <br/>Recommendation: Create a multi-part name to distinguish between projects, uses, languages, etc.
/// <br/>The name does not have to be unique.
Expand Down Expand Up @@ -233,7 +233,7 @@ public string BaseUrl
/// <br/>* **Text**: One translation unit (a.k.a., verse) per line
/// <br/> * If there is a tab, the content before the tab is the unique identifier for the line
/// <br/> * Otherwise, no tabs should be used in the file.
/// <br/>* **Paratext**: A complete, zipped Paratext project</param>
/// <br/>* **Paratext**: A complete, zipped Paratext project backup: that is, a .zip archive of files including the USFM files and "Settings.xml" file. To generate a zipped backup for a project in Paratext, navigate to "Paratext/Advanced/Backup project to file..." and follow the dialogue.</param>
/// <param name="name">A name to help identify and distinguish the file.
/// <br/>Recommendation: Create a multi-part name to distinguish between projects, uses, languages, etc.
/// <br/>The name does not have to be unique.
Expand Down Expand Up @@ -812,8 +812,8 @@ public partial interface ITranslationEnginesClient
/// <br/>
/// <br/> {
/// <br/> "name": "myTeam:myProject:myEngine",
/// <br/> "sourceLanguage": "ell_Grek",
/// <br/> "targetLanguage": "eng_Latn",
/// <br/> "sourceLanguage": "el",
/// <br/> "targetLanguage": "en",
/// <br/> "type": "Nmt"
/// <br/> }
/// </remarks>
Expand Down Expand Up @@ -899,9 +899,9 @@ public partial interface ITranslationEnginesClient
/// <br/> * **FileId**: The unique id referencing the uploaded file
/// <br/> * **TextId**: The client-defined name to associate source and target files.
/// <br/> * If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine.
/// <br/> * If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines
/// <br/> of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated.
/// <br/> * If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated.
/// <br/> * A TextId should only be used at most once in SourceFiles and in TargetFiles.
/// <br/> * If the file is a Paratext project, this field should be left blank. Any TextId provided will be ignored.
/// <br/>* **TargetFiles**: The target files associated with the corpus
/// <br/> * Same as SourceFiles. Parallel texts must have a matching TextId.
/// </remarks>
Expand Down Expand Up @@ -993,11 +993,12 @@ public partial interface ITranslationEnginesClient
/// Starts a build job for a translation engine.
/// </summary>
/// <remarks>
/// Specify the corpora or textId's to pretranslate. Even when a corpus or TextId
/// <br/>is selected for pretranslation, only "untranslated" text will be pretranslated,
/// Specify the corpora or textIds to pretranslate. Even when a corpus or textId
/// <br/>is selected for pretranslation, only "untranslated" text will be pretranslated:
/// <br/>that is, segments (lines of text) in the specified corpora or textIds that have
/// <br/>untranslated text but no translated text. If the engine does not support
/// <br/>pretranslation, these fields have no effect.
/// <br/>untranslated text but no translated text. If a corpus is a Paratext project,
/// <br/>you may flag a subset of books for pretranslation by including their [abbreviations](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs)
/// <br/>in the textIds parameter. If the engine does not support pretranslation, these fields have no effect.
/// </remarks>
/// <param name="id">The translation engine id</param>
/// <param name="buildConfig">The build config (see remarks)</param>
Expand Down Expand Up @@ -1199,8 +1200,8 @@ public string BaseUrl
/// <br/>
/// <br/> {
/// <br/> "name": "myTeam:myProject:myEngine",
/// <br/> "sourceLanguage": "ell_Grek",
/// <br/> "targetLanguage": "eng_Latn",
/// <br/> "sourceLanguage": "el",
/// <br/> "targetLanguage": "en",
/// <br/> "type": "Nmt"
/// <br/> }
/// </remarks>
Expand Down Expand Up @@ -2017,9 +2018,9 @@ public string BaseUrl
/// <br/> * **FileId**: The unique id referencing the uploaded file
/// <br/> * **TextId**: The client-defined name to associate source and target files.
/// <br/> * If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine.
/// <br/> * If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines
/// <br/> of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated.
/// <br/> * If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated.
/// <br/> * A TextId should only be used at most once in SourceFiles and in TargetFiles.
/// <br/> * If the file is a Paratext project, this field should be left blank. Any TextId provided will be ignored.
/// <br/>* **TargetFiles**: The target files associated with the corpus
/// <br/> * Same as SourceFiles. Parallel texts must have a matching TextId.
/// </remarks>
Expand Down Expand Up @@ -2794,11 +2795,12 @@ public string BaseUrl
/// Starts a build job for a translation engine.
/// </summary>
/// <remarks>
/// Specify the corpora or textId's to pretranslate. Even when a corpus or TextId
/// <br/>is selected for pretranslation, only "untranslated" text will be pretranslated,
/// Specify the corpora or textIds to pretranslate. Even when a corpus or textId
/// <br/>is selected for pretranslation, only "untranslated" text will be pretranslated:
/// <br/>that is, segments (lines of text) in the specified corpora or textIds that have
/// <br/>untranslated text but no translated text. If the engine does not support
/// <br/>pretranslation, these fields have no effect.
/// <br/>untranslated text but no translated text. If a corpus is a Paratext project,
/// <br/>you may flag a subset of books for pretranslation by including their [abbreviations](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs)
/// <br/>in the textIds parameter. If the engine does not support pretranslation, these fields have no effect.
/// </remarks>
/// <param name="id">The translation engine id</param>
/// <param name="buildConfig">The build config (see remarks)</param>
Expand Down
2 changes: 1 addition & 1 deletion src/Serval.DataFiles/Controllers/DataFilesController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public async Task<ActionResult<DataFileDto>> GetAsync([NotNull] string id, Cance
/// * **Text**: One translation unit (a.k.a., verse) per line
/// * If there is a tab, the content before the tab is the unique identifier for the line
/// * Otherwise, no tabs should be used in the file.
/// * **Paratext**: A complete, zipped Paratext project
/// * **Paratext**: A complete, zipped Paratext project backup: that is, a .zip archive of files including the USFM files and "Settings.xml" file. To generate a zipped backup for a project in Paratext, navigate to "Paratext/Advanced/Backup project to file..." and follow the dialogue.
/// </param>
/// <param name="idGenerator"></param>
/// <param name="cancellationToken"></param>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ CancellationToken cancellationToken
///
/// {
/// "name": "myTeam:myProject:myEngine",
/// "sourceLanguage": "ell_Grek",
/// "targetLanguage": "eng_Latn",
/// "sourceLanguage": "el",
/// "targetLanguage": "en",
/// "type": "Nmt"
/// }
///
Expand Down Expand Up @@ -371,9 +371,9 @@ CancellationToken cancellationToken
/// * **FileId**: The unique id referencing the uploaded file
/// * **TextId**: The client-defined name to associate source and target files.
/// * If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine.
/// * If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines
/// of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated.
/// * If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated.
/// * A TextId should only be used at most once in SourceFiles and in TargetFiles.
/// * If the file is a Paratext project, this field should be left blank. Any TextId provided will be ignored.
/// * **TargetFiles**: The target files associated with the corpus
/// * Same as SourceFiles. Parallel texts must have a matching TextId.
/// </remarks>
Expand Down Expand Up @@ -734,11 +734,12 @@ CancellationToken cancellationToken
/// Starts a build job for a translation engine.
/// </summary>
/// <remarks>
/// Specify the corpora or textId's to pretranslate. Even when a corpus or TextId
/// is selected for pretranslation, only "untranslated" text will be pretranslated,
/// Specify the corpora or textIds to pretranslate. Even when a corpus or textId
/// is selected for pretranslation, only "untranslated" text will be pretranslated:
/// that is, segments (lines of text) in the specified corpora or textIds that have
/// untranslated text but no translated text. If the engine does not support
/// pretranslation, these fields have no effect.
/// untranslated text but no translated text. If a corpus is a Paratext project,
/// you may flag a subset of books for pretranslation by including their [abbreviations](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs)
/// in the textIds parameter. If the engine does not support pretranslation, these fields have no effect.
/// </remarks>
/// <param name="id">The translation engine id</param>
/// <param name="buildConfig">The build config (see remarks)</param>
Expand Down
62 changes: 62 additions & 0 deletions tests/Serval.E2ETests/ServalApiTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,24 @@ public async Task NmtBatch()
Assert.IsTrue(lTrans[0].Translation.Contains("dearly beloved Gaius"));
}

/// <summary>
/// End-to-end NMT test that adds a large text file ("bible_LARGEFILE.txt") as one corpus
/// and "3JN.txt" as a second corpus, builds the engine, and checks that the first
/// pretranslation returned for the second corpus contains the word "beloved".
/// </summary>
[Test]
public async Task NmtLargeBatch()
{
    await _helperClient!.ClearEngines();
    string engineId = await _helperClient.CreateNewEngine("Nmt", "es", "en", "NMT3");

    // Large corpus first. The trailing boolean is presumably the "pretranslate" flag
    // (false here, true below) -- confirm against ServalClientHelper.AddTextCorpusToEngine.
    await _helperClient.AddTextCorpusToEngine(
        engineId,
        new string[] { "bible_LARGEFILE.txt" },
        "es",
        "en",
        false
    );

    // The id of this second corpus is the one queried for pretranslations after the build.
    var pretranslatedCorpusId = await _helperClient.AddTextCorpusToEngine(
        engineId,
        new string[] { "3JN.txt" },
        "es",
        "en",
        true
    );

    await _helperClient.BuildEngine(engineId);

    // NOTE(review): fixed one-second pause before querying results; the reason is not
    // visible in this test -- confirm whether it is still required.
    await Task.Delay(1000);

    IList<Pretranslation> pretranslations =
        await _helperClient.translationEnginesClient.GetAllPretranslationsAsync(
            engineId,
            pretranslatedCorpusId
        );
    string firstTranslation = pretranslations[0].Translation;
    TestContext.WriteLine(firstTranslation);
    Assert.IsTrue(firstTranslation.Contains("beloved"));
}

[Test]
public async Task GetNmtCancelAndRestartBuild()
{
Expand Down Expand Up @@ -283,4 +301,48 @@ async Task StartAndCancelTwice(string engineId)
build = await _helperClient.translationEnginesClient.GetBuildAsync(engineId, build.Id);
Assert.That(build.State == JobState.Canceled);
}

/// <summary>
/// End-to-end test for a zipped Paratext project used as a corpus source file.
/// Uploads "TestProject.zip" with <c>FileFormat.Paratext</c>, adds plain-text corpora
/// plus a corpus whose only source file is the uploaded project, requests
/// pretranslation of the books "JHN" and "REV" from that corpus, builds the engine,
/// and verifies that the build completed and that pretranslations were produced.
/// </summary>
[Test]
public async Task ParatextProjectNmtJobAsync()
{
    await _helperClient!.ClearEngines();

    // Scope the archive stream to the upload so the file handle is released
    // deterministically, including when the upload throws.
    DataFile file;
    using (FileStream archive = File.OpenRead("../../../data/TestProject.zip"))
    {
        file = await _helperClient.dataFilesClient.CreateAsync(
            new FileParameter(data: archive),
            FileFormat.Paratext
        );
    }

    string engineId = await _helperClient.CreateNewEngine("Nmt", "en", "es", "NMT4");
    await _helperClient.AddTextCorpusToEngine(
        engineId,
        new string[] { "1JN.txt", "2JN.txt", "3JN.txt" },
        "en",
        "es",
        false
    );

    // No TextId is set on the Paratext source file and there are no target files.
    TranslationCorpus corpus = await _helperClient.translationEnginesClient.AddCorpusAsync(
        engineId,
        new TranslationCorpusConfig
        {
            SourceLanguage = "en",
            TargetLanguage = "es",
            SourceFiles = new TranslationCorpusFileConfig[]
            {
                new TranslationCorpusFileConfig { FileId = file.Id }
            },
            TargetFiles = new TranslationCorpusFileConfig[] { }
        }
    );

    // Flag two books of the Paratext corpus for pretranslation via their abbreviations.
    // NOTE(review): this mutates the helper's translationBuildConfig; confirm the helper
    // is recreated per test so the entry does not carry over into other tests.
    _helperClient.translationBuildConfig.Pretranslate!.Add(
        new PretranslateCorpusConfig { CorpusId = corpus.Id, TextIds = new string[] { "JHN", "REV" } }
    );
    await _helperClient.BuildEngine(engineId);

    // Constraint form reports the actual state when the assertion fails.
    Assert.That(
        (await _helperClient.translationEnginesClient.GetAllBuildsAsync(engineId)).First().State,
        Is.EqualTo(JobState.Completed)
    );

    IList<Pretranslation> lTrans = await _helperClient.translationEnginesClient.GetAllPretranslationsAsync(
        engineId,
        corpus.Id
    );
    Assert.That(lTrans, Is.Not.Empty);
}
}
2 changes: 1 addition & 1 deletion tests/Serval.E2ETests/ServalClientHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ public class ServalClientHelper
readonly Dictionary<string, string> EnginePerUser = new Dictionary<string, string>();
private string _prefix;

private TranslationBuildConfig translationBuildConfig = new TranslationBuildConfig
public TranslationBuildConfig translationBuildConfig = new TranslationBuildConfig
{
Pretranslate = new List<PretranslateCorpusConfig>()
};
Expand Down
Binary file added tests/Serval.E2ETests/data/TestProject.zip
Binary file not shown.
Loading

0 comments on commit 3f84743

Please sign in to comment.