Skip to content

Commit

Permalink
Update documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Oct 28, 2024
1 parent b270eb5 commit 6e9f9c5
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 16 deletions.
14 changes: 12 additions & 2 deletions src/Serval/src/Serval.Client/Client.g.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4437,10 +4437,15 @@ public partial interface ITranslationEnginesClient
/// Starts a build job for a translation engine.
/// </summary>
/// <remarks>
/// Specify the corpora and textIds/scriptureRanges within those corpora to train on. Only one type of corpus may be used: either corpora (see /translation/engines/{id}/corpora) or parallel corpora (see /translation/engines/{id}/parallel-corpora). If no "trainOn" field is provided, all corpora will be used.
/// Specify the corpora and textIds/scriptureRanges within those corpora to train on. Only one type of corpus may be used: either (legacy) corpora (see /translation/engines/{id}/corpora) or parallel corpora (see /translation/engines/{id}/parallel-corpora).
/// <br/>A (legacy) corpus is selected by specifying CorpusId and a parallel corpus is selected for training by specifying the appropriate CorpusId in SourceFilters or TargetFilters.
/// <br/>Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training.
/// <br/>Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range).
/// <br/>All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information.
/// <br/>If `trainOn` or `pretranslate` is not provided, all corpora will be used for the respective task.
/// <br/>If a corpus is selected for training or pretranslation and neither scriptureRange nor textIds is defined, all of the selected corpus will be used.
/// <br/>If a corpus is selected for training or pretranslation and an empty scriptureRange or textIds is defined, none of the selected corpus will be used.
/// <br/>If a corpus is selected for training or pretranslation, all corpora that are not selected will not be used for the respective task.
/// <br/>
/// <br/>Specify the corpora and textIds/scriptureRanges within those corpora to pretranslate. When a corpus is selected for pretranslation,
/// <br/>the following text will be pretranslated:
Expand Down Expand Up @@ -7217,10 +7222,15 @@ public string BaseUrl
/// Starts a build job for a translation engine.
/// </summary>
/// <remarks>
/// Specify the corpora and textIds/scriptureRanges within those corpora to train on. Only one type of corpus may be used: either corpora (see /translation/engines/{id}/corpora) or parallel corpora (see /translation/engines/{id}/parallel-corpora). If no "trainOn" field is provided, all corpora will be used.
/// Specify the corpora and textIds/scriptureRanges within those corpora to train on. Only one type of corpus may be used: either (legacy) corpora (see /translation/engines/{id}/corpora) or parallel corpora (see /translation/engines/{id}/parallel-corpora).
/// <br/>A (legacy) corpus is selected by specifying CorpusId and a parallel corpus is selected for training by specifying the appropriate CorpusId in SourceFilters or TargetFilters.
/// <br/>Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training.
/// <br/>Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range).
/// <br/>All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information.
/// <br/>If `trainOn` or `pretranslate` is not provided, all corpora will be used for the respective task.
/// <br/>If a corpus is selected for training or pretranslation and neither scriptureRange nor textIds is defined, all of the selected corpus will be used.
/// <br/>If a corpus is selected for training or pretranslation and an empty scriptureRange or textIds is defined, none of the selected corpus will be used.
/// <br/>If a corpus is selected for training or pretranslation, all corpora that are not selected will not be used for the respective task.
/// <br/>
/// <br/>Specify the corpora and textIds/scriptureRanges within those corpora to pretranslate. When a corpus is selected for pretranslation,
/// <br/>the following text will be pretranslated:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -990,10 +990,15 @@ CancellationToken cancellationToken
/// Starts a build job for a translation engine.
/// </summary>
/// <remarks>
/// Specify the corpora and textIds/scriptureRanges within those corpora to train on. Only one type of corpus may be used: either corpora (see /translation/engines/{id}/corpora) or parallel corpora (see /translation/engines/{id}/parallel-corpora). If no "trainOn" field is provided, all corpora will be used.
/// Specify the corpora and textIds/scriptureRanges within those corpora to train on. Only one type of corpus may be used: either (legacy) corpora (see /translation/engines/{id}/corpora) or parallel corpora (see /translation/engines/{id}/parallel-corpora).
/// A (legacy) corpus is selected by specifying CorpusId and a parallel corpus is selected for training by specifying the appropriate CorpusId in SourceFilters or TargetFilters.
/// Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training.
/// Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range).
/// All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information.
/// If `trainOn` or `pretranslate` is not provided, all corpora will be used for the respective task.
/// If a corpus is selected for training or pretranslation and neither scriptureRange nor textIds is defined, all of the selected corpus will be used.
/// If a corpus is selected for training or pretranslation and an empty scriptureRange or textIds is defined, none of the selected corpus will be used.
/// If a corpus is selected for training or pretranslation, all corpora that are not selected will not be used for the respective task.
///
/// Specify the corpora and textIds/scriptureRanges within those corpora to pretranslate. When a corpus is selected for pretranslation,
/// the following text will be pretranslated:
Expand Down
30 changes: 19 additions & 11 deletions src/Serval/src/Serval.Translation/Services/EngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -629,8 +629,8 @@ private V1.ParallelCorpus Map(
Corpus source,
TrainingCorpus? trainingCorpus,
PretranslateCorpus? pretranslateCorpus,
bool noTrainingCorpusDefined,
bool noPretranslateCorpusDefined
bool noTrainingCorpusFilter,
bool noPretranslateCorpusFilter
)
{
IEnumerable<V1.CorpusFile> sourceFiles = source.SourceFiles.Select(Map);
Expand All @@ -641,7 +641,7 @@ bool noPretranslateCorpusDefined
new() { Language = source.TargetLanguage, Files = { source.TargetFiles.Select(Map) } };

if (
noTrainingCorpusDefined
noTrainingCorpusFilter
|| (trainingCorpus is not null && trainingCorpus.TextIds is null && trainingCorpus.ScriptureRange is null)
)
{
Expand Down Expand Up @@ -684,7 +684,7 @@ bool noPretranslateCorpusDefined
}
}
if (
noPretranslateCorpusDefined
noPretranslateCorpusFilter
|| (
pretranslateCorpus is not null
&& pretranslateCorpus.TextIds is null
Expand Down Expand Up @@ -739,15 +739,24 @@ private V1.ParallelCorpus Map(
Models.ParallelCorpus source,
TrainingCorpus? trainingCorpus,
PretranslateCorpus? pretranslateCorpus,
bool noTrainingCorpusDefined,
bool noPretranslateCorpusDefined
bool noTrainingCorpusFilter,
bool noPretranslateCorpusFilter
)
{
string? referenceFileLocation =
source.TargetCorpora.Count > 0 && source.TargetCorpora[0].Files.Count > 0
? Map(source.TargetCorpora[0].Files[0]).Location
: null;

bool sourceTrainOnAll =
noTrainingCorpusFilter || (trainingCorpus is not null && trainingCorpus.SourceFilters is null);
bool sourcePretranslateAll =
noPretranslateCorpusFilter || (pretranslateCorpus is not null && pretranslateCorpus.SourceFilters is null);

bool targetTrainOnAll =
noTrainingCorpusFilter || (trainingCorpus is not null && trainingCorpus.TargetFilters is null);
bool targetPretranslateAll = noPretranslateCorpusFilter || pretranslateCorpus is not null; // there is no pretranslate Target filter.

return new V1.ParallelCorpus
{
Id = source.Id,
Expand All @@ -759,9 +768,8 @@ bool noPretranslateCorpusDefined
trainingCorpus?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(),
pretranslateCorpus?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(),
referenceFileLocation,
noTrainingCorpusDefined || (trainingCorpus is not null && trainingCorpus.SourceFilters is null),
noPretranslateCorpusDefined
|| (pretranslateCorpus is not null && pretranslateCorpus.SourceFilters is null)
sourceTrainOnAll,
sourcePretranslateAll
)
)
},
Expand All @@ -773,8 +781,8 @@ bool noPretranslateCorpusDefined
trainingCorpus?.TargetFilters?.Where(sf => sf.CorpusRef == tc.Id).FirstOrDefault(),
null,
referenceFileLocation,
noTrainingCorpusDefined || (trainingCorpus is not null && trainingCorpus.TargetFilters is null),
noPretranslateCorpusDefined || pretranslateCorpus is not null // there is no pretranslate Target filter.
targetTrainOnAll,
targetPretranslateAll
)
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ await env.Service.StartBuildAsync(
}

[Test]
public async Task StartBuildAsync_OneEachOfMultipleCorpora()
public async Task StartBuildAsync_TrainOnOnePretranslateOnTheOther()
{
var env = new TestEnvironment();
string engineId = (await env.CreateMultipleCorporaEngineWithTextFilesAsync()).Id;
Expand Down Expand Up @@ -1004,7 +1004,7 @@ await env.Service.StartBuildAsync(
}

[Test]
public async Task StartBuildAsync_ParallelCorpus_OneOfEachMultipleCorpora()
public async Task StartBuildAsync_ParallelCorpus_TrainOnOnePretranslateOnTheOther()
{
var env = new TestEnvironment();
string engineId = (await env.CreateMultipleParallelCorpusEngineWithTextFilesAsync()).Id;
Expand Down

0 comments on commit 6e9f9c5

Please sign in to comment.