Skip to content

Commit

Permalink
Don't train/pretranslate on other corpora if one is already defined.
Browse files: browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Oct 25, 2024
1 parent c154fc5 commit af1aa56
Showing 1 changed file with 55 additions and 15 deletions.
70 changes (55 additions & 15 deletions) in src/Serval/src/Serval.Translation/Services/EngineService.cs
Original file line number | Diff line number | Diff line change
Expand Up @@ -237,7 +237,13 @@ public async Task StartBuildAsync(Build build, CancellationToken cancellationTok
Corpora =
{
engine.ParallelCorpora.Select(c =>
Map(c, trainOn?.GetValueOrDefault(c.Id), pretranslate?.GetValueOrDefault(c.Id))
Map(
c,
trainOn?.GetValueOrDefault(c.Id),
pretranslate?.GetValueOrDefault(c.Id),
trainOn is null,
pretranslate is null
)
)
}
};
Expand All @@ -255,7 +261,13 @@ public async Task StartBuildAsync(Build build, CancellationToken cancellationTok
Corpora =
{
engine.Corpora.Select(c =>
Map(c, trainOn?.GetValueOrDefault(c.Id), pretranslate?.GetValueOrDefault(c.Id))
Map(
c,
trainOn?.GetValueOrDefault(c.Id),
pretranslate?.GetValueOrDefault(c.Id),
trainOn is null,
pretranslate is null
)
)
}
};
Expand Down Expand Up @@ -592,7 +604,13 @@ private Models.WordGraphArc Map(V1.WordGraphArc source)
};
}

private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? trainingCorpus, PretranslateCorpus? pretranslateCorpus)
private V1.ParallelCorpus Map(
Corpus source,
TrainingCorpus? trainingCorpus,
PretranslateCorpus? pretranslateCorpus,
bool noTrainingCorpusDefined,
bool noPretranslateCorpusDefined
)
{
IEnumerable<V1.CorpusFile> sourceFiles = source.SourceFiles.Select(Map);
IEnumerable<V1.CorpusFile> targetFiles = source.TargetFiles.Select(Map);
Expand All @@ -601,12 +619,15 @@ private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? trainingCorpus, Pre
V1.MonolingualCorpus targetCorpus =
new() { Language = source.TargetLanguage, Files = { source.TargetFiles.Select(Map) } };

if (trainingCorpus is null || (trainingCorpus.TextIds is null && trainingCorpus.ScriptureRange is null))
if (
noTrainingCorpusDefined
|| (trainingCorpus is not null && trainingCorpus.TextIds is null && trainingCorpus.ScriptureRange is null)
)
{
sourceCorpus.TrainOnAll = true;
targetCorpus.TrainOnAll = true;
}
else
else if (trainingCorpus is not null)
{
if (trainingCorpus.TextIds is not null && trainingCorpus.ScriptureRange is not null)
{
Expand Down Expand Up @@ -642,14 +663,18 @@ private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? trainingCorpus, Pre
}
}
if (
pretranslateCorpus is null
|| (pretranslateCorpus.TextIds is null && pretranslateCorpus.ScriptureRange is null)
noPretranslateCorpusDefined
|| (
pretranslateCorpus is not null
&& pretranslateCorpus.TextIds is null
&& pretranslateCorpus.ScriptureRange is null
)
)
{
sourceCorpus.PretranslateAll = true;
targetCorpus.PretranslateAll = true;
}
else
else if (pretranslateCorpus is not null)
{
if (pretranslateCorpus.TextIds is not null && pretranslateCorpus.ScriptureRange is not null)
{
Expand Down Expand Up @@ -692,7 +717,9 @@ pretranslateCorpus is null
private V1.ParallelCorpus Map(
Models.ParallelCorpus source,
TrainingCorpus? trainingCorpus,
PretranslateCorpus? pretranslateCorpus
PretranslateCorpus? pretranslateCorpus,
bool noTrainingCorpusDefined,
bool noPretranslateCorpusDefined
)
{
string? referenceFileLocation =
Expand All @@ -710,7 +737,9 @@ private V1.ParallelCorpus Map(
sc,
trainingCorpus?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(),
pretranslateCorpus?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(),
referenceFileLocation
referenceFileLocation,
noTrainingCorpusDefined,
noPretranslateCorpusDefined
)
)
},
Expand All @@ -721,7 +750,9 @@ private V1.ParallelCorpus Map(
tc,
trainingCorpus?.TargetFilters?.Where(sf => sf.CorpusRef == tc.Id).FirstOrDefault(),
null,
referenceFileLocation
referenceFileLocation,
noTrainingCorpusDefined,
noPretranslateCorpusDefined
)
)
}
Expand All @@ -732,7 +763,9 @@ private V1.MonolingualCorpus Map(
Models.MonolingualCorpus source,
ParallelCorpusFilter? trainingFilter,
ParallelCorpusFilter? pretranslateFilter,
string? referenceFileLocation
string? referenceFileLocation,
bool noTrainingCorpusDefined,
bool noPretranslateCorpusDefined
)
{
Dictionary<string, ScriptureChapters>? trainOnChapters = null;
Expand Down Expand Up @@ -780,7 +813,10 @@ pretranslateFilter is not null
Files = { source.Files.Select(Map) }
};

if (trainingFilter is null || (trainingFilter.TextIds is null && trainingFilter.ScriptureRange is null))
if (
noTrainingCorpusDefined
|| (trainingFilter is not null && trainingFilter.TextIds is null && trainingFilter.ScriptureRange is null)
)
{
corpus.TrainOnAll = true;
}
Expand All @@ -793,8 +829,12 @@ pretranslateFilter is not null
}

if (
pretranslateFilter is null
|| (pretranslateFilter.TextIds is null && pretranslateFilter.ScriptureRange is null)
noPretranslateCorpusDefined
|| (
pretranslateFilter is not null
&& pretranslateFilter.TextIds is null
&& pretranslateFilter.ScriptureRange is null
)
)
{
corpus.PretranslateAll = true;
Expand Down

0 comments on commit af1aa56

Please sign in to comment.