Skip to content

Commit

Permalink
Pretranslate even if there isn't a target corpus.
Browse files Browse the repository at this point in the history
Put "TrainOnAll" and "PretranslateAll" back in
Don't do https redirection
Fixes for E2E testing
Add E2E test
Fix echo
  • Loading branch information
johnml1135 committed Oct 15, 2024
1 parent 261dde1 commit 6c95566
Show file tree
Hide file tree
Showing 12 changed files with 382 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
build:
name: Build
runs-on: ubuntu-latest
timeout-minutes: 45
timeout-minutes: 60

env:
SERVAL_CLIENT_ID: ${{ secrets.SERVAL_CLIENT_ID }}
Expand Down
3 changes: 0 additions & 3 deletions src/Echo/src/EchoTranslationEngine/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@

WebApplication app = builder.Build();

// Configure the HTTP request pipeline.
app.UseHttpsRedirection();

app.MapGrpcService<TranslationEngineServiceV1>();
app.MapGrpcService<HealthServiceV1>();

Expand Down
12 changes: 10 additions & 2 deletions src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,23 @@ await client.BuildStartedAsync(
var sourceFiles = corpus
.SourceCorpora.SelectMany(sc =>
sc.Files.Where(f =>
(sc.PretranslateTextIds is null || sc.PretranslateTextIds.Contains(f.TextId))
(
sc.PretranslateAll
|| sc.PretranslateTextIds is null
|| sc.PretranslateTextIds.Contains(f.TextId)
)
&& f.Format == FileFormat.Text
)
)
.ToDictionary(f => f.TextId, f => f.Location);
var targetFiles = corpus
.TargetCorpora.SelectMany(tc =>
tc.Files.Where(f =>
(tc.PretranslateTextIds is null || tc.PretranslateTextIds.Contains(f.TextId))
(
tc.PretranslateAll
|| tc.PretranslateTextIds is null
|| tc.PretranslateTextIds.Contains(f.TextId)
)
&& f.Format == FileFormat.Text
)
)
Expand Down
2 changes: 0 additions & 2 deletions src/Machine/src/Serval.Machine.EngineServer/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@

var app = builder.Build();

app.UseHttpsRedirection();

app.MapServalTranslationEngineService();
app.MapHangfireDashboard();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,22 +238,29 @@ row.Ref is not ScriptureRef sr
}
}
}
void WriteRow(Utf8JsonWriter writer, string textId, IReadOnlyList<object> refs, string translation)
{
writer.WriteStartObject();
writer.WriteString("corpusId", corpus.Id);
writer.WriteString("textId", textId);
writer.WriteStartArray("refs");
foreach (object rowRef in refs)
writer.WriteStringValue(rowRef.ToString());
writer.WriteEndArray();
writer.WriteString("translation", translation);
writer.WriteEndObject();
pretranslateCount++;
}

ITextCorpus targetCorpus =
targetCorpora.Length > 0
? targetCorpora[0].TextCorpus
: new DictionaryTextCorpus(new List<MemoryText>());

foreach (Row row in AlignPretranslateCorpus(sourcePretranslateCorpora, targetCorpora[0].TextCorpus))
foreach (Row row in AlignPretranslateCorpus(sourcePretranslateCorpora, targetCorpus))
{
if (row.SourceSegment.Length > 0)
{
pretranslateWriter.WriteStartObject();
pretranslateWriter.WriteString("corpusId", corpus.Id);
pretranslateWriter.WriteString("textId", row.TextId);
pretranslateWriter.WriteStartArray("refs");
foreach (object rowRef in row.Refs)
pretranslateWriter.WriteStringValue(rowRef.ToString());
pretranslateWriter.WriteEndArray();
pretranslateWriter.WriteString("translation", row.SourceSegment);
pretranslateWriter.WriteEndObject();
pretranslateCount++;
}
WriteRow(pretranslateWriter, row.TextId, row.Refs, row.SourceSegment);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,16 +286,20 @@ private static Models.MonolingualCorpus Map(Translation.V1.MonolingualCorpus sou
kvp => kvp.Value.Chapters.ToHashSet()
);
var trainOnTextIds = source.TrainOnTextIds.ToHashSet();
FilterChoice trainingFilter = GetFilterChoice(trainOnChapters, trainOnTextIds);
FilterChoice trainingFilter = GetFilterChoice(trainOnChapters, trainOnTextIds, source.TrainOnAll);

var pretranslateChapters = source.PretranslateChapters.ToDictionary(
kvp => kvp.Key,
kvp => kvp.Value.Chapters.ToHashSet()
);
var pretranslateTextIds = source.PretranslateTextIds.ToHashSet();
FilterChoice pretranslateFilter = GetFilterChoice(pretranslateChapters, pretranslateTextIds);
FilterChoice pretranslateFilter = GetFilterChoice(
pretranslateChapters,
pretranslateTextIds,
source.PretranslateAll
);

return new Models.MonolingualCorpus
var corpus = new Models.MonolingualCorpus
{
Id = source.Id,
Language = source.Language,
Expand All @@ -305,6 +309,7 @@ private static Models.MonolingualCorpus Map(Translation.V1.MonolingualCorpus sou
PretranslateChapters = pretranslateFilter == FilterChoice.Chapters ? pretranslateChapters : null,
PretranslateTextIds = pretranslateFilter == FilterChoice.TextIds ? pretranslateTextIds : null
};
return corpus;
}

private static Models.CorpusFile Map(Translation.V1.CorpusFile source)
Expand All @@ -326,12 +331,13 @@ private enum FilterChoice

private static FilterChoice GetFilterChoice(
IReadOnlyDictionary<string, HashSet<int>> chapters,
HashSet<string> textIds
HashSet<string> textIds,
bool noFilter
)
{
// Only either textIds or Scripture Range will be used at a time
// TextIds may be an empty array, so prefer that if both are empty (which applies to both scripture and text)
if (chapters is null && textIds is null)
if (noFilter || (chapters is null && textIds is null))
return FilterChoice.None;
if (chapters is null || chapters.Count == 0)
return FilterChoice.TextIds;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ message ParallelCorpus {
message MonolingualCorpus {
string id = 1;
string language = 2;
bool train_on_all = 3;
bool pretranslate_all = 4;
map<string, ScriptureChapters> train_on_chapters = 5;
map<string, ScriptureChapters> pretranslate_chapters = 6;
repeated string train_on_text_ids = 7;
Expand Down
51 changes: 41 additions & 10 deletions src/Serval/src/Serval.Translation/Services/EngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,12 @@ private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? trainingCorpus, Pre
V1.MonolingualCorpus targetCorpus =
new() { Language = source.TargetLanguage, Files = { source.TargetFiles.Select(Map) } };

if (trainingCorpus != null)
if (trainingCorpus is null || (trainingCorpus.TextIds is null && trainingCorpus.ScriptureRange is null))
{
sourceCorpus.TrainOnAll = true;
targetCorpus.TrainOnAll = true;
}
else
{
if (trainingCorpus.TextIds is not null && trainingCorpus.ScriptureRange is not null)
{
Expand Down Expand Up @@ -636,7 +641,15 @@ private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? trainingCorpus, Pre
targetCorpus.TrainOnChapters.Add(chapters);
}
}
if (pretranslateCorpus != null)
if (
pretranslateCorpus is null
|| (pretranslateCorpus.TextIds is null && pretranslateCorpus.ScriptureRange is null)
)
{
sourceCorpus.PretranslateAll = true;
targetCorpus.PretranslateAll = true;
}
else
{
if (pretranslateCorpus.TextIds is not null && pretranslateCorpus.ScriptureRange is not null)
{
Expand Down Expand Up @@ -767,14 +780,32 @@ pretranslateFilter is not null
Files = { source.Files.Select(Map) }
};

if (trainOnChapters is not null)
corpus.TrainOnChapters.Add(trainOnChapters);
if (trainingFilter?.TextIds is not null)
corpus.TrainOnTextIds.Add(trainingFilter.TextIds);
if (pretranslateChapters is not null)
corpus.PretranslateChapters.Add(pretranslateChapters);
if (pretranslateFilter?.TextIds is not null)
corpus.PretranslateTextIds.Add(pretranslateFilter.TextIds);
if (trainingFilter is null || (trainingFilter.TextIds is null && trainingFilter.ScriptureRange is null))
{
corpus.TrainOnAll = true;
}
else
{
if (trainOnChapters is not null)
corpus.TrainOnChapters.Add(trainOnChapters);
if (trainingFilter?.TextIds is not null)
corpus.TrainOnTextIds.Add(trainingFilter.TextIds);
}

if (
pretranslateFilter is null
|| (pretranslateFilter.TextIds is null && pretranslateFilter.ScriptureRange is null)
)
{
corpus.PretranslateAll = true;
}
else
{
if (pretranslateChapters is not null)
corpus.PretranslateChapters.Add(pretranslateChapters);
if (pretranslateFilter?.TextIds is not null)
corpus.PretranslateTextIds.Add(pretranslateFilter.TextIds);
}

return corpus;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1660,6 +1660,105 @@ public async Task StartBuildAsync_ParallelCorpus()
Assert.That(build, Is.Not.Null);
}

[Test]
public async Task StartBuildAsync_Corpus_NoFilter()
{
TranslationEnginesClient client = _env.CreateTranslationEnginesClient();
TranslationCorpus addedCorpus = await client.AddCorpusAsync(NMT_ENGINE1_ID, TestCorpusConfig);
PretranslateCorpusConfig ptcc =
new() { CorpusId = addedCorpus.Id, SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }] };
TrainingCorpusConfig tcc =
new()
{
CorpusId = addedCorpus.Id,
SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }],
TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID }]
};
;
TranslationBuildConfig tbc = new TranslationBuildConfig
{
Pretranslate = [ptcc],
TrainOn = [tcc],
Options = """
{"max_steps":10,
"use_key_terms":false,
"some_double":10.5,
"some_nested": {"more_nested": {"other_double":10.5}},
"some_string":"string"}
"""
};
TranslationBuild resultAfterStart;
Assert.ThrowsAsync<ServalApiException>(async () =>
{
resultAfterStart = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID);
});

TranslationBuild build = await client.StartBuildAsync(NMT_ENGINE1_ID, tbc);
Assert.That(build, Is.Not.Null);
Assert.That(build.TrainOn, Is.Not.Null);
Assert.That(build.TrainOn.Count, Is.EqualTo(1));
Assert.That(build.TrainOn[0].TextIds, Is.Null);
Assert.That(build.TrainOn[0].ScriptureRange, Is.Null);
Assert.That(build.Pretranslate, Is.Not.Null);
Assert.That(build.Pretranslate.Count, Is.EqualTo(1));
Assert.That(build.Pretranslate[0].TextIds, Is.Null);
Assert.That(build.Pretranslate[0].ScriptureRange, Is.Null);

build = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID);
Assert.That(build, Is.Not.Null);
}

[Test]
public async Task StartBuildAsync_ParallelCorpus_NoFilter()
{
TranslationEnginesClient client = _env.CreateTranslationEnginesClient();
TranslationParallelCorpus addedCorpus = await client.AddParallelCorpusAsync(
NMT_ENGINE1_ID,
TestParallelCorpusConfig
);
PretranslateCorpusConfig ptcc =
new() { ParallelCorpusId = addedCorpus.Id, SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }] };
TrainingCorpusConfig tcc =
new()
{
ParallelCorpusId = addedCorpus.Id,
SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }],
TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID }]
};
;
TranslationBuildConfig tbc = new TranslationBuildConfig
{
Pretranslate = [ptcc],
TrainOn = [tcc],
Options = """
{"max_steps":10,
"use_key_terms":false,
"some_double":10.5,
"some_nested": {"more_nested": {"other_double":10.5}},
"some_string":"string"}
"""
};
TranslationBuild resultAfterStart;
Assert.ThrowsAsync<ServalApiException>(async () =>
{
resultAfterStart = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID);
});

TranslationBuild build = await client.StartBuildAsync(NMT_ENGINE1_ID, tbc);
Assert.That(build, Is.Not.Null);
Assert.That(build.TrainOn, Is.Not.Null);
Assert.That(build.TrainOn.Count, Is.EqualTo(1));
Assert.That(build.TrainOn[0].TextIds, Is.Null);
Assert.That(build.TrainOn[0].ScriptureRange, Is.Null);
Assert.That(build.Pretranslate, Is.Not.Null);
Assert.That(build.Pretranslate.Count, Is.EqualTo(1));
Assert.That(build.Pretranslate[0].TextIds, Is.Null);
Assert.That(build.Pretranslate[0].ScriptureRange, Is.Null);

build = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID);
Assert.That(build, Is.Not.Null);
}

[Test]
public async Task StartBuildAsync_ParallelCorpus_PretranslateParallelAndNormalCorpus()
{
Expand Down
10 changes: 5 additions & 5 deletions src/Serval/test/Serval.E2ETests/ServalApiTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ public async Task NmtQueueMultiple()
engineIds[i] = await _helperClient.CreateNewEngineAsync("Nmt", "es", "en", $"NMT1_{i}");
string engineId = engineIds[i];
string[] books = ["MAT.txt", "1JN.txt", "2JN.txt"];
await _helperClient.AddTextCorpusToEngineAsync(engineId, books, "es", "en", false);
await _helperClient.AddTextCorpusToEngineAsync(engineId, ["3JN.txt"], "es", "en", true);
await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, books, "es", "en", false);
await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, ["3JN.txt"], "es", "en", true);
await _helperClient.StartBuildAsync(engineId);
//Ensure that tasks are enqueued roughly in order
await Task.Delay(1_000);
Expand Down Expand Up @@ -247,7 +247,7 @@ public async Task CircuitousRouteGetWordGraphAsync()
Assert.That(ex.StatusCode, Is.EqualTo(409));

//Add corpus
string cId = await _helperClient.AddTextCorpusToEngineAsync(
string cId = await _helperClient.AddParallelTextCorpusToEngineAsync(
smtEngineId,
["2JN.txt", "3JN.txt"],
"es",
Expand All @@ -259,10 +259,10 @@ public async Task CircuitousRouteGetWordGraphAsync()
await _helperClient.BuildEngineAsync(smtEngineId);

//Remove added corpus (shouldn't affect translation)
await _helperClient.TranslationEnginesClient.DeleteCorpusAsync(smtEngineId, cId, deleteFiles: false);
await _helperClient.TranslationEnginesClient.DeleteParallelCorpusAsync(smtEngineId, cId);

// Add corpus
await _helperClient.AddTextCorpusToEngineAsync(
await _helperClient.AddParallelTextCorpusToEngineAsync(
smtEngineId,
["1JN.txt", "2JN.txt", "3JN.txt"],
"es",
Expand Down
Loading

0 comments on commit 6c95566

Please sign in to comment.