Skip to content

Commit

Permalink
Fixes #202
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Nov 7, 2023
1 parent af79060 commit 5e72696
Show file tree
Hide file tree
Showing 12 changed files with 106 additions and 10 deletions.
30 changes: 30 additions & 0 deletions src/Serval.Client/Client.g.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4426,6 +4426,9 @@ public partial class TranslationBuild
[System.ComponentModel.DataAnnotations.Required]
public ResourceLink Engine { get; set; } = new ResourceLink();

[Newtonsoft.Json.JsonProperty("trainOn", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
public System.Collections.Generic.IList<TrainingCorpus>? TrainOn { get; set; } = default!;

[Newtonsoft.Json.JsonProperty("pretranslate", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
public System.Collections.Generic.IList<PretranslateCorpus>? Pretranslate { get; set; } = default!;

Expand Down Expand Up @@ -4457,6 +4460,18 @@ public partial class TranslationBuild

}

/// <summary>
/// Client-side representation of one training corpus entry reported on a
/// <c>TranslationBuild</c> through its <c>trainOn</c> list.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "13.18.2.0 (NJsonSchema v10.8.0.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class TrainingCorpus
{
    /// <summary>
    /// Link to the corpus. Bound to the JSON property <c>corpus</c>, which is
    /// declared as always required.
    /// </summary>
    [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Always)]
    [System.ComponentModel.DataAnnotations.Required]
    public ResourceLink Corpus { get; set; } = new ResourceLink();

    /// <summary>
    /// Optional list of text ids. Bound to the JSON property <c>textIds</c>;
    /// the property is left out of the serialized JSON when it is null.
    /// </summary>
    [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
    public System.Collections.Generic.IList<string>? TextIds { get; set; }
}

[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "13.18.2.0 (NJsonSchema v10.8.0.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class PretranslateCorpus
{
Expand Down Expand Up @@ -4496,6 +4511,9 @@ public partial class TranslationBuildConfig
[Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
public string? Name { get; set; } = default!;

[Newtonsoft.Json.JsonProperty("trainOn", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
public System.Collections.Generic.IList<TrainingCorpusConfig>? TrainOn { get; set; } = default!;

[Newtonsoft.Json.JsonProperty("pretranslate", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
public System.Collections.Generic.IList<PretranslateCorpusConfig>? Pretranslate { get; set; } = default!;

Expand All @@ -4504,6 +4522,18 @@ public partial class TranslationBuildConfig

}

/// <summary>
/// Client-side request entry used in <c>TranslationBuildConfig.TrainOn</c> to
/// select a corpus (and optionally specific texts) for training.
/// </summary>
[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "13.18.2.0 (NJsonSchema v10.8.0.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class TrainingCorpusConfig
{
    /// <summary>
    /// Id of the corpus. Bound to the JSON property <c>corpusId</c>, which is
    /// declared as always required; the data-annotation check accepts an
    /// empty string.
    /// </summary>
    [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Always)]
    [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)]
    public string CorpusId { get; set; } = null!;

    /// <summary>
    /// Optional list of text ids. Bound to the JSON property <c>textIds</c>;
    /// the property is left out of the serialized JSON when it is null.
    /// </summary>
    [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
    public System.Collections.Generic.IList<string>? TextIds { get; set; }
}

[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "13.18.2.0 (NJsonSchema v10.8.0.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class PretranslateCorpusConfig
{
Expand Down
10 changes: 6 additions & 4 deletions src/Serval.Grpc/Protos/serval/translation/v1/engine.proto
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,12 @@ message Corpus {
string id = 1;
string source_language = 2;
string target_language = 3;
// Tags 4-7 keep the numbers they had before training selection was added.
// Renumbering an existing field changes its wire identity, so a peer built
// against the earlier schema would misread (or silently drop) these values.
bool pretranslate_all = 4;
repeated string pretranslate_text_ids = 5;
repeated CorpusFile source_files = 6;
repeated CorpusFile target_files = 7;
// Training selection fields take previously unused tags.
bool train_on_all = 8;
repeated string train_on_text_ids = 9;
}

message CorpusFile {
Expand Down
7 changes: 7 additions & 0 deletions src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace Serval.Translation.Contracts;

/// <summary>
/// Request contract for one entry of <c>TranslationBuildConfigDto.TrainOn</c>:
/// names a corpus of the engine to train on.
/// </summary>
public class TrainingCorpusConfigDto
{
    /// <summary>
    /// Id of the corpus to train on. When a build is mapped from the config,
    /// an id that is not among the engine's corpora is rejected with an
    /// <see cref="System.InvalidOperationException"/>.
    /// </summary>
    public string CorpusId { get; set; } = null!;

    /// <summary>
    /// Optional text ids to restrict training to. When the build is started,
    /// a null or empty list results in the corpus being flagged as
    /// "train on all".
    /// </summary>
    public IList<string>? TextIds { get; set; }
}
8 changes: 8 additions & 0 deletions src/Serval.Translation/Contracts/TrainingCorpusDto.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace Serval.Translation.Contracts;

/// <summary>
/// Response contract for one entry of <c>TranslationBuildDto.TrainOn</c>.
/// </summary>
public class TrainingCorpusDto
{
    /// <summary>Id and URL of the corpus that was selected for training.</summary>
    public ResourceLinkDto Corpus { get; set; } = null!;

    /// <summary>Text ids selected for training; may be null.</summary>
    public IList<string>? TextIds { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
public class TranslationBuildConfigDto
{
public string? Name { get; set; }
public IList<TrainingCorpusConfigDto>? TrainOn { get; set; }
public IList<PretranslateCorpusConfigDto>? Pretranslate { get; set; }

/// <example>
Expand Down
1 change: 1 addition & 0 deletions src/Serval.Translation/Contracts/TranslationBuildDto.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ public class TranslationBuildDto
public int Revision { get; set; }
public string? Name { get; set; }
public ResourceLinkDto Engine { get; set; } = default!;
public IList<TrainingCorpusDto>? TrainOn { get; set; }
public IList<PretranslateCorpusDto>? Pretranslate { get; set; }
public int Step { get; set; }
public double? PercentCompleted { get; set; }
Expand Down
31 changes: 29 additions & 2 deletions src/Serval.Translation/Controllers/TranslationEnginesController.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
namespace Serval.Translation.Controllers;
using System.Net.Sockets;

namespace Serval.Translation.Controllers;

[ApiVersion(1.0)]
[Route("api/v{version:apiVersion}/translation/engines")]
Expand Down Expand Up @@ -998,10 +1000,10 @@ private Engine Map(TranslationEngineConfigDto source)
private static Build Map(Engine engine, TranslationBuildConfigDto source)
{
var build = new Build { EngineRef = engine.Id, Name = source.Name };
var corpusIds = new HashSet<string>(engine.Corpora.Select(c => c.Id));
if (source.Pretranslate != null)
{
var pretranslateCorpora = new List<PretranslateCorpus>();
var corpusIds = new HashSet<string>(engine.Corpora.Select(c => c.Id));
foreach (PretranslateCorpusConfigDto ptcc in source.Pretranslate)
{
if (!corpusIds.Contains(ptcc.CorpusId))
Expand All @@ -1013,6 +1015,17 @@ private static Build Map(Engine engine, TranslationBuildConfigDto source)
}
build.Pretranslate = pretranslateCorpora;
}
if (source.TrainOn != null)
{
var trainOnCorpora = new List<TrainingCorpus>();
foreach (TrainingCorpusConfigDto tcc in source.TrainOn)
{
if (!corpusIds.Contains(tcc.CorpusId))
throw new InvalidOperationException($"The corpus {tcc.CorpusId} is not valid.");
trainOnCorpora.Add(new TrainingCorpus { CorpusRef = tcc.CorpusId, TextIds = tcc.TextIds?.ToList() });
}
build.TrainOn = trainOnCorpora;
}
try
{
var jsonSerializerOptions = new JsonSerializerOptions();
Expand Down Expand Up @@ -1061,6 +1074,7 @@ private TranslationBuildDto Map(Build source)
Id = source.EngineRef,
Url = _urlService.GetUrl("GetTranslationEngine", new { id = source.EngineRef })
},
TrainOn = source.TrainOn?.Select(s => Map(source.EngineRef, s)).ToList(),
Pretranslate = source.Pretranslate?.Select(s => Map(source.EngineRef, s)).ToList(),
Step = source.Step,
PercentCompleted = source.PercentCompleted,
Expand All @@ -1085,6 +1099,19 @@ private PretranslateCorpusDto Map(string engineId, PretranslateCorpus source)
};
}

/// <summary>
/// Converts a stored <see cref="TrainingCorpus"/> into its API contract,
/// attaching a resource link for the referenced corpus.
/// </summary>
/// <param name="engineId">Id of the engine that owns the corpus; used as the <c>id</c> route value.</param>
/// <param name="source">The training corpus entry taken from the build.</param>
/// <returns>A DTO carrying the corpus link and the same text id list as <paramref name="source"/>.</returns>
private TrainingCorpusDto Map(string engineId, TrainingCorpus source)
{
    string corpusId = source.CorpusRef;

    // The URL is resolved through the "GetTranslationCorpus" route name with
    // both the engine id and the corpus id as route values.
    var corpusLink = new ResourceLinkDto
    {
        Id = corpusId,
        Url = _urlService.GetUrl("GetTranslationCorpus", new { id = engineId, corpusId })
    };

    return new TrainingCorpusDto { Corpus = corpusLink, TextIds = source.TextIds };
}

private TranslationResultDto Map(TranslationResult source)
{
return new TranslationResultDto
Expand Down
1 change: 1 addition & 0 deletions src/Serval.Translation/Models/Build.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ public class Build : IEntity
public int Revision { get; set; } = 1;
public string? Name { get; set; }
public string EngineRef { get; set; } = default!;
public IList<TrainingCorpus>? TrainOn { get; set; }
public List<PretranslateCorpus>? Pretranslate { get; set; }
public int Step { get; set; }
public double? PercentCompleted { get; set; }
Expand Down
7 changes: 7 additions & 0 deletions src/Serval.Translation/Models/TrainingCorpus.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace Serval.Translation.Models;

/// <summary>
/// Build-level record of a corpus selected for training.
/// </summary>
public class TrainingCorpus
{
    /// <summary>Id of the engine corpus this entry refers to.</summary>
    public string CorpusRef { get; set; } = null!;

    /// <summary>
    /// Text ids to train on. When the build is started, a null or empty list
    /// results in the corpus being flagged as "train on all"; a non-null list
    /// is also copied into the request's train-on text ids.
    /// </summary>
    public IList<string>? TextIds { get; set; }
}
7 changes: 7 additions & 0 deletions src/Serval.Translation/Services/EngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ public async Task<bool> StartBuildAsync(Build build, CancellationToken cancellat
try
{
Dictionary<string, PretranslateCorpus>? pretranslate = build.Pretranslate?.ToDictionary(c => c.CorpusRef);
Dictionary<string, TrainingCorpus>? trainOn = build.TrainOn?.ToDictionary(c => c.CorpusRef);
var client = _grpcClientFactory.CreateClient<TranslationEngineApi.TranslationEngineApiClient>(engine.Type);
var request = new StartBuildRequest
{
Expand All @@ -210,6 +211,12 @@ public async Task<bool> StartBuildAsync(Build build, CancellationToken cancellat
if (pretranslateCorpus.TextIds is not null)
corpus.PretranslateTextIds.Add(pretranslateCorpus.TextIds);
}
if (trainOn?.TryGetValue(c.Id, out TrainingCorpus? trainingCorpus) ?? false)
{
corpus.TrainOnAll = trainingCorpus.TextIds is null || trainingCorpus.TextIds.Count == 0;
if (trainingCorpus.TextIds is not null)
corpus.TrainOnTextIds.Add(trainingCorpus.TextIds);
}
return corpus;
})
}
Expand Down
9 changes: 6 additions & 3 deletions tests/Serval.E2ETests/ServalApiTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,16 @@ public async Task NmtBatch()
await _helperClient!.ClearEngines();
string engineId = await _helperClient.CreateNewEngine("Nmt", "es", "en", "NMT1");
var books = new string[] { "MAT.txt", "1JN.txt", "2JN.txt" };
await _helperClient.AddTextCorpusToEngine(engineId, books, "es", "en", false);
var cId = await _helperClient.AddTextCorpusToEngine(engineId, new string[] { "3JN.txt" }, "es", "en", true);
var cId1 = await _helperClient.AddTextCorpusToEngine(engineId, books, "es", "en", false);
_helperClient.TranslationBuildConfig.TrainOn!.Add(
new TrainingCorpusConfig { CorpusId = cId1, TextIds = new string[] { "1JN.txt" } }
);
var cId2 = await _helperClient.AddTextCorpusToEngine(engineId, new string[] { "3JN.txt" }, "es", "en", true);
await _helperClient.BuildEngine(engineId);
await Task.Delay(1000);
IList<Pretranslation> lTrans = await _helperClient.translationEnginesClient.GetAllPretranslationsAsync(
engineId,
cId
cId2
);
Assert.IsTrue(lTrans[0].Translation.Contains("dearly beloved Gaius"));
}
Expand Down
4 changes: 3 additions & 1 deletion tests/Serval.E2ETests/ServalClientHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ public ServalClientHelper(string audience, string prefix = "SCE_", bool ignoreSS
TranslationBuildConfig = new TranslationBuildConfig
{
Pretranslate = new List<PretranslateCorpusConfig>(),
Options = "{\"max_steps\":10}"
Options = "{\"max_steps\":10}",
TrainOn = new List<TrainingCorpusConfig>()
};
}

Expand Down Expand Up @@ -86,6 +87,7 @@ public async Task ClearEngines(string name = "")
}
}
TranslationBuildConfig.Pretranslate = new List<PretranslateCorpusConfig>();
TranslationBuildConfig.TrainOn = new List<TrainingCorpusConfig>();
EnginePerUser.Clear();
}

Expand Down

0 comments on commit 5e72696

Please sign in to comment.