-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f72b568
commit 82c30fe
Showing
14 changed files
with
493 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
13 changes: 13 additions & 0 deletions
13
src/Machine/src/Serval.Machine.Shared/Models/WordAlignmentEngine.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
namespace Serval.Machine.Shared.Models; | ||
|
||
/// <summary> | ||
/// Data record describing a word alignment engine. Implements <see cref="IEntity"/>. | ||
/// </summary> | ||
public record WordAlignmentEngine : IEntity | ||
{ | ||
/// <summary>Entity identifier. Settable; defaults to an empty string.</summary> | ||
public string Id { get; set; } = ""; | ||
/// <summary>Entity revision number. Settable; defaults to 1.</summary> | ||
public int Revision { get; set; } = 1; | ||
/// <summary>Identifier of the engine. Must be supplied at construction and cannot be changed afterwards.</summary> | ||
public required string EngineId { get; init; } | ||
/// <summary>Kind of word alignment engine. Must be supplied at construction.</summary> | ||
public required WordAlignmentEngineType Type { get; init; } | ||
/// <summary>Source language of the engine. Must be supplied at construction.</summary> | ||
public required string SourceLanguage { get; init; } | ||
/// <summary>Target language of the engine. Must be supplied at construction.</summary> | ||
public required string TargetLanguage { get; init; } | ||
/// <summary>Build revision counter. Optional at construction; defaults to 0.</summary> | ||
// NOTE(review): presumably incremented once per completed build - confirm against the build pipeline. | ||
public int BuildRevision { get; init; } | ||
/// <summary>The current build, or <c>null</c> when none is set.</summary> | ||
public Build? CurrentBuild { get; init; } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
34 changes: 34 additions & 0 deletions
34
src/Machine/src/Serval.Machine.Shared/Services/IWordAlignmentEngineService.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
namespace Serval.Machine.Shared.Services; | ||
|
||
/// <summary> | ||
/// Operations offered by a word alignment engine implementation. | ||
/// </summary> | ||
public interface IWordAlignmentEngineService | ||
{ | ||
/// <summary> | ||
/// The engine type handled by this service. ServalWordAlignmentEngineServiceV1 uses this value as the | ||
/// dictionary key when choosing which service receives a request. | ||
/// </summary> | ||
WordAlignmentEngineType WordAlignmentEngine { get; } | ||
|
||
/// <summary>Creates an engine and returns its <see cref="Models.WordAlignmentEngine"/> record.</summary> | ||
/// <param name="engineId">Identifier of the engine to create.</param> | ||
/// <param name="engineName">Optional engine name; may be <c>null</c>.</param> | ||
/// <param name="sourceLanguage">Source language of the engine.</param> | ||
/// <param name="targetLanguage">Target language of the engine.</param> | ||
/// <param name="isModelPersisted">Optional model-persistence flag; <c>null</c> when the caller does not specify it.</param> | ||
/// <param name="cancellationToken">Token used to cancel the operation.</param> | ||
Task<WordAlignmentEngine> CreateAsync( | ||
string engineId, | ||
string? engineName, | ||
string sourceLanguage, | ||
string targetLanguage, | ||
bool? isModelPersisted = null, | ||
CancellationToken cancellationToken = default | ||
); | ||
/// <summary>Deletes the engine with the given identifier.</summary> | ||
Task DeleteAsync(string engineId, CancellationToken cancellationToken = default); | ||
|
||
/// <summary>Computes an alignment for a source/target segment pair.</summary> | ||
// NOTE(review): unlike every other engine operation here, this method takes no engineId, so the | ||
// signature gives an implementation no way to tell which engine to align with - confirm this is intended. | ||
/// <param name="sourceSegment">Source-side segment text.</param> | ||
/// <param name="targetSegment">Target-side segment text.</param> | ||
/// <param name="cancellationToken">Token used to cancel the operation.</param> | ||
Task<TranslationResult> GetBestPhraseAlignmentAsync( | ||
string sourceSegment, | ||
string targetSegment, | ||
CancellationToken cancellationToken = default | ||
); | ||
|
||
/// <summary>Starts a build for an engine.</summary> | ||
/// <param name="engineId">Identifier of the engine to build.</param> | ||
/// <param name="buildId">Identifier of the build.</param> | ||
/// <param name="buildOptions">Optional build options string; may be <c>null</c>.</param> | ||
/// <param name="corpora">Parallel corpora passed to the build.</param> | ||
/// <param name="cancellationToken">Token used to cancel the operation.</param> | ||
Task StartBuildAsync( | ||
string engineId, | ||
string buildId, | ||
string? buildOptions, | ||
IReadOnlyList<ParallelCorpus> corpora, | ||
CancellationToken cancellationToken = default | ||
); | ||
|
||
/// <summary>Cancels the build of the given engine.</summary> | ||
Task CancelBuildAsync(string engineId, CancellationToken cancellationToken = default); | ||
|
||
/// <summary>Returns the queue size for this engine type.</summary> | ||
// NOTE(review): what is queued is not visible here (presumably pending builds) - confirm. | ||
int GetQueueSize(); | ||
} |
205 changes: 205 additions & 0 deletions
205
src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentEngineServiceV1.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
using Google.Protobuf.WellKnownTypes; | ||
using Serval.WordAlignment.V1; | ||
|
||
namespace Serval.Machine.Shared.Services; | ||
|
||
/// <summary> | ||
/// gRPC endpoint for word alignment engines. Every call resolves the | ||
/// <see cref="IWordAlignmentEngineService"/> registered for the request's engine type and forwards to it. | ||
/// </summary> | ||
/// <param name="engineServices"> | ||
/// The available engine services. They are indexed by | ||
/// <see cref="IWordAlignmentEngineService.WordAlignmentEngine"/>, so each engine type may be provided by at | ||
/// most one service. | ||
/// </param> | ||
public class ServalWordAlignmentEngineServiceV1(IEnumerable<IWordAlignmentEngineService> engineServices) | ||
    : WordAlignmentEngineApi.WordAlignmentEngineApiBase | ||
{ | ||
    // Shared response instance for the RPCs that return no payload. | ||
    private static readonly Empty Empty = new(); | ||
|
||
    // Lookup from engine type to its service. ToDictionary throws if two services report the same type. | ||
    private readonly Dictionary<WordAlignmentEngineType, IWordAlignmentEngineService> _engineServices = | ||
        engineServices.ToDictionary(es => es.WordAlignmentEngine); | ||
|
||
    /// <summary>Creates an engine through the service matching <c>request.EngineType</c>.</summary> | ||
    /// <remarks>The engine is always created with <c>isModelPersisted: true</c>.</remarks> | ||
    /// <exception cref="RpcException"><c>InvalidArgument</c> when the engine type is not recognized.</exception> | ||
    public override async Task<Empty> Create(CreateRequest request, ServerCallContext context) | ||
    { | ||
        IWordAlignmentEngineService engineService = GetEngineService(request.EngineType); | ||
        await engineService.CreateAsync( | ||
            request.EngineId, | ||
            request.HasEngineName ? request.EngineName : null, | ||
            request.SourceLanguage, | ||
            request.TargetLanguage, | ||
            isModelPersisted: true, | ||
            cancellationToken: context.CancellationToken | ||
        ); | ||
        return Empty; | ||
    } | ||
|
||
    /// <summary>Deletes the engine identified by <c>request.EngineId</c>.</summary> | ||
    /// <exception cref="RpcException"><c>InvalidArgument</c> when the engine type is not recognized.</exception> | ||
    public override async Task<Empty> Delete(DeleteRequest request, ServerCallContext context) | ||
    { | ||
        IWordAlignmentEngineService engineService = GetEngineService(request.EngineType); | ||
        await engineService.DeleteAsync(request.EngineId, context.CancellationToken); | ||
        return Empty; | ||
    } | ||
|
||
    /// <summary>Aligns the request's source and target segments and returns the mapped result.</summary> | ||
    /// <exception cref="RpcException"> | ||
    /// <c>InvalidArgument</c> when the engine type is not recognized; <c>Aborted</c> when the engine service | ||
    /// throws <see cref="EngineNotBuiltException"/>. | ||
    /// </exception> | ||
    public override async Task<GetWordAlignmentResponse> GetWordAlignment( | ||
        GetWordAlignmentRequest request, | ||
        ServerCallContext context | ||
    ) | ||
    { | ||
        IWordAlignmentEngineService engineService = GetEngineService(request.EngineType); | ||
        TranslationResult result; | ||
        try | ||
        { | ||
            // NOTE(review): only the two segments are forwarded; GetBestPhraseAlignmentAsync has no engine-id | ||
            // parameter, so no specific engine is identified by this call - confirm this is intended. | ||
            result = await engineService.GetBestPhraseAlignmentAsync( | ||
                request.SourceSegment, | ||
                request.TargetSegment, | ||
                context.CancellationToken | ||
            ); | ||
        } | ||
        catch (EngineNotBuiltException e) | ||
        { | ||
            throw new RpcException(new Status(StatusCode.Aborted, e.Message, e)); | ||
        } | ||
|
||
        return new GetWordAlignmentResponse { Result = Map(result) }; | ||
    } | ||
|
||
    /// <summary>Starts a build for <c>request.EngineId</c> over the corpora in the request.</summary> | ||
    /// <exception cref="RpcException"> | ||
    /// <c>InvalidArgument</c> when the engine type is not recognized; <c>Aborted</c> when the engine service | ||
    /// throws <see cref="InvalidOperationException"/>. | ||
    /// </exception> | ||
    public override async Task<Empty> StartBuild(StartBuildRequest request, ServerCallContext context) | ||
    { | ||
        IWordAlignmentEngineService engineService = GetEngineService(request.EngineType); | ||
        Models.ParallelCorpus[] corpora = request.Corpora.Select(Map).ToArray(); | ||
        try | ||
        { | ||
            await engineService.StartBuildAsync( | ||
                request.EngineId, | ||
                request.BuildId, | ||
                request.HasOptions ? request.Options : null, | ||
                corpora, | ||
                context.CancellationToken | ||
            ); | ||
        } | ||
        catch (InvalidOperationException e) | ||
        { | ||
            throw new RpcException(new Status(StatusCode.Aborted, e.Message, e)); | ||
        } | ||
        return Empty; | ||
    } | ||
|
||
    /// <summary>Cancels the build of <c>request.EngineId</c>.</summary> | ||
    /// <exception cref="RpcException"> | ||
    /// <c>InvalidArgument</c> when the engine type is not recognized; <c>Aborted</c> when the engine service | ||
    /// throws <see cref="InvalidOperationException"/>. | ||
    /// </exception> | ||
    public override async Task<Empty> CancelBuild(CancelBuildRequest request, ServerCallContext context) | ||
    { | ||
        IWordAlignmentEngineService engineService = GetEngineService(request.EngineType); | ||
        try | ||
        { | ||
            await engineService.CancelBuildAsync(request.EngineId, context.CancellationToken); | ||
        } | ||
        catch (InvalidOperationException e) | ||
        { | ||
            throw new RpcException(new Status(StatusCode.Aborted, e.Message, e)); | ||
        } | ||
        return Empty; | ||
    } | ||
|
||
    /// <summary>Reports the queue size of the service matching <c>request.EngineType</c>.</summary> | ||
    /// <exception cref="RpcException"><c>InvalidArgument</c> when the engine type is not recognized.</exception> | ||
    public override Task<GetQueueSizeResponse> GetQueueSize(GetQueueSizeRequest request, ServerCallContext context) | ||
    { | ||
        IWordAlignmentEngineService engineService = GetEngineService(request.EngineType); | ||
        return Task.FromResult(new GetQueueSizeResponse { Size = engineService.GetQueueSize() }); | ||
    } | ||
|
||
    /// <summary>Finds the service registered for the engine type named by <paramref name="engineTypeStr"/>.</summary> | ||
    /// <exception cref="RpcException"> | ||
    /// <c>InvalidArgument</c> when the name cannot be parsed or no service is registered for the parsed type. | ||
    /// </exception> | ||
    private IWordAlignmentEngineService GetEngineService(string engineTypeStr) | ||
    { | ||
        if (_engineServices.TryGetValue(GetEngineType(engineTypeStr), out IWordAlignmentEngineService? service)) | ||
            return service; | ||
        throw InvalidEngineType(); | ||
    } | ||
|
||
    /// <summary> | ||
    /// Parses an engine type name into <see cref="WordAlignmentEngineType"/>. The first character is | ||
    /// upper-cased before parsing; the rest of the name is used as given. | ||
    /// </summary> | ||
    /// <exception cref="RpcException"> | ||
    /// <c>InvalidArgument</c> when the name is null, empty, or not parseable as an engine type. | ||
    /// </exception> | ||
    private static WordAlignmentEngineType GetEngineType(string engineTypeStr) | ||
    { | ||
        // Guard before indexing [0]: an empty name (presumably what an unset request field yields) would | ||
        // otherwise raise IndexOutOfRangeException instead of a proper InvalidArgument status. | ||
        if (string.IsNullOrEmpty(engineTypeStr)) | ||
            throw InvalidEngineType(); | ||
        engineTypeStr = engineTypeStr[0].ToString().ToUpperInvariant() + engineTypeStr[1..]; | ||
        if (System.Enum.TryParse(engineTypeStr, out WordAlignmentEngineType engineType)) | ||
            return engineType; | ||
        throw InvalidEngineType(); | ||
    } | ||
|
||
    // Builds the single error used for every unrecognized or unsupported engine type. | ||
    private static RpcException InvalidEngineType() | ||
    { | ||
        return new RpcException(new Status(StatusCode.InvalidArgument, "The engine type is invalid.")); | ||
    } | ||
|
||
    /// <summary>Converts an engine result into the gRPC <see cref="WordAlignmentResult"/> message.</summary> | ||
    private static WordAlignmentResult Map(TranslationResult source) | ||
    { | ||
        return new WordAlignmentResult | ||
        { | ||
            SourceTokens = { source.SourceTokens }, | ||
            TargetTokens = { source.TargetTokens }, | ||
            Confidences = { source.Confidences }, | ||
            Alignment = { Map(source.Alignment) }, | ||
        }; | ||
    } | ||
|
||
    /// <summary> | ||
    /// Lazily yields one pair per <c>true</c> cell of the matrix, in row-major order, with the row index as | ||
    /// the source index and the column index as the target index. | ||
    /// </summary> | ||
    private static IEnumerable<WordAlignment.V1.AlignedWordPair> Map(WordAlignmentMatrix source) | ||
    { | ||
        for (int i = 0; i < source.RowCount; i++) | ||
        { | ||
            for (int j = 0; j < source.ColumnCount; j++) | ||
            { | ||
                if (source[i, j]) | ||
                    yield return new WordAlignment.V1.AlignedWordPair { SourceIndex = i, TargetIndex = j }; | ||
            } | ||
        } | ||
    } | ||
|
||
    /// <summary>Converts a gRPC parallel corpus, including its source and target corpora, into the model type.</summary> | ||
    private static Models.ParallelCorpus Map(WordAlignment.V1.ParallelCorpus source) | ||
    { | ||
        return new Models.ParallelCorpus | ||
        { | ||
            Id = source.Id, | ||
            SourceCorpora = source.SourceCorpora.Select(Map).ToList(), | ||
            TargetCorpora = source.TargetCorpora.Select(Map).ToList() | ||
        }; | ||
    } | ||
|
||
    /// <summary> | ||
    /// Converts a gRPC monolingual corpus into the model type. For each of the train-on and word-align-on | ||
    /// filters, only the representation picked by <see cref="GetFilterChoice"/> (chapters or text ids) is | ||
    /// kept; the other is set to <c>null</c>. | ||
    /// </summary> | ||
    private static Models.MonolingualCorpus Map(WordAlignment.V1.MonolingualCorpus source) | ||
    { | ||
        var trainOnChapters = source.TrainOnChapters.ToDictionary( | ||
            kvp => kvp.Key, | ||
            kvp => kvp.Value.Chapters.ToHashSet() | ||
        ); | ||
        var trainOnTextIds = source.TrainOnTextIds.ToHashSet(); | ||
        FilterChoice trainingFilter = GetFilterChoice(trainOnChapters, trainOnTextIds); | ||
|
||
        // The word-align-on filters are stored in the model's Pretranslate* properties. | ||
        var wordAlignOnChapters = source.WordAlignOnChapters.ToDictionary( | ||
            kvp => kvp.Key, | ||
            kvp => kvp.Value.Chapters.ToHashSet() | ||
        ); | ||
        var wordAlignOnTextIds = source.WordAlignOnTextIds.ToHashSet(); | ||
        FilterChoice wordAlignFilter = GetFilterChoice(wordAlignOnChapters, wordAlignOnTextIds); | ||
|
||
        return new Models.MonolingualCorpus | ||
        { | ||
            Id = source.Id, | ||
            Language = source.Language, | ||
            Files = source.Files.Select(Map).ToList(), | ||
            TrainOnChapters = trainingFilter == FilterChoice.Chapters ? trainOnChapters : null, | ||
            TrainOnTextIds = trainingFilter == FilterChoice.TextIds ? trainOnTextIds : null, | ||
            PretranslateChapters = wordAlignFilter == FilterChoice.Chapters ? wordAlignOnChapters : null, | ||
            PretranslateTextIds = wordAlignFilter == FilterChoice.TextIds ? wordAlignOnTextIds : null | ||
        }; | ||
    } | ||
|
||
    /// <summary>Converts a gRPC corpus file into the model type; the format enum is converted by direct cast.</summary> | ||
    private static Models.CorpusFile Map(WordAlignment.V1.CorpusFile source) | ||
    { | ||
        return new Models.CorpusFile | ||
        { | ||
            Location = source.Location, | ||
            Format = (Models.FileFormat)source.Format, | ||
            TextId = source.TextId | ||
        }; | ||
    } | ||
|
||
    // Which of the two filter representations of a corpus is in effect. | ||
    private enum FilterChoice | ||
    { | ||
        Chapters, | ||
        TextIds, | ||
        None | ||
    } | ||
|
||
    /// <summary> | ||
    /// Chooses between the chapter filter and the text-id filter. Chapters win only when at least one | ||
    /// chapter entry is present; otherwise text ids are chosen, even when they are empty too. | ||
    /// </summary> | ||
    private static FilterChoice GetFilterChoice( | ||
        IReadOnlyDictionary<string, HashSet<int>> chapters, | ||
        HashSet<string> textIds | ||
    ) | ||
    { | ||
        // Only either textIds or Scripture Range will be used at a time | ||
        // TextIds may be an empty array, so prefer that if both are empty (which applies to both scripture and text) | ||
        // NOTE(review): the callers in this class always pass non-null collections (ToDictionary/ToHashSet), | ||
        // so the None branch is not reached from here. | ||
        if (chapters is null && textIds is null) | ||
            return FilterChoice.None; | ||
        if (chapters is null || chapters.Count == 0) | ||
            return FilterChoice.TextIds; | ||
        return FilterChoice.Chapters; | ||
    } | ||
} |
Oops, something went wrong.