From 0dc5784ea13ce36e757964debbaca6675171ed7a Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 7 Nov 2024 14:54:01 -0500 Subject: [PATCH] Correct Incorrect engine type error EchoWordAlignment works --- .../Services/BuildJobService.cs | 4 +- .../Services/ClearMLMonitorService.cs | 14 +- .../Services/IBuildJobService.cs | 35 +---- .../Services/IBuildJobServiceBase.cs | 37 +++++ src/Serval/src/Serval.Client/Client.g.cs | 112 +++++++------- .../Services/EngineService.cs | 11 +- .../Services/EngineService.cs | 13 +- .../test/Serval.E2ETests/ServalApiTests.cs | 32 +++- .../Serval.E2ETests/ServalClientHelper.cs | 146 +++++++++++++----- 9 files changed, 259 insertions(+), 145 deletions(-) create mode 100644 src/Machine/src/Serval.Machine.Shared/Services/IBuildJobServiceBase.cs diff --git a/src/Machine/src/Serval.Machine.Shared/Services/BuildJobService.cs b/src/Machine/src/Serval.Machine.Shared/Services/BuildJobService.cs index 7f263d9f..4b3bd5f4 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/BuildJobService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/BuildJobService.cs @@ -13,12 +13,12 @@ public Task IsEngineBuilding(string engineId, CancellationToken cancellati return Engines.ExistsAsync(e => e.EngineId == engineId && e.CurrentBuild != null, cancellationToken); } - public Task> GetBuildingEnginesAsync( + public async Task> GetBuildingEnginesAsync( BuildJobRunnerType runner, CancellationToken cancellationToken = default ) { - return Engines.GetAllAsync( + return await Engines.GetAllAsync( e => e.CurrentBuild != null && e.CurrentBuild.BuildJobRunner == runner, cancellationToken ); diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLMonitorService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLMonitorService.cs index e3758cd7..7d494c9b 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLMonitorService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLMonitorService.cs @@ -54,9 +54,9 @@ private async Task MonitorClearMLTasksPerDomain(IServiceScope scope, Cancellatio IBuildJobService >(); - Dictionary> engineToBuildServiceDict = ( + Dictionary engineToBuildServiceDict = ( await translationBuildJobService.GetBuildingEnginesAsync(BuildJobRunnerType.ClearML, cancellationToken) - ).ToDictionary(e => (ITrainingEngine)e, e => (IBuildJobService)translationBuildJobService); + ).ToDictionary(e => (ITrainingEngine)e, e => (IBuildJobServiceBase)translationBuildJobService); foreach ( var engine in await wordAlignmentBuildJobService.GetBuildingEnginesAsync( @@ -65,7 +65,7 @@ var engine in await wordAlignmentBuildJobService.GetBuildingEnginesAsync( ) ) { - engineToBuildServiceDict[engine] = (IBuildJobService)wordAlignmentBuildJobService; + engineToBuildServiceDict[engine] = wordAlignmentBuildJobService; } if (engineToBuildServiceDict.Count == 0) @@ -237,7 +237,7 @@ await TrainJobFaultedAsync( private async Task TrainJobStartedAsync( IDataAccessContext dataAccessContext, - IBuildJobService buildJobService, + IBuildJobServiceBase buildJobService, IPlatformService platformService, string engineId, string buildId, @@ -260,7 +260,7 @@ private async Task TrainJobStartedAsync( } private async Task TrainJobCompletedAsync( - IBuildJobService buildJobService, + IBuildJobServiceBase buildJobService, EngineType engineType, string engineId, string buildId, @@ -291,7 +291,7 @@ CancellationToken cancellationToken private async Task TrainJobFaultedAsync( IDataAccessContext dataAccessContext, - IBuildJobService buildJobService, + IBuildJobServiceBase buildJobService, IPlatformService platformService, string engineId, string buildId, @@ -324,7 +324,7 @@ await buildJobService.BuildJobFinishedAsync( private async Task TrainJobCanceledAsync( IDataAccessContext dataAccessContext, - IBuildJobService buildJobService, + IBuildJobServiceBase buildJobService, IPlatformService platformService, string engineId, string buildId, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobService.cs b/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobService.cs index 34567d5d..f72ce755 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobService.cs @@ -1,43 +1,10 @@ namespace Serval.Machine.Shared.Services; -public interface IBuildJobService +public interface IBuildJobService : IBuildJobServiceBase where TEngine : ITrainingEngine { Task> GetBuildingEnginesAsync( BuildJobRunnerType runner, CancellationToken cancellationToken = default ); - - Task IsEngineBuilding(string engineId, CancellationToken cancellationToken = default); - - Task CreateEngineAsync(string engineId, string? name = null, CancellationToken cancellationToken = default); - - Task DeleteEngineAsync(string engineId, CancellationToken cancellationToken = default); - - Task StartBuildJobAsync( - BuildJobRunnerType runnerType, - EngineType engineType, - string engineId, - string buildId, - BuildStage stage, - object? data = default, - string? buildOptions = default, - CancellationToken cancellationToken = default - ); - - Task<(string? BuildId, BuildJobState State)> CancelBuildJobAsync( - string engineId, - CancellationToken cancellationToken = default - ); - - Task BuildJobStartedAsync(string engineId, string buildId, CancellationToken cancellationToken = default); - - Task BuildJobFinishedAsync( - string engineId, - string buildId, - bool buildComplete, - CancellationToken cancellationToken = default - ); - - Task BuildJobRestartingAsync(string engineId, string buildId, CancellationToken cancellationToken = default); } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobServiceBase.cs b/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobServiceBase.cs new file mode 100644 index 00000000..16e771ce --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobServiceBase.cs @@ -0,0 +1,37 @@ +namespace Serval.Machine.Shared.Services; + +public interface IBuildJobServiceBase +{ + Task IsEngineBuilding(string engineId, CancellationToken cancellationToken = default); + + Task CreateEngineAsync(string engineId, string? name = null, CancellationToken cancellationToken = default); + + Task DeleteEngineAsync(string engineId, CancellationToken cancellationToken = default); + + Task StartBuildJobAsync( + BuildJobRunnerType runnerType, + EngineType engineType, + string engineId, + string buildId, + BuildStage stage, + object? data = default, + string? buildOptions = default, + CancellationToken cancellationToken = default + ); + + Task<(string? BuildId, BuildJobState State)> CancelBuildJobAsync( + string engineId, + CancellationToken cancellationToken = default + ); + + Task BuildJobStartedAsync(string engineId, string buildId, CancellationToken cancellationToken = default); + + Task BuildJobFinishedAsync( + string engineId, + string buildId, + bool buildComplete, + CancellationToken cancellationToken = default + ); + + Task BuildJobRestartingAsync(string engineId, string buildId, CancellationToken cancellationToken = default); +} diff --git a/src/Serval/src/Serval.Client/Client.g.cs b/src/Serval/src/Serval.Client/Client.g.cs index b4ba2685..35105c69 100644 --- a/src/Serval/src/Serval.Client/Client.g.cs +++ b/src/Serval/src/Serval.Client/Client.g.cs @@ -438,7 +438,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c var field = System.Reflection.IntrospectionExtensions.GetTypeInfo(value.GetType()).GetDeclaredField(name); if (field != null) { - var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) + var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) as System.Runtime.Serialization.EnumMemberAttribute; if (attribute != null) { @@ -450,7 +450,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c return converted == null ? string.Empty : converted; } } - else if (value is bool) + else if (value is bool) { return System.Convert.ToString((bool)value, cultureInfo).ToLowerInvariant(); } @@ -2410,7 +2410,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c var field = System.Reflection.IntrospectionExtensions.GetTypeInfo(value.GetType()).GetDeclaredField(name); if (field != null) { - var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) + var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) as System.Runtime.Serialization.EnumMemberAttribute; if (attribute != null) { @@ -2422,7 +2422,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c return converted == null ? string.Empty : converted; } } - else if (value is bool) + else if (value is bool) { return System.Convert.ToString((bool)value, cultureInfo).ToLowerInvariant(); } @@ -3134,7 +3134,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c var field = System.Reflection.IntrospectionExtensions.GetTypeInfo(value.GetType()).GetDeclaredField(name); if (field != null) { - var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) + var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) as System.Runtime.Serialization.EnumMemberAttribute; if (attribute != null) { @@ -3146,7 +3146,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c return converted == null ? string.Empty : converted; } } - else if (value is bool) + else if (value is bool) { return System.Convert.ToString((bool)value, cultureInfo).ToLowerInvariant(); } @@ -3192,7 +3192,7 @@ public partial interface IDataFilesClient /// /// /// Sample request: - ///
+ ///
///
POST /files ///
{ ///
"format": "text", @@ -3407,7 +3407,7 @@ public string BaseUrl /// /// /// Sample request: - ///
+ ///
///
POST /files ///
{ ///
"format": "text", @@ -4060,7 +4060,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c var field = System.Reflection.IntrospectionExtensions.GetTypeInfo(value.GetType()).GetDeclaredField(name); if (field != null) { - var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) + var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) as System.Runtime.Serialization.EnumMemberAttribute; if (attribute != null) { @@ -4072,7 +4072,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c return converted == null ? string.Empty : converted; } } - else if (value is bool) + else if (value is bool) { return System.Convert.ToString((bool)value, cultureInfo).ToLowerInvariant(); } @@ -4131,14 +4131,14 @@ public partial interface ITranslationEnginesClient ///
### nmt ///
The Neural Machine Translation engine is primarily used for pretranslations. It is fine-tuned from Meta's NLLB-200. Valid IETF language tags provided to Serval will be converted to [NLLB-200 codes](https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200). See more about language tag resolution [here](https://github.com/sillsdev/serval/wiki/FLORES%E2%80%90200-Language-Code-Resolution-for-NMT-Engine). ///
* **IsModelPersisted**: (default to false) Whether the model can be downloaded by the client after it has been successfully built. - ///
+ ///
///
If you use a language among NLLB's supported languages, Serval will utilize everything the NLLB-200 model already knows about that language when translating. If the language you are working with is not among NLLB's supported languages, the language code will have no effect. - ///
+ ///
///
Typical endpoints: pretranslate ///
### echo ///
The echo engine has full coverage of all nmt and smt-transfer endpoints. Endpoints like create and build return empty responses. Endpoints like translate and get-word-graph echo the sent content back to the user in a format that mocks nmt or Smt. For example, translating a segment "test" with the echo engine would yield a translation response with translation "test". This engine is useful for debugging and testing purposes. ///
## Sample request: - ///
+ ///
///
{ ///
"name": "myTeam:myProject:myEngine", ///
"sourceLanguage": "el", @@ -4368,7 +4368,7 @@ public partial interface ITranslationEnginesClient ///
* The references defined in the SourceFile per line, if any. ///
* An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed. ///
* **Translation**: the text of the pretranslation - ///
+ ///
///
Pretranslations can be filtered by text id if provided. ///
Only pretranslations for the most recent successful build of the engine are returned. ///
@@ -4390,7 +4390,7 @@ public partial interface ITranslationEnginesClient ///
* The references defined in the SourceFile per line, if any. ///
* An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed. ///
* **Translation**: the text of the pretranslation - ///
+ ///
///
Only pretranslations for the most recent successful build of the engine are returned. ///
/// The translation engine id @@ -4410,12 +4410,12 @@ public partial interface ITranslationEnginesClient ///
* `PreferPretranslated`: The existing and pretranslated texts are merged into the USFM, preferring pretranslated text. ///
* `OnlyExisting`: Return the existing target USFM file with no modifications (except updating the USFM id if needed). ///
* `OnlyPretranslated`: Only the pretranslated text is returned; all existing text in the target USFM is removed. - ///
+ ///
///
The source or target book can be used as the USFM template for the pretranslated text. The template can be controlled by the `template` parameter: ///
* `Auto`: The target book is used as the template if it exists; otherwise, the source book is used. **This is the default**. ///
* `Source`: The source book is used as the template. ///
* `Target`: The target book is used as the template. - ///
+ ///
///
Only pretranslations for the most recent successful build of the engine are returned. ///
Both scripture and non-scripture text in the USFM is parsed and grouped according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation). /// @@ -4445,30 +4445,30 @@ public partial interface ITranslationEnginesClient ///
Specifying a corpus: ///
* A (legacy) corpus is selected by specifying CorpusId and a parallel corpus is selected by specifying ParallelCorpusId. ///
* A parallel corpus can be further filtered by specifying particular CorpusIds in SourceFilters or TargetFilters. - ///
+ ///
///
Filtering by textID or chapter: ///
* Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. ///
* Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) ///
* All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. - ///
+ ///
///
Filter - train on all or none ///
* If trainOn or pretranslate is not provided, all corpora will be used for training or pretranslation respectively ///
* If a corpus is selected for training or pretranslation and neither scriptureRange nor textIds are defined, all of the selected corpus will be used. ///
* If a corpus is selected for training or pretranslation and an empty scriptureRange or textIds is defined, none of the selected corpus will be used. ///
* If a corpus is selected for training or pretranslation but no further filters are provided, all selected corpora will be used for training or pretranslation respectively. - ///
+ ///
///
Specify the corpora and textIds/scriptureRanges within those corpora to pretranslate. When a corpus is selected for pretranslation, ///
the following text will be pretranslated: ///
* Text segments that are in the source and not the target (untranslated) ///
* Text segments that are in the source and the target, but where that target segment is not trained on. ///
If the engine does not support pretranslation, these fields have no effect. ///
Pretranslating has the same filtering as training. - ///
+ ///
///
The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. ///
See [nmt job settings documentation](https://github.com/sillsdev/serval/wiki/NMT-Build-Options) about configuring job parameters. ///
See [smt-transfer job settings documentation](https://github.com/sillsdev/serval/wiki/SMT-Transfer-Build-Options) about configuring job parameters. ///
See [keyterms parsing documentation](https://github.com/sillsdev/serval/wiki/Paratext-Key-Terms-Parsing) on how to use keyterms for training. - ///
+ ///
///
When using a parallel corpus: ///
* If, within a single parallel corpus, multiple source corpora have data for the same textIds (for text files or Paratext Projects) or books (for Paratext Projects only using the scriptureRange), those sources will be mixed where they overlap by randomly choosing from each source per line/verse. ///
* If, within a single parallel corpus, multiple target corpora have data for the same textIds (for text files or Paratext Projects) or books (for Paratext Projects only using the scriptureRange), only the first of the targets that includes that textId/book will be used for that textId/book. @@ -4529,10 +4529,10 @@ public partial interface ITranslationEnginesClient /// /// If a Nmt build was successful and IsModelPersisted is `true` for the engine, ///
then the model from the most recent successful build can be downloaded. - ///
+ ///
///
The endpoint will return a URL that can be used to download the model for up to 1 hour ///
after the request is made. If the URL is not used within that time, a new request will need to be made. - ///
+ ///
///
The download itself is created by g-zipping together the folder containing the fine tuned model ///
with all necessary supporting files. This zipped folder is then named by the pattern: ///
* <engine_id>_<model_revision>.tar.gz @@ -4703,14 +4703,14 @@ public string BaseUrl ///
### nmt ///
The Neural Machine Translation engine is primarily used for pretranslations. It is fine-tuned from Meta's NLLB-200. Valid IETF language tags provided to Serval will be converted to [NLLB-200 codes](https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200). See more about language tag resolution [here](https://github.com/sillsdev/serval/wiki/FLORES%E2%80%90200-Language-Code-Resolution-for-NMT-Engine). ///
* **IsModelPersisted**: (default to false) Whether the model can be downloaded by the client after it has been successfully built. - ///
+ ///
///
If you use a language among NLLB's supported languages, Serval will utilize everything the NLLB-200 model already knows about that language when translating. If the language you are working with is not among NLLB's supported languages, the language code will have no effect. - ///
+ ///
///
Typical endpoints: pretranslate ///
### echo ///
The echo engine has full coverage of all nmt and smt-transfer endpoints. Endpoints like create and build return empty responses. Endpoints like translate and get-word-graph echo the sent content back to the user in a format that mocks nmt or Smt. For example, translating a segment "test" with the echo engine would yield a translation response with translation "test". This engine is useful for debugging and testing purposes. ///
## Sample request: - ///
+ ///
///
{ ///
"name": "myTeam:myProject:myEngine", ///
"sourceLanguage": "el", @@ -6717,7 +6717,7 @@ public string BaseUrl ///
* The references defined in the SourceFile per line, if any. ///
* An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed. ///
* **Translation**: the text of the pretranslation - ///
+ ///
///
Pretranslations can be filtered by text id if provided. ///
Only pretranslations for the most recent successful build of the engine are returned. ///
@@ -6851,7 +6851,7 @@ public string BaseUrl ///
* The references defined in the SourceFile per line, if any. ///
* An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed. ///
* **Translation**: the text of the pretranslation - ///
+ ///
///
Only pretranslations for the most recent successful build of the engine are returned. /// /// The translation engine id @@ -6981,12 +6981,12 @@ public string BaseUrl ///
* `PreferPretranslated`: The existing and pretranslated texts are merged into the USFM, preferring pretranslated text. ///
* `OnlyExisting`: Return the existing target USFM file with no modifications (except updating the USFM id if needed). ///
* `OnlyPretranslated`: Only the pretranslated text is returned; all existing text in the target USFM is removed. - ///
+ ///
///
The source or target book can be used as the USFM template for the pretranslated text. The template can be controlled by the `template` parameter: ///
* `Auto`: The target book is used as the template if it exists; otherwise, the source book is used. **This is the default**. ///
* `Source`: The source book is used as the template. ///
* `Target`: The target book is used as the template. - ///
+ ///
///
Only pretranslations for the most recent successful build of the engine are returned. ///
Both scripture and non-scripture text in the USFM is parsed and grouped according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation). /// @@ -7241,30 +7241,30 @@ public string BaseUrl ///
Specifying a corpus: ///
* A (legacy) corpus is selected by specifying CorpusId and a parallel corpus is selected by specifying ParallelCorpusId. ///
* A parallel corpus can be further filtered by specifying particular CorpusIds in SourceFilters or TargetFilters. - ///
+ ///
///
Filtering by textID or chapter: ///
* Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. ///
* Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) ///
* All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. - ///
+ ///
///
Filter - train on all or none ///
* If trainOn or pretranslate is not provided, all corpora will be used for training or pretranslation respectively ///
* If a corpus is selected for training or pretranslation and neither scriptureRange nor textIds are defined, all of the selected corpus will be used. ///
* If a corpus is selected for training or pretranslation and an empty scriptureRange or textIds is defined, none of the selected corpus will be used. ///
* If a corpus is selected for training or pretranslation but no further filters are provided, all selected corpora will be used for training or pretranslation respectively. - ///
+ ///
///
Specify the corpora and textIds/scriptureRanges within those corpora to pretranslate. When a corpus is selected for pretranslation, ///
the following text will be pretranslated: ///
* Text segments that are in the source and not the target (untranslated) ///
* Text segments that are in the source and the target, but where that target segment is not trained on. ///
If the engine does not support pretranslation, these fields have no effect. ///
Pretranslating has the same filtering as training. - ///
+ ///
///
The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. ///
See [nmt job settings documentation](https://github.com/sillsdev/serval/wiki/NMT-Build-Options) about configuring job parameters. ///
See [smt-transfer job settings documentation](https://github.com/sillsdev/serval/wiki/SMT-Transfer-Build-Options) about configuring job parameters. ///
See [keyterms parsing documentation](https://github.com/sillsdev/serval/wiki/Paratext-Key-Terms-Parsing) on how to use keyterms for training. - ///
+ ///
///
When using a parallel corpus: ///
* If, within a single parallel corpus, multiple source corpora have data for the same textIds (for text files or Paratext Projects) or books (for Paratext Projects only using the scriptureRange), those sources will be mixed where they overlap by randomly choosing from each source per line/verse. ///
* If, within a single parallel corpus, multiple target corpora have data for the same textIds (for text files or Paratext Projects) or books (for Paratext Projects only using the scriptureRange), only the first of the targets that includes that textId/book will be used for that textId/book. @@ -7770,10 +7770,10 @@ public string BaseUrl /// /// If a Nmt build was successful and IsModelPersisted is `true` for the engine, ///
then the model from the most recent successful build can be downloaded. - ///
+ ///
///
The endpoint will return a URL that can be used to download the model for up to 1 hour ///
after the request is made. If the URL is not used within that time, a new request will need to be made. - ///
+ ///
///
The download itself is created by g-zipping together the folder containing the fine tuned model ///
with all necessary supporting files. This zipped folder is then named by the pattern: ///
* <engine_id>_<model_revision>.tar.gz @@ -7956,7 +7956,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c var field = System.Reflection.IntrospectionExtensions.GetTypeInfo(value.GetType()).GetDeclaredField(name); if (field != null) { - var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) + var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) as System.Runtime.Serialization.EnumMemberAttribute; if (attribute != null) { @@ -7968,7 +7968,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c return converted == null ? string.Empty : converted; } } - else if (value is bool) + else if (value is bool) { return System.Convert.ToString((bool)value, cultureInfo).ToLowerInvariant(); } @@ -8368,7 +8368,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c var field = System.Reflection.IntrospectionExtensions.GetTypeInfo(value.GetType()).GetDeclaredField(name); if (field != null) { - var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) + var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) as System.Runtime.Serialization.EnumMemberAttribute; if (attribute != null) { @@ -8380,7 +8380,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c return converted == null ? string.Empty : converted; } } - else if (value is bool) + else if (value is bool) { return System.Convert.ToString((bool)value, cultureInfo).ToLowerInvariant(); } @@ -8925,7 +8925,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c var field = System.Reflection.IntrospectionExtensions.GetTypeInfo(value.GetType()).GetDeclaredField(name); if (field != null) { - var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) + var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) as System.Runtime.Serialization.EnumMemberAttribute; if (attribute != null) { @@ -8937,7 +8937,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c return converted == null ? string.Empty : converted; } } - else if (value is bool) + else if (value is bool) { return System.Convert.ToString((bool)value, cultureInfo).ToLowerInvariant(); } @@ -8995,7 +8995,7 @@ public partial interface IWordAlignmentEnginesClient ///
The echo-word-alignment engine has full coverage of all endpoints. Endpoints like create and build return empty responses. ///
Endpoints like get-word-alignment echo the sent content back to the user in the proper format. This engine is useful for debugging and testing purposes. ///
## Sample request: - ///
+ ///
///
{ ///
"name": "myTeam:myProject:myEngine", ///
"sourceLanguage": "el", @@ -9111,7 +9111,7 @@ public partial interface IWordAlignmentEnginesClient ///
* **TargetTokens**: the tokenized target segment ///
* **Confidences**: the confidence of the alignment ona scale from 0 to 1 ///
* **Alignment**: the word alignment, 0 indexed for source and target positions - ///
+ ///
///
Word alignments can be filtered by text id if provided. ///
Only word alignments for the most recent successful build of the engine are returned. ///
@@ -9141,10 +9141,10 @@ public partial interface IWordAlignmentEnginesClient ///
Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. ///
Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) ///
All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. - ///
+ ///
///
Specify the corpora or textIds to word align on. ///
When a corpus or textId is selected for word align on, only text segments that are in both the source and the target will be aligned. - ///
+ ///
///
The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. ///
See [statistical alignment job settings documentation](https://github.com/sillsdev/serval/wiki/Statistical-Alignment-Build-Options) about configuring job parameters. ///
See [keyterms parsing documentation](https://github.com/sillsdev/serval/wiki/Paratext-Key-Terms-Parsing) on how to use keyterms for training. @@ -9358,7 +9358,7 @@ public string BaseUrl ///
The echo-word-alignment engine has full coverage of all endpoints. Endpoints like create and build return empty responses. ///
Endpoints like get-word-alignment echo the sent content back to the user in the proper format. This engine is useful for debugging and testing purposes. ///
## Sample request: - ///
+ ///
///
{ ///
"name": "myTeam:myProject:myEngine", ///
"sourceLanguage": "el", @@ -10380,7 +10380,7 @@ public string BaseUrl ///
* **TargetTokens**: the tokenized target segment ///
* **Confidences**: the confidence of the alignment ona scale from 0 to 1 ///
* **Alignment**: the word alignment, 0 indexed for source and target positions - ///
+ ///
///
Word alignments can be filtered by text id if provided. ///
Only word alignments for the most recent successful build of the engine are returned. /// @@ -10617,10 +10617,10 @@ public string BaseUrl ///
Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. ///
Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) ///
All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. - ///
+ ///
///
Specify the corpora or textIds to word align on. ///
When a corpus or textId is selected for word align on, only text segments that are in both the source and the target will be aligned. - ///
+ ///
///
The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. ///
See [statistical alignment job settings documentation](https://github.com/sillsdev/serval/wiki/Statistical-Alignment-Build-Options) about configuring job parameters. ///
See [keyterms parsing documentation](https://github.com/sillsdev/serval/wiki/Paratext-Key-Terms-Parsing) on how to use keyterms for training. @@ -11191,7 +11191,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c var field = System.Reflection.IntrospectionExtensions.GetTypeInfo(value.GetType()).GetDeclaredField(name); if (field != null) { - var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) + var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) as System.Runtime.Serialization.EnumMemberAttribute; if (attribute != null) { @@ -11203,7 +11203,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c return converted == null ? string.Empty : converted; } } - else if (value is bool) + else if (value is bool) { return System.Convert.ToString((bool)value, cultureInfo).ToLowerInvariant(); } @@ -11464,7 +11464,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c var field = System.Reflection.IntrospectionExtensions.GetTypeInfo(value.GetType()).GetDeclaredField(name); if (field != null) { - var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) + var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) as System.Runtime.Serialization.EnumMemberAttribute; if (attribute != null) { @@ -11476,7 +11476,7 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c return converted == null ? string.Empty : converted; } } - else if (value is bool) + else if (value is bool) { return System.Convert.ToString((bool)value, cultureInfo).ToLowerInvariant(); } diff --git a/src/Serval/src/Serval.Translation/Services/EngineService.cs b/src/Serval/src/Serval.Translation/Services/EngineService.cs index 34c62a36..f22c033c 100644 --- a/src/Serval/src/Serval.Translation/Services/EngineService.cs +++ b/src/Serval/src/Serval.Translation/Services/EngineService.cs @@ -124,10 +124,15 @@ public override async Task CreateAsync(Engine engine, CancellationToken try { await Entities.InsertAsync(engine, cancellationToken); - TranslationEngineApi.TranslationEngineApiClient? client = - _grpcClientFactory.CreateClient(engine.Type); - if (client is null) + TranslationEngineApi.TranslationEngineApiClient? client; + try + { + client = _grpcClientFactory.CreateClient(engine.Type); + } + catch (InvalidOperationException) + { throw new InvalidOperationException($"'{engine.Type}' is an invalid engine type."); + } var request = new CreateRequest { EngineType = engine.Type, diff --git a/src/Serval/src/Serval.WordAlignment/Services/EngineService.cs b/src/Serval/src/Serval.WordAlignment/Services/EngineService.cs index b4d13849..c3dac71d 100644 --- a/src/Serval/src/Serval.WordAlignment/Services/EngineService.cs +++ b/src/Serval/src/Serval.WordAlignment/Services/EngineService.cs @@ -51,10 +51,17 @@ public override async Task CreateAsync(Engine engine, CancellationToken try { await Entities.InsertAsync(engine, cancellationToken); - WordAlignmentEngineApi.WordAlignmentEngineApiClient? client = - _grpcClientFactory.CreateClient(engine.Type); - if (client is null) + WordAlignmentEngineApi.WordAlignmentEngineApiClient client; + try + { + client = _grpcClientFactory.CreateClient( + engine.Type + ); + } + catch (InvalidOperationException) + { throw new InvalidOperationException($"'{engine.Type}' is an invalid engine type."); + } var request = new CreateRequest { EngineType = engine.Type, diff --git a/src/Serval/test/Serval.E2ETests/ServalApiTests.cs b/src/Serval/test/Serval.E2ETests/ServalApiTests.cs index e7a89280..06da1d1d 100644 --- a/src/Serval/test/Serval.E2ETests/ServalApiTests.cs +++ b/src/Serval/test/Serval.E2ETests/ServalApiTests.cs @@ -48,6 +48,30 @@ public async Task GetEchoPretranslate() Assert.That(pretranslations, Has.Count.GreaterThan(1)); } + [Test] + public async Task GetEchoWordAlignment() + { + string engineId = await _helperClient.CreateNewEngineAsync("EchoWordAlignment", "es", "es", "Echo3"); + string[] books = ["1JN.txt", "2JN.txt", "3JN.txt"]; + ParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus(books, "es", "en", false); + await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, train_corpus, false); + await _helperClient.BuildEngineAsync(engineId); + WordAlignmentResult tResult = await _helperClient.WordAlignmentEnginesClient.GetWordAlignmentAsync( + engineId, + new WordAlignmentRequest() { SourceSegment = "espíritu verdad", TargetSegment = "espíritu verdad" } + ); + Assert.That( + tResult.Alignment, + Is.EqualTo( + new List + { + new() { SourceIndex = 0, TargetIndex = 0 }, + new() { SourceIndex = 1, TargetIndex = 1 } + } + ) + ); + } + [Test] public async Task GetSmtTranslation() { @@ -146,8 +170,8 @@ public async Task NmtQueueMultiple() const int NUM_WORKERS = 8; string[] engineIds = new string[NUM_ENGINES]; string[] books = ["MAT.txt", "1JN.txt", "2JN.txt"]; - IParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus(books, "es", "en", false); - IParallelCorpusConfig pretranslate_corpus = await _helperClient.MakeParallelTextCorpus( + ParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus(books, "es", "en", false); + ParallelCorpusConfig pretranslate_corpus = await _helperClient.MakeParallelTextCorpus( ["3JN.txt"], "es", "en", @@ -222,8 +246,8 @@ public async Task NmtLargeBatchAndDownload() TranslationEngine engine = await _helperClient.TranslationEnginesClient.GetAsync(engineId); Assert.That(engine.IsModelPersisted, Is.True); string[] books = ["bible_LARGEFILE.txt"]; - IParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus(books, "es", "en", false); - IParallelCorpusConfig pretranslate_corpus = await _helperClient.MakeParallelTextCorpus( + ParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus(books, "es", "en", false); + ParallelCorpusConfig pretranslate_corpus = await _helperClient.MakeParallelTextCorpus( ["3JN.txt"], "es", "en", diff --git a/src/Serval/test/Serval.E2ETests/ServalClientHelper.cs b/src/Serval/test/Serval.E2ETests/ServalClientHelper.cs index a3e7945d..85b85d2e 100644 --- a/src/Serval/test/Serval.E2ETests/ServalClientHelper.cs +++ b/src/Serval/test/Serval.E2ETests/ServalClientHelper.cs @@ -8,27 +8,93 @@ public enum EngineGroup WordAlignment } -public interface IBuild +public record Build { - string Id { get; set; } - int Revision { get; set; } - JobState State { get; set; } + public string Id { get; set; } + public int Revision { get; set; } + public JobState State { get; set; } + + public Build(TranslationBuild translationBuild) + { + Id = translationBuild.Id; + Revision = translationBuild.Revision; + State = translationBuild.State; + } + + public Build(WordAlignmentBuild wordAlignmentBuild) + { + Id = wordAlignmentBuild.Id; + Revision = wordAlignmentBuild.Revision; + State = wordAlignmentBuild.State; + } } -public interface IParallelCorpus +public record ParallelCorpus { - string Id { get; set; } - string Url { get; set; } - ResourceLink Engine { get; set; } - IList SourceCorpora { get; set; } - IList TargetCorpora { get; set; } + public string Id { get; set; } + public string Url { get; set; } + public ResourceLink Engine { get; set; } + public IList SourceCorpora { get; set; } + public IList TargetCorpora { get; set; } + + public ParallelCorpus(TranslationParallelCorpus translationParallelCorpus) + { + Id = translationParallelCorpus.Id; + Url = translationParallelCorpus.Url; + Engine = translationParallelCorpus.Engine; + SourceCorpora = translationParallelCorpus.SourceCorpora; + TargetCorpora = translationParallelCorpus.TargetCorpora; + } + + public ParallelCorpus(WordAlignmentParallelCorpus wordAlignmentParallelCorpus) + { + Id = wordAlignmentParallelCorpus.Id; + Url = wordAlignmentParallelCorpus.Url; + Engine = wordAlignmentParallelCorpus.Engine; + SourceCorpora = wordAlignmentParallelCorpus.SourceCorpora; + TargetCorpora = wordAlignmentParallelCorpus.TargetCorpora; + } } -public interface IParallelCorpusConfig +public record ParallelCorpusConfig { - public string Name { get; set; } + public string? Name { get; set; } public IList SourceCorpusIds { get; set; } public IList TargetCorpusIds { get; set; } + + public TranslationParallelCorpusConfig ToTranslationParallelCorpusConfig() + { + return new TranslationParallelCorpusConfig + { + Name = Name, + SourceCorpusIds = SourceCorpusIds, + TargetCorpusIds = TargetCorpusIds + }; + } + + public WordAlignmentParallelCorpusConfig ToWordAlignmentParallelCorpusConfig() + { + return new WordAlignmentParallelCorpusConfig + { + Name = Name, + SourceCorpusIds = SourceCorpusIds, + TargetCorpusIds = TargetCorpusIds + }; + } + + public ParallelCorpusConfig(TranslationParallelCorpusConfig translationParallelCorpusConfig) + { + Name = translationParallelCorpusConfig.Name; + SourceCorpusIds = translationParallelCorpusConfig.SourceCorpusIds; + TargetCorpusIds = translationParallelCorpusConfig.TargetCorpusIds; + } + + public ParallelCorpusConfig(WordAlignmentParallelCorpusConfig wordAlignmentParallelCorpusConfig) + { + Name = wordAlignmentParallelCorpusConfig.Name; + SourceCorpusIds = wordAlignmentParallelCorpusConfig.SourceCorpusIds; + TargetCorpusIds = wordAlignmentParallelCorpusConfig.TargetCorpusIds; + } } public class ServalClientHelper : IAsyncDisposable @@ -128,7 +194,7 @@ public WordAlignmentBuildConfig InitWordAlignmentBuildConfig() { WordAlignOn = [], TrainOn = null, - Options = "" + Options = null }; return WordAlignmentBuildConfig; } @@ -166,7 +232,7 @@ public async Task CreateNewEngineAsync( Name = name, SourceLanguage = sourceLanguage, TargetLanguage = targetLanguage, - Type = engineGroup.ToString(), + Type = engineType, IsModelPersisted = isModelPersisted } ); @@ -181,7 +247,7 @@ public async Task CreateNewEngineAsync( Name = name, SourceLanguage = sourceLanguage, TargetLanguage = targetLanguage, - Type = engineGroup.ToString(), + Type = engineType, } ); EngineIdToEngineGroup[engine.Id] = engineGroup; @@ -197,29 +263,34 @@ public async Task StartTranslationBuildAsync(string engineId) public async Task BuildEngineAsync(string engineId) { EngineGroup engineGroup = EngineIdToEngineGroup[engineId]; - IBuild newJob; + Build newJob; + int revision; if (engineGroup == EngineGroup.Translation) { - newJob = (IBuild)await StartTranslationBuildAsync(engineId); + newJob = new Build(await StartTranslationBuildAsync(engineId)); + revision = newJob.Revision; + await TranslationEnginesClient.GetBuildAsync(engineId, newJob.Id, newJob.Revision); } else { - newJob = (IBuild)await WordAlignmentEnginesClient.StartBuildAsync(engineId, WordAlignmentBuildConfig); + newJob = new Build(await WordAlignmentEnginesClient.StartBuildAsync(engineId, WordAlignmentBuildConfig)); + revision = newJob.Revision; + await WordAlignmentEnginesClient.GetBuildAsync(engineId, newJob.Id, newJob.Revision); } - int revision = newJob.Revision; - await TranslationEnginesClient.GetBuildAsync(engineId, newJob.Id, newJob.Revision); while (true) { try { - IBuild result; + Build result; if (engineGroup == EngineGroup.Translation) { - result = (IBuild)await TranslationEnginesClient.GetBuildAsync(engineId, newJob.Id, revision + 1); + result = new Build(await TranslationEnginesClient.GetBuildAsync(engineId, newJob.Id, revision + 1)); } else { - result = (IBuild)await WordAlignmentEnginesClient.GetBuildAsync(engineId, newJob.Id, revision + 1); + result = new Build( + await WordAlignmentEnginesClient.GetBuildAsync(engineId, newJob.Id, revision + 1) + ); } if (!(result.State == JobState.Active || result.State == JobState.Pending)) // build completed @@ -252,14 +323,14 @@ public async Task CancelBuildAsync(string engineId, string buildId, int timeoutS int tries = 1; while (true) { - IBuild build; + Build build; if (engineGroup == EngineGroup.Translation) { - build = (IBuild)await TranslationEnginesClient.GetBuildAsync(engineId, buildId); + build = new Build(await TranslationEnginesClient.GetBuildAsync(engineId, buildId)); } else { - build = (IBuild)await WordAlignmentEnginesClient.GetBuildAsync(engineId, buildId); + build = new Build(await WordAlignmentEnginesClient.GetBuildAsync(engineId, buildId)); } if (build.State != JobState.Pending && build.State != JobState.Active) break; @@ -337,7 +408,7 @@ bool inference return response.Id; } - public async Task MakeParallelTextCorpus( + public async Task MakeParallelTextCorpus( string[] filesToAdd, string sourceLanguage, string targetLanguage, @@ -395,24 +466,25 @@ bool inference TranslationParallelCorpusConfig parallelCorpusConfig = new() { SourceCorpusIds = { sourceCorpus.Id }, TargetCorpusIds = { targetCorpus.Id } }; - return (IParallelCorpusConfig)parallelCorpusConfig; + return new ParallelCorpusConfig(parallelCorpusConfig); } public async Task AddParallelTextCorpusToEngineAsync( string engineId, - IParallelCorpusConfig parallelCorpusConfig, + ParallelCorpusConfig parallelCorpusConfig, bool inference ) { EngineGroup engineGroup = EngineIdToEngineGroup[engineId]; - IParallelCorpus parallelCorpus; + ParallelCorpus parallelCorpus; if (engineGroup == EngineGroup.Translation) { - parallelCorpus = (IParallelCorpus) + parallelCorpus = new ParallelCorpus( await TranslationEnginesClient.AddParallelCorpusAsync( engineId, - (TranslationParallelCorpusConfig)parallelCorpusConfig - ); + parallelCorpusConfig.ToTranslationParallelCorpusConfig() + ) + ); if (inference) { TranslationBuildConfig.Pretranslate!.Add( @@ -422,11 +494,12 @@ await TranslationEnginesClient.AddParallelCorpusAsync( } else { - parallelCorpus = (IParallelCorpus) + parallelCorpus = new ParallelCorpus( await WordAlignmentEnginesClient.AddParallelCorpusAsync( engineId, - (WordAlignmentParallelCorpusConfig)parallelCorpusConfig - ); + parallelCorpusConfig.ToWordAlignmentParallelCorpusConfig() + ) + ); if (inference) { WordAlignmentBuildConfig.WordAlignOn!.Add( @@ -539,6 +612,7 @@ public static EngineGroup GetEngineGroup(string engineType) "Nmt" => EngineGroup.Translation, "Echo" => EngineGroup.Translation, "Statistical" => EngineGroup.WordAlignment, + "EchoWordAlignment" => EngineGroup.WordAlignment, _ => throw new ArgumentOutOfRangeException(engineType, "Unknown engine type") }; }