diff --git a/Serval.sln b/Serval.sln index 22056625..edd3f075 100644 --- a/Serval.sln +++ b/Serval.sln @@ -86,14 +86,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{C3A14577-A65 EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SIL.ServiceToolkit", "src\ServiceToolkit\src\SIL.ServiceToolkit\SIL.ServiceToolkit.csproj", "{0E40F959-C641-40A2-9750-B17A4F9F9E55}" EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{341EEA9B-9E9E-4316-BE77-15769E03D646}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Serval", "Serval", "{858D3359-F7EA-4088-BDB1-5FDD22F44331}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{70703B03-4E62-4954-93E3-3845B9C678E7}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Serval.Corpora", "src\Serval\src\Serval.Corpora\Serval.Corpora.csproj", "{D43D405A-B9C5-4D5B-85C6-65A2BD15ADAD}" -EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -188,10 +180,6 @@ Global {0E40F959-C641-40A2-9750-B17A4F9F9E55}.Debug|Any CPU.Build.0 = Debug|Any CPU {0E40F959-C641-40A2-9750-B17A4F9F9E55}.Release|Any CPU.ActiveCfg = Release|Any CPU {0E40F959-C641-40A2-9750-B17A4F9F9E55}.Release|Any CPU.Build.0 = Release|Any CPU - {D43D405A-B9C5-4D5B-85C6-65A2BD15ADAD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {D43D405A-B9C5-4D5B-85C6-65A2BD15ADAD}.Debug|Any CPU.Build.0 = Debug|Any CPU - {D43D405A-B9C5-4D5B-85C6-65A2BD15ADAD}.Release|Any CPU.ActiveCfg = Release|Any CPU - {D43D405A-B9C5-4D5B-85C6-65A2BD15ADAD}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -227,9 +215,6 @@ Global {10657805-48F1-4205-B8F5-79447F6EF620} = {25CDB05B-4E24-4A6E-933E-1E0BEC97D74D} {C3A14577-A654-4604-818C-4E683DD45A51} = {EA69B41C-49EF-4017-A687-44B9DF37FF98} {0E40F959-C641-40A2-9750-B17A4F9F9E55} = {C3A14577-A654-4604-818C-4E683DD45A51} - {858D3359-F7EA-4088-BDB1-5FDD22F44331} = {341EEA9B-9E9E-4316-BE77-15769E03D646} - {70703B03-4E62-4954-93E3-3845B9C678E7} = {858D3359-F7EA-4088-BDB1-5FDD22F44331} - {D43D405A-B9C5-4D5B-85C6-65A2BD15ADAD} = {70703B03-4E62-4954-93E3-3845B9C678E7} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {9F18C25E-E140-43C3-B177-D562E1628370} diff --git a/src/Serval/src/Serval.ApiServer/Serval.ApiServer.csproj b/src/Serval/src/Serval.ApiServer/Serval.ApiServer.csproj index bc87b846..d40fb933 100644 --- a/src/Serval/src/Serval.ApiServer/Serval.ApiServer.csproj +++ b/src/Serval/src/Serval.ApiServer/Serval.ApiServer.csproj @@ -45,7 +45,6 @@ - diff --git a/src/Serval/src/Serval.Client/Client.g.cs b/src/Serval/src/Serval.Client/Client.g.cs index ec6492c4..f612f5f8 100644 --- a/src/Serval/src/Serval.Client/Client.g.cs +++ b/src/Serval/src/Serval.Client/Client.g.cs @@ -2484,7 +2484,7 @@ public partial interface ICorporaClient /// Update an existing corpus /// /// The unique identifier for the corpus - /// The new corpus files + /// Tuples of the ids of the new corpus files and the associated text ids /// The corpus was updated successfully /// A server side error occurred. System.Threading.Tasks.Task UpdateAsync(string id, System.Collections.Generic.IEnumerable files, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); @@ -2853,7 +2853,7 @@ public string BaseUrl /// Update an existing corpus /// /// The unique identifier for the corpus - /// The new corpus files + /// Tuples of the ids of the new corpus files and the associated text ids /// The corpus was updated successfully /// A server side error occurred. public virtual async System.Threading.Tasks.Task UpdateAsync(string id, System.Collections.Generic.IEnumerable files, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) @@ -4249,7 +4249,7 @@ public partial interface ITranslationEnginesClient /// Get all corpora for a translation engine /// /// The translation engine id - /// The files + /// The corpora /// A server side error occurred. System.Threading.Tasks.Task> GetAllCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); @@ -4292,6 +4292,80 @@ public partial interface ITranslationEnginesClient /// A server side error occurred. System.Threading.Tasks.Task DeleteCorpusAsync(string id, string corpusId, bool? deleteFiles = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Add a corpus to a translation engine + /// + /// + /// ## Parameters + ///
* **name**: A name to help identify and distinguish the corpus from other corpora + ///
* The name does not have to be unique since the corpus is uniquely identified by an auto-generated id + ///
* **sourceLanguage**: The source language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + ///
* Normally, this is the same as the engine sourceLanguage. This may change for future engines as a means of transfer learning. + ///
* **targetLanguage**: The target language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + ///
* **SourceFiles**: The source files associated with the corpus + ///
* **FileId**: The unique id referencing the uploaded file + ///
* **TextId**: The client-defined name to associate source and target files. + ///
* If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine. + ///
* If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated. + ///
* If a TextId is used more than once in SourceFiles, the sources will be randomly and evenly mixed for training. + ///
* For pretranslating, multiple sources with the same TextId will be combined, but the first source will always take precedence (no random mixing). + ///
* For Paratext projects, TextId will be ignored - multiple Paratext source projects will always be mixed (as if they have the same TextId). + ///
* **TargetFiles**: The target files associated with the corpus + ///
* Same as SourceFiles, except only a single instance of a TextID or a single paratext project is supported. There is no mixing or combining of multiple targets. + ///
+ /// The translation engine id + /// The corpus configuration (see remarks) + /// The added corpus + /// A server side error occurred. + System.Threading.Tasks.Task AddParallelCorpusAsync(string id, TranslationCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all parallel corpora for a translation engine + /// + /// The translation engine id + /// The parallel corpora + /// A server side error occurred. + System.Threading.Tasks.Task> GetAllParallelCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Update a corpus with a new set of files + /// + /// + /// Will completely replace corpus' file associations. Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete. + /// + /// The translation engine id + /// The corpus configuration + /// The corpus id + /// The corpus was updated successfully + /// A server side error occurred. + System.Threading.Tasks.Task UpdateCorpus2Async(string id, TranslationParallelCorpusUpdateConfig corpusConfig, string corpusId, string? parallelCorpusId = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get the configuration of a parallel corpus for a translation engine + /// + /// The translation engine id + /// The parallel corpus id + /// The parallel corpus configuration + /// A server side error occurred. + System.Threading.Tasks.Task GetParallelCorpusAsync(string id, string parallelCorpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Remove a parallel corpus from a translation engine + /// + /// + /// Removing a parallel corpus will remove all pretranslations associated with that corpus. + /// + /// The translation engine id + /// The parallel corpus id + /// The parallel corpus was deleted successfully. + /// A server side error occurred. + System.Threading.Tasks.Task DeleteParallelCorpusAsync(string id, string parallelCorpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// /// Get all pretranslations in a corpus of a translation engine @@ -5296,19 +5370,624 @@ public string BaseUrl if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); - } - else - if (status_ == 405) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); - } - else - if (status_ == 409) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine needs to be built first.", status_, responseText_, headers_, null); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 405) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); + } + else + if (status_ == 409) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine needs to be built first.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Incrementally train a translation engine with a segment pair + /// + /// + /// A segment pair consists of a source and target segment as well as a boolean flag `sentenceStart` + ///
that should be set to true if this segment pair forms the beginning of a sentence. (This information + ///
will be used to reconstruct proper capitalization when training/inferencing). + ///
+ /// The translation engine id + /// The segment pair + /// The engine was trained successfully. + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task TrainSegmentAsync(string id, SegmentPair segmentPair, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (segmentPair == null) + throw new System.ArgumentNullException("segmentPair"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(segmentPair, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("POST"); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "translation/engines/{id}/train-segment" + urlBuilder_.Append("translation/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/train-segment"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + return; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 405) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); + } + else + if (status_ == 409) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine needs to be built first.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Add a corpus to a translation engine + /// + /// + /// ## Parameters + ///
* **name**: A name to help identify and distinguish the corpus from other corpora + ///
* The name does not have to be unique since the corpus is uniquely identified by an auto-generated id + ///
* **sourceLanguage**: The source language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + ///
* Normally, this is the same as the engine sourceLanguage. This may change for future engines as a means of transfer learning. + ///
* **targetLanguage**: The target language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + ///
* **SourceFiles**: The source files associated with the corpus + ///
* **FileId**: The unique id referencing the uploaded file + ///
* **TextId**: The client-defined name to associate source and target files. + ///
* If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine. + ///
* If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated. + ///
* If a TextId is used more than once in SourceFiles, the sources will be randomly and evenly mixed for training. + ///
* For pretranslating, multiple sources with the same TextId will be combined, but the first source will always take precedence (no random mixing). + ///
* For Paratext projects, TextId will be ignored - multiple Paratext source projects will always be mixed (as if they have the same TextId). + ///
* **TargetFiles**: The target files associated with the corpus + ///
* Same as SourceFiles, except only a single instance of a TextID or a single paratext project is supported. There is no mixing or combining of multiple targets. + ///
+ /// The translation engine id + /// The corpus configuration (see remarks) + /// The added corpus + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task AddCorpusAsync(string id, TranslationCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (corpusConfig == null) + throw new System.ArgumentNullException("corpusConfig"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(corpusConfig, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "translation/engines/{id}/corpora" + urlBuilder_.Append("translation/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/corpora"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 201) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all corpora for a translation engine + /// + /// The translation engine id + /// The corpora + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task> GetAllCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "translation/engines/{id}/corpora" + urlBuilder_.Append("translation/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/corpora"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details. ", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Update a corpus with a new set of files + /// + /// + /// See posting a new corpus for details of use. Will completely replace corpus' file associations. + ///
Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete. + ///
+ /// The translation engine id + /// The corpus id + /// The corpus configuration + /// The corpus was updated successfully + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task UpdateCorpusAsync(string id, string corpusId, TranslationCorpusUpdateConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (corpusId == null) + throw new System.ArgumentNullException("corpusId"); + + if (corpusConfig == null) + throw new System.ArgumentNullException("corpusConfig"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(corpusConfig, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("PATCH"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "translation/engines/{id}/corpora/{corpusId}" + urlBuilder_.Append("translation/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine or corpus does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get the configuration of a corpus for a translation engine + /// + /// The translation engine id + /// The corpus id + /// The corpus configuration + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task GetCorpusAsync(string id, string corpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (corpusId == null) + throw new System.ArgumentNullException("corpusId"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "translation/engines/{id}/corpora/{corpusId}" + urlBuilder_.Append("translation/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the translation engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine or corpus does not exist.", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -5338,24 +6017,23 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Incrementally train a translation engine with a segment pair + /// Remove a corpus from a translation engine /// /// - /// A segment pair consists of a source and target segment as well as a boolean flag `sentenceStart` - ///
that should be set to true if this segment pair forms the beginning of a sentence. (This information - ///
will be used to reconstruct proper capitalization when training/inferencing). + /// Removing a corpus will remove all pretranslations associated with that corpus. ///
/// The translation engine id - /// The segment pair - /// The engine was trained successfully. + /// The corpus id + /// If true, all files associated with the corpus will be deleted as well (even if they are associated with other corpora). If false, no files will be deleted. + /// The corpus was deleted successfully. /// A server side error occurred. - public virtual async System.Threading.Tasks.Task TrainSegmentAsync(string id, SegmentPair segmentPair, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task DeleteCorpusAsync(string id, string corpusId, bool? deleteFiles = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); - if (segmentPair == null) - throw new System.ArgumentNullException("segmentPair"); + if (corpusId == null) + throw new System.ArgumentNullException("corpusId"); var client_ = _httpClient; var disposeClient_ = false; @@ -5363,18 +6041,21 @@ public string BaseUrl { using (var request_ = new System.Net.Http.HttpRequestMessage()) { - var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(segmentPair, JsonSerializerSettings); - var content_ = new System.Net.Http.StringContent(json_); - content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); - request_.Content = content_; - request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Method = new System.Net.Http.HttpMethod("DELETE"); var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/train-segment" + // Operation Path: "translation/engines/{id}/corpora/{corpusId}" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/train-segment"); + urlBuilder_.Append("/corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append('?'); + if (deleteFiles != null) + { + urlBuilder_.Append(System.Uri.EscapeDataString("delete-files")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(deleteFiles, System.Globalization.CultureInfo.InvariantCulture))).Append('&'); + } + urlBuilder_.Length--; PrepareRequest(client_, request_, urlBuilder_); @@ -5404,12 +6085,6 @@ public string BaseUrl return; } else - if (status_ == 400) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("Bad request", status_, responseText_, headers_, null); - } - else if (status_ == 401) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); @@ -5425,19 +6100,7 @@ public string BaseUrl if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); - } - else - if (status_ == 405) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); - } - else - if (status_ == 409) - { - string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine needs to be built first.", status_, responseText_, headers_, null); + throw new ServalApiException("The engine or corpus does not exist.", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -5491,7 +6154,7 @@ public string BaseUrl /// The corpus configuration (see remarks) /// The added corpus /// A server side error occurred. - public virtual async System.Threading.Tasks.Task AddCorpusAsync(string id, TranslationCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task AddParallelCorpusAsync(string id, TranslationCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); @@ -5514,10 +6177,10 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/corpora" + // Operation Path: "translation/engines/{id}/parallel-corpora" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/corpora"); + urlBuilder_.Append("/parallel-corpora"); PrepareRequest(client_, request_, urlBuilder_); @@ -5603,12 +6266,12 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get all corpora for a translation engine + /// Get all parallel corpora for a translation engine /// /// The translation engine id - /// The files + /// The parallel corpora /// A server side error occurred. - public virtual async System.Threading.Tasks.Task> GetAllCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task> GetAllParallelCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); @@ -5624,10 +6287,10 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/corpora" + // Operation Path: "translation/engines/{id}/parallel-corpora" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/corpora"); + urlBuilder_.Append("/parallel-corpora"); PrepareRequest(client_, request_, urlBuilder_); @@ -5654,7 +6317,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -5710,15 +6373,14 @@ public string BaseUrl /// Update a corpus with a new set of files ///
/// - /// See posting a new corpus for details of use. Will completely replace corpus' file associations. - ///
Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete. + /// Will completely replace corpus' file associations. Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete. ///
/// The translation engine id - /// The corpus id /// The corpus configuration + /// The corpus id /// The corpus was updated successfully /// A server side error occurred. - public virtual async System.Threading.Tasks.Task UpdateCorpusAsync(string id, string corpusId, TranslationCorpusUpdateConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task UpdateCorpus2Async(string id, TranslationParallelCorpusUpdateConfig corpusConfig, string corpusId, string? parallelCorpusId = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); @@ -5744,11 +6406,17 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/corpora/{corpusId}" + // Operation Path: "translation/engines/{id}/parallel-corpora/{corpusId}" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/corpora/"); + urlBuilder_.Append("/parallel-corpora/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append('?'); + if (parallelCorpusId != null) + { + urlBuilder_.Append(System.Uri.EscapeDataString("parallelCorpusId")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(parallelCorpusId, System.Globalization.CultureInfo.InvariantCulture))).Append('&'); + } + urlBuilder_.Length--; PrepareRequest(client_, request_, urlBuilder_); @@ -5834,19 +6502,19 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Get the configuration of a corpus for a translation engine + /// Get the configuration of a parallel corpus for a translation engine /// /// The translation engine id - /// The corpus id - /// The corpus configuration + /// The parallel corpus id + /// The parallel corpus configuration /// A server side error occurred. - public virtual async System.Threading.Tasks.Task GetCorpusAsync(string id, string corpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task GetParallelCorpusAsync(string id, string parallelCorpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); - if (corpusId == null) - throw new System.ArgumentNullException("corpusId"); + if (parallelCorpusId == null) + throw new System.ArgumentNullException("parallelCorpusId"); var client_ = _httpClient; var disposeClient_ = false; @@ -5859,11 +6527,11 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/corpora/{corpusId}" + // Operation Path: "translation/engines/{id}/parallel-corpora/{parallelCorpusId}" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/corpora/"); - urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/parallel-corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(parallelCorpusId, System.Globalization.CultureInfo.InvariantCulture))); PrepareRequest(client_, request_, urlBuilder_); @@ -5890,7 +6558,7 @@ public string BaseUrl var status_ = (int)response_.StatusCode; if (status_ == 200) { - var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); if (objectResponse_.Object == null) { throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); @@ -5913,7 +6581,7 @@ public string BaseUrl if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine or corpus does not exist.", status_, responseText_, headers_, null); + throw new ServalApiException("The engine or parallel corpus does not exist.", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -5943,23 +6611,22 @@ public string BaseUrl /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// - /// Remove a corpus from a translation engine + /// Remove a parallel corpus from a translation engine /// /// - /// Removing a corpus will remove all pretranslations associated with that corpus. + /// Removing a parallel corpus will remove all pretranslations associated with that corpus. /// /// The translation engine id - /// The corpus id - /// If true, all files associated with the corpus will be deleted as well (even if they are associated with other corpora). If false, no files will be deleted. - /// The corpus was deleted successfully. + /// The parallel corpus id + /// The parallel corpus was deleted successfully. /// A server side error occurred. - public virtual async System.Threading.Tasks.Task DeleteCorpusAsync(string id, string corpusId, bool? deleteFiles = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task DeleteParallelCorpusAsync(string id, string parallelCorpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); - if (corpusId == null) - throw new System.ArgumentNullException("corpusId"); + if (parallelCorpusId == null) + throw new System.ArgumentNullException("parallelCorpusId"); var client_ = _httpClient; var disposeClient_ = false; @@ -5971,17 +6638,11 @@ public string BaseUrl var urlBuilder_ = new System.Text.StringBuilder(); if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); - // Operation Path: "translation/engines/{id}/corpora/{corpusId}" + // Operation Path: "translation/engines/{id}/parallel-corpora/{parallelCorpusId}" urlBuilder_.Append("translation/engines/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append("/corpora/"); - urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); - urlBuilder_.Append('?'); - if (deleteFiles != null) - { - urlBuilder_.Append(System.Uri.EscapeDataString("delete-files")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(deleteFiles, System.Globalization.CultureInfo.InvariantCulture))).Append('&'); - } - urlBuilder_.Length--; + urlBuilder_.Append("/parallel-corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(parallelCorpusId, System.Globalization.CultureInfo.InvariantCulture))); PrepareRequest(client_, request_, urlBuilder_); @@ -6026,7 +6687,7 @@ public string BaseUrl if (status_ == 404) { string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); - throw new ServalApiException("The engine or corpus does not exist.", status_, responseText_, headers_, null); + throw new ServalApiException("The engine or parallel corpus does not exist.", status_, responseText_, headers_, null); } else if (status_ == 503) @@ -8628,35 +9289,7 @@ public partial class CorpusFile { [Newtonsoft.Json.JsonProperty("file", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required] - public ResourceLink File { get; set; } = new ResourceLink(); - - [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? TextId { get; set; } = default!; - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class CorpusConfig - { - [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? Name { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Language { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList Files { get; set; } = new System.Collections.ObjectModel.Collection(); - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class CorpusFileConfig - { - [Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string FileId { get; set; } = default!; + public DataFile File { get; set; } = new DataFile(); [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? TextId { get; set; } = default!; @@ -8699,6 +9332,34 @@ public enum FileFormat } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class CorpusConfig + { + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Language { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Files { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class CorpusFileConfig + { + [Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string FileId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? TextId { get; set; } = default!; + + } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class TranslationEngine { @@ -9023,6 +9684,63 @@ public partial class TranslationCorpusUpdateConfig } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationParallelCorpusUpdateConfig + { + [Newtonsoft.Json.JsonProperty("sourceCorpusRefs", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceCorpusRefs { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetCorpusRefs", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TargetCorpusRefs { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationParallelCorpus + { + [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Id { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Url { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("engine", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public ResourceLink Engine { get; set; } = new ResourceLink(); + + [Newtonsoft.Json.JsonProperty("sourceCorpora", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList SourceCorpora { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("targetCorpora", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList TargetCorpora { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationParallelCorpusSubcorpus + { + [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Id { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Language { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Files { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class Pretranslation { @@ -9130,6 +9848,29 @@ public partial class TranslationBuild [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class TrainingCorpus + { + [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public ResourceLink? Corpus { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TextIds { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? ScriptureRange { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("parallelCorpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public ResourceLink? ParallelCorpus { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TargetFilters { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class ParallelCorpusFilter { [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required] @@ -9146,9 +9887,8 @@ public partial class TrainingCorpus [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class PretranslateCorpus { - [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public ResourceLink Corpus { get; set; } = new ResourceLink(); + [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public ResourceLink? Corpus { get; set; } = default!; [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public System.Collections.Generic.IList? TextIds { get; set; } = default!; @@ -9156,6 +9896,15 @@ public partial class PretranslateCorpus [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? ScriptureRange { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("parallelCorpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public ResourceLink? ParallelCorpus { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TargetFilters { get; set; } = default!; + } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] @@ -9178,9 +9927,32 @@ public partial class TranslationBuildConfig [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class TrainingCorpusConfig { - [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? CorpusId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TextIds { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? ScriptureRange { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("parallelCorpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? ParallelCorpusId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TargetFilters { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class ParallelCorpusFilterConfig + { + [Newtonsoft.Json.JsonProperty("corpusRef", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string CorpusId { get; set; } = default!; + public string CorpusRef { get; set; } = default!; [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public System.Collections.Generic.IList? TextIds { get; set; } = default!; @@ -9193,9 +9965,8 @@ public partial class TrainingCorpusConfig [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class PretranslateCorpusConfig { - [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string CorpusId { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? CorpusId { get; set; } = default!; [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public System.Collections.Generic.IList? TextIds { get; set; } = default!; @@ -9203,6 +9974,15 @@ public partial class PretranslateCorpusConfig [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? ScriptureRange { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("parallelCorpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? ParallelCorpusId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TargetFilters { get; set; } = default!; + } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] diff --git a/src/Serval/src/Serval.Corpora/Configuration/IMongoDataAccessConfiguratorExtensions.cs b/src/Serval/src/Serval.Corpora/Configuration/IMongoDataAccessConfiguratorExtensions.cs deleted file mode 100644 index 45fc536d..00000000 --- a/src/Serval/src/Serval.Corpora/Configuration/IMongoDataAccessConfiguratorExtensions.cs +++ /dev/null @@ -1,18 +0,0 @@ -using MongoDB.Driver; - -namespace Microsoft.Extensions.DependencyInjection; - -public static class IMongoDataAccessConfiguratorExtensions -{ - public static IMongoDataAccessConfigurator AddCorporaRepository(this IMongoDataAccessConfigurator configurator) - { - configurator.AddRepository( - "corpora.corpus", - init: c => - c.Indexes.CreateOrUpdateAsync( - new CreateIndexModel(Builders.IndexKeys.Ascending(p => p.Owner)) - ) - ); - return configurator; - } -} diff --git a/src/Serval/src/Serval.Corpora/Configuration/IServalBuilderExtensions.cs b/src/Serval/src/Serval.Corpora/Configuration/IServalBuilderExtensions.cs deleted file mode 100644 index 2f7570b9..00000000 --- a/src/Serval/src/Serval.Corpora/Configuration/IServalBuilderExtensions.cs +++ /dev/null @@ -1,10 +0,0 @@ -namespace Microsoft.Extensions.DependencyInjection; - -public static class IServalBuilderExtensions -{ - public static IServalBuilder AddCorpora(this IServalBuilder builder) - { - builder.Services.AddScoped(); - return builder; - } -} diff --git a/src/Serval/src/Serval.Corpora/Models/CorpusFile.cs b/src/Serval/src/Serval.Corpora/Models/CorpusFile.cs deleted file mode 100644 index ed78a9d6..00000000 --- a/src/Serval/src/Serval.Corpora/Models/CorpusFile.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Serval.Corpora.Models; - -public record CorpusFile -{ - public required string Id { get; set; } - public required string Filename { get; set; } - public required FileFormat Format { get; set; } - public required string TextId { get; set; } -} diff --git a/src/Serval/src/Serval.Corpora/Serval.Corpora.csproj b/src/Serval/src/Serval.Corpora/Serval.Corpora.csproj deleted file mode 100644 index 198f83e4..00000000 --- a/src/Serval/src/Serval.Corpora/Serval.Corpora.csproj +++ /dev/null @@ -1,25 +0,0 @@ - - - - net8.0 - enable - enable - true - true - true - $(NoWarn);CS1591;CS1573 - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/src/Serval/src/Serval.Corpora/Usings.cs b/src/Serval/src/Serval.Corpora/Usings.cs deleted file mode 100644 index 5170a302..00000000 --- a/src/Serval/src/Serval.Corpora/Usings.cs +++ /dev/null @@ -1,15 +0,0 @@ -global using System.Diagnostics.CodeAnalysis; -global using Asp.Versioning; -global using Microsoft.AspNetCore.Authorization; -global using Microsoft.AspNetCore.Http; -global using Microsoft.AspNetCore.Mvc; -global using NSwag.Annotations; -global using Serval.Corpora.Contracts; -global using Serval.Corpora.Models; -global using Serval.Corpora.Services; -global using Serval.Shared.Contracts; -global using Serval.Shared.Controllers; -global using Serval.Shared.Models; -global using Serval.Shared.Services; -global using Serval.Shared.Utils; -global using SIL.DataAccess; diff --git a/src/Serval/src/Serval.DataFiles/Configuration/IMongoDataAccessConfiguratorExtensions.cs b/src/Serval/src/Serval.DataFiles/Configuration/IMongoDataAccessConfiguratorExtensions.cs index 86d10a52..f14427f2 100644 --- a/src/Serval/src/Serval.DataFiles/Configuration/IMongoDataAccessConfiguratorExtensions.cs +++ b/src/Serval/src/Serval.DataFiles/Configuration/IMongoDataAccessConfiguratorExtensions.cs @@ -23,4 +23,16 @@ public static IMongoDataAccessConfigurator AddDataFilesRepositories(this IMongoD ); return configurator; } + + public static IMongoDataAccessConfigurator AddCorporaRepository(this IMongoDataAccessConfigurator configurator) + { + configurator.AddRepository( + "corpora.corpus", + init: c => + c.Indexes.CreateOrUpdateAsync( + new CreateIndexModel(Builders.IndexKeys.Ascending(p => p.Owner)) + ) + ); + return configurator; + } } diff --git a/src/Serval/src/Serval.DataFiles/Configuration/IServalBuilderExtensions.cs b/src/Serval/src/Serval.DataFiles/Configuration/IServalBuilderExtensions.cs index 64a25658..3792ae1f 100644 --- a/src/Serval/src/Serval.DataFiles/Configuration/IServalBuilderExtensions.cs +++ b/src/Serval/src/Serval.DataFiles/Configuration/IServalBuilderExtensions.cs @@ -13,4 +13,10 @@ public static IServalBuilder AddDataFiles(this IServalBuilder builder) builder.Services.AddHostedService(); return builder; } + + public static IServalBuilder AddCorpora(this IServalBuilder builder) + { + builder.Services.AddScoped(); + return builder; + } } diff --git a/src/Serval/src/Serval.Corpora/Contracts/CorpusConfigDto.cs b/src/Serval/src/Serval.DataFiles/Contracts/CorpusConfigDto.cs similarity index 100% rename from src/Serval/src/Serval.Corpora/Contracts/CorpusConfigDto.cs rename to src/Serval/src/Serval.DataFiles/Contracts/CorpusConfigDto.cs diff --git a/src/Serval/src/Serval.Corpora/Contracts/CorpusDto.cs b/src/Serval/src/Serval.DataFiles/Contracts/CorpusDto.cs similarity index 100% rename from src/Serval/src/Serval.Corpora/Contracts/CorpusDto.cs rename to src/Serval/src/Serval.DataFiles/Contracts/CorpusDto.cs diff --git a/src/Serval/src/Serval.Corpora/Contracts/CorpusFileConfigDto.cs b/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileConfigDto.cs similarity index 100% rename from src/Serval/src/Serval.Corpora/Contracts/CorpusFileConfigDto.cs rename to src/Serval/src/Serval.DataFiles/Contracts/CorpusFileConfigDto.cs diff --git a/src/Serval/src/Serval.Corpora/Contracts/CorpusFileDto.cs b/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs similarity index 66% rename from src/Serval/src/Serval.Corpora/Contracts/CorpusFileDto.cs rename to src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs index fb8cbed6..5e9a5ded 100644 --- a/src/Serval/src/Serval.Corpora/Contracts/CorpusFileDto.cs +++ b/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs @@ -2,6 +2,6 @@ namespace Serval.Corpora.Contracts; public record CorpusFileDto { - public required ResourceLinkDto File { get; init; } + public required DataFileDto File { get; init; } public string? TextId { get; init; } } diff --git a/src/Serval/src/Serval.Corpora/Controllers/CorporaController.cs b/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs similarity index 76% rename from src/Serval/src/Serval.Corpora/Controllers/CorporaController.cs rename to src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs index 14db109d..56a8d691 100644 --- a/src/Serval/src/Serval.Corpora/Controllers/CorporaController.cs +++ b/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs @@ -1,16 +1,20 @@ -using MassTransit; - namespace Serval.Corpora.Controllers; [ApiVersion("1.0")] [Route("api/v{version:apiVersion}/corpora")] [OpenApiTag("Corpora")] -public class CorporaController(IAuthorizationService authService, ICorpusService corpusService, IUrlService urlService) - : ServalControllerBase(authService) +public class CorporaController( + IAuthorizationService authService, + ICorpusService corpusService, + IUrlService urlService, + IDataFileService dataFileService +) : ServalControllerBase(authService) { private readonly ICorpusService _corpusService = corpusService; private readonly IUrlService _urlService = urlService; + private readonly IDataFileService _dataFileService = dataFileService; + /// /// Get all corpora /// @@ -18,7 +22,7 @@ public class CorporaController(IAuthorizationService authService, ICorpusService /// The client is not authenticated /// The authenticated client cannot perform the operation /// A necessary service is currently unavailable. Check `/health` for more details. - [Authorize(Scopes.ReadCorpora)] + [Authorize(Scopes.ReadFiles)] [HttpGet] [ProducesResponseType(StatusCodes.Status200OK)] [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] @@ -39,7 +43,7 @@ public async Task> GetAllAsync(CancellationToken cancella /// The authenticated client cannot perform the operation or does not own the corpus /// The corpus does not exist /// A necessary service is currently unavailable. Check `/health` for more details. - [Authorize(Scopes.ReadCorpora)] + [Authorize(Scopes.ReadFiles)] [HttpGet("{id}", Name = Endpoints.GetCorpus)] [ProducesResponseType(StatusCodes.Status200OK)] [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] @@ -63,7 +67,7 @@ public async Task> GetAsync([NotNull] string id, Cancell /// The client is not authenticated /// The authenticated client cannot perform the operation /// A necessary service is currently unavailable. Check `/health` for more details. - [Authorize(Scopes.CreateCorpora)] + [Authorize(Scopes.CreateFiles)] [HttpPost] [ProducesResponseType(StatusCodes.Status201Created)] [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)] @@ -77,7 +81,7 @@ public async Task> CreateAsync( CancellationToken cancellationToken ) { - Corpus corpus = await MapAsync(getDataFileClient, corpusConfig, idGenerator.GenerateId(), cancellationToken); + Corpus corpus = await MapAsync(corpusConfig, idGenerator.GenerateId(), cancellationToken); await _corpusService.CreateAsync(corpus, cancellationToken); CorpusDto dto = Map(corpus); return Created(dto.Url, dto); @@ -87,7 +91,7 @@ CancellationToken cancellationToken /// Update an existing corpus /// /// The unique identifier for the corpus - /// The new corpus files + /// Tuples of the ids of the new corpus files and the associated text ids /// /// The corpus was updated successfully /// Bad request @@ -95,7 +99,7 @@ CancellationToken cancellationToken /// The authenticated client cannot perform the operation or does not own the corpus /// The corpus does not exist and therefore cannot be updated /// A necessary service is currently unavailable. Check `/health` for more details. - [Authorize(Scopes.UpdateCorpora)] + [Authorize(Scopes.UpdateFiles)] [HttpPatch("{id}")] [ProducesResponseType(StatusCodes.Status200OK)] [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)] @@ -106,7 +110,6 @@ CancellationToken cancellationToken public async Task> UpdateAsync( [NotNull] string id, [NotNull] IReadOnlyList files, - [FromServices] IRequestClient getDataFileClient, CancellationToken cancellationToken ) { @@ -114,7 +117,7 @@ CancellationToken cancellationToken Corpus corpus = await _corpusService.UpdateAsync( id, - await MapAsync(getDataFileClient, files, cancellationToken), + await MapAsync(files, cancellationToken), cancellationToken ); @@ -132,7 +135,7 @@ await MapAsync(getDataFileClient, files, cancellationToken), /// The authenticated client cannot perform the operation or does not own the corpus /// The corpus does not exist and therefore cannot be deleted /// A necessary service is currently unavailable. Check `/health` for more details. - [Authorize(Scopes.DeleteCorpora)] + [Authorize(Scopes.DeleteFiles)] [HttpDelete("{id}")] [ProducesResponseType(typeof(void), StatusCodes.Status200OK)] [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] @@ -152,53 +155,31 @@ private async Task AuthorizeAsync(string id, CancellationToken cancellationToken await AuthorizeAsync(corpus); } - private async Task MapAsync( - IRequestClient getDataFileClient, - CorpusConfigDto corpusConfig, - string id, - CancellationToken cancellationToken - ) + private async Task MapAsync(CorpusConfigDto corpusConfig, string id, CancellationToken cancellationToken) { return new Corpus { Id = id, Owner = Owner, Language = corpusConfig.Language, - Files = await MapAsync(getDataFileClient, corpusConfig.Files, cancellationToken) + Files = await MapAsync(corpusConfig.Files, cancellationToken) }; } private async Task> MapAsync( - IRequestClient getDataFileClient, - IEnumerable corpusFileConfigs, + IReadOnlyList files, CancellationToken cancellationToken ) { - var files = new List(); - foreach (CorpusFileConfigDto corpusFileConfig in corpusFileConfigs) + var dataFiles = new List(); + foreach (CorpusFileConfigDto file in files) { - Response response = await getDataFileClient.GetResponse< - DataFileResult, - DataFileNotFound - >(new GetDataFile { DataFileId = corpusFileConfig.FileId, Owner = Owner }, cancellationToken); - if (response.Is(out Response? result)) - { - files.Add( - new CorpusFile - { - Id = corpusFileConfig.FileId, - Filename = result.Message.Filename, - TextId = corpusFileConfig.TextId ?? result.Message.Name, - Format = result.Message.Format - } - ); - } - else if (response.Is(out Response? _)) - { - throw new InvalidOperationException($"The data file {corpusFileConfig.FileId} cannot be found."); - } + DataFile? dataFile = await _dataFileService.GetAsync(file.FileId, cancellationToken); + if (dataFile == null) + throw new InvalidOperationException($"DataFile with id {file.FileId} does not exist."); + dataFiles.Add(new CorpusFile { File = dataFile, TextId = file.TextId }); } - return files; + return dataFiles; } private CorpusDto Map(Corpus source) @@ -216,14 +197,18 @@ private CorpusDto Map(Corpus source) private CorpusFileDto Map(CorpusFile source) { - return new CorpusFileDto + return new CorpusFileDto { File = Map(source.File), TextId = source.TextId }; + } + + private DataFileDto Map(DataFile source) + { + return new DataFileDto { - TextId = source.TextId, - File = new ResourceLinkDto - { - Id = source.Id, - Url = _urlService.GetUrl(Endpoints.GetDataFile, new { id = source.Id }) - } + Id = source.Id, + Url = _urlService.GetUrl(Endpoints.GetDataFile, new { id = source.Id }), + Name = source.Name, + Format = source.Format, + Revision = source.Revision }; } } diff --git a/src/Serval/src/Serval.Corpora/Models/Corpus.cs b/src/Serval/src/Serval.DataFiles/Models/Corpus.cs similarity index 100% rename from src/Serval/src/Serval.Corpora/Models/Corpus.cs rename to src/Serval/src/Serval.DataFiles/Models/Corpus.cs diff --git a/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs b/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs new file mode 100644 index 00000000..98d3b7ff --- /dev/null +++ b/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs @@ -0,0 +1,7 @@ +namespace Serval.Corpora.Contracts; + +public record CorpusFile +{ + public required DataFile File { get; init; } + public string? TextId { get; init; } +} diff --git a/src/Serval/src/Serval.Corpora/Services/CorpusService.cs b/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs similarity index 100% rename from src/Serval/src/Serval.Corpora/Services/CorpusService.cs rename to src/Serval/src/Serval.DataFiles/Services/CorpusService.cs diff --git a/src/Serval/src/Serval.Corpora/Services/ICorpusService.cs b/src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs similarity index 100% rename from src/Serval/src/Serval.Corpora/Services/ICorpusService.cs rename to src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs diff --git a/src/Serval/src/Serval.DataFiles/Usings.cs b/src/Serval/src/Serval.DataFiles/Usings.cs index 0d81ccb1..0054bbee 100644 --- a/src/Serval/src/Serval.DataFiles/Usings.cs +++ b/src/Serval/src/Serval.DataFiles/Usings.cs @@ -14,6 +14,9 @@ global using Microsoft.Extensions.Logging; global using Microsoft.Extensions.Options; global using NSwag.Annotations; +global using Serval.Corpora.Contracts; +global using Serval.Corpora.Models; +global using Serval.Corpora.Services; global using Serval.DataFiles.Consumers; global using Serval.DataFiles.Contracts; global using Serval.DataFiles.Models; diff --git a/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/engine.proto b/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/engine.proto index 6ae643c5..49d4549d 100644 --- a/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/engine.proto +++ b/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/engine.proto @@ -185,4 +185,4 @@ enum TranslationSource { TRANSLATION_SOURCE_PRIMARY = 0; TRANSLATION_SOURCE_SECONDARY = 1; TRANSLATION_SOURCE_HUMAN = 2; -} +} \ No newline at end of file diff --git a/src/Serval/src/Serval.Shared/Contracts/CorpusFileResult.cs b/src/Serval/src/Serval.Shared/Contracts/CorpusFileResult.cs new file mode 100644 index 00000000..953d705b --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/CorpusFileResult.cs @@ -0,0 +1,7 @@ +namespace Serval.Shared.Contracts; + +public record CorpusFileResult +{ + public required DataFileResult File { get; init; } + public required string TextId { get; init; } +} diff --git a/src/Serval/src/Serval.Shared/Contracts/CorpusNotFound.cs b/src/Serval/src/Serval.Shared/Contracts/CorpusNotFound.cs new file mode 100644 index 00000000..81f9246b --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/CorpusNotFound.cs @@ -0,0 +1,7 @@ +namespace Serval.Shared.Contracts; + +public record CorpusNotFound +{ + public required string CorpusId { get; init; } + public required string Owner { get; init; } +} diff --git a/src/Serval/src/Serval.Shared/Contracts/CorpusResult.cs b/src/Serval/src/Serval.Shared/Contracts/CorpusResult.cs new file mode 100644 index 00000000..25a5e6b8 --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/CorpusResult.cs @@ -0,0 +1,10 @@ +namespace Serval.Shared.Contracts; + +public record CorpusResult +{ + public required string CorpusId { get; init; } + public required string Language { get; init; } + public string? Name { get; init; } + public required string Url { get; init; } + public required IReadOnlyList Files { get; set; } +} diff --git a/src/Serval/src/Serval.Shared/Contracts/GetCorpus.cs b/src/Serval/src/Serval.Shared/Contracts/GetCorpus.cs new file mode 100644 index 00000000..a29b4f12 --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/GetCorpus.cs @@ -0,0 +1,7 @@ +namespace Serval.Shared.Contracts; + +public record GetCorpus +{ + public required string CorpusId { get; init; } + public required string Owner { get; init; } +} diff --git a/src/Serval/src/Serval.Shared/Controllers/Endpoints.cs b/src/Serval/src/Serval.Shared/Controllers/Endpoints.cs index 44961147..e8e147a0 100644 --- a/src/Serval/src/Serval.Shared/Controllers/Endpoints.cs +++ b/src/Serval/src/Serval.Shared/Controllers/Endpoints.cs @@ -6,6 +6,7 @@ public static class Endpoints public const string GetTranslationEngine = "GetTranslationEngine"; public const string GetTranslationCorpus = "GetTranslationCorpus"; + public const string GetParallelTranslationCorpus = "GetParallelTranslationCorpus"; public const string GetTranslationBuild = "GetTranslationBuild"; public const string GetAssessmentEngine = "GetAssessmentEngine"; diff --git a/src/Serval/src/Serval.Shared/Controllers/Scopes.cs b/src/Serval/src/Serval.Shared/Controllers/Scopes.cs index ffc886b9..6d324659 100644 --- a/src/Serval/src/Serval.Shared/Controllers/Scopes.cs +++ b/src/Serval/src/Serval.Shared/Controllers/Scopes.cs @@ -20,12 +20,6 @@ public static class Scopes public const string ReadFiles = "read:files"; public const string UpdateFiles = "update:files"; public const string DeleteFiles = "delete:files"; - - public const string CreateCorpora = "read:files"; //"create:corpora"; //TODO fix auth scopes - public const string ReadCorpora = "read:files"; //"read:corpora"; - public const string UpdateCorpora = "read:files"; //"update:corpora"; - public const string DeleteCorpora = "read:files"; //"delete:corpora"; - public const string ReadStatus = "read:status"; public static IEnumerable All => @@ -45,10 +39,6 @@ public static class Scopes ReadFiles, UpdateFiles, DeleteFiles, - ReadStatus, - CreateCorpora, - ReadCorpora, - UpdateCorpora, - DeleteCorpora + ReadStatus ]; } diff --git a/src/Serval/src/Serval.Translation/Contracts/ParallelCorpusDto.cs b/src/Serval/src/Serval.Translation/Contracts/ParallelCorpusDto.cs new file mode 100644 index 00000000..8fa40110 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Contracts/ParallelCorpusDto.cs @@ -0,0 +1,8 @@ +namespace Serval.Translation.Contracts; + +public record ParallelCorpusDto +{ + public required ResourceLinkDto Corpus { get; set; } + public IReadOnlyList SourceCorporaRefs { get; set; } = new List(); + public IReadOnlyList TargetCorporaRefs { get; set; } = new List(); +} diff --git a/src/Serval/src/Serval.Translation/Contracts/ParallelCorpusFilter.cs b/src/Serval/src/Serval.Translation/Contracts/ParallelCorpusFilterDto.cs similarity index 100% rename from src/Serval/src/Serval.Translation/Contracts/ParallelCorpusFilter.cs rename to src/Serval/src/Serval.Translation/Contracts/ParallelCorpusFilterDto.cs diff --git a/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusConfigDto.cs b/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusConfigDto.cs index d33d2c2b..9217cc54 100644 --- a/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusConfigDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusConfigDto.cs @@ -9,6 +9,6 @@ public string? ScriptureRange { get; init; } public string? ParallelCorpusId { get; init; } - public ParallelCorpusFilterConfigDto? SourceFilter { get; init; } - public ParallelCorpusFilterConfigDto? TargetFilter { get; init; } + public IReadOnlyList? SourceFilters { get; init; } + public IReadOnlyList? TargetFilters { get; init; } } diff --git a/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusDto.cs b/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusDto.cs index fc859f86..ab686454 100644 --- a/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/PretranslateCorpusDto.cs @@ -9,6 +9,6 @@ public record PretranslateCorpusDto public string? ScriptureRange { get; init; } public ResourceLinkDto? ParallelCorpus { get; init; } - public ParallelCorpusFilterDto? SourceFilter { get; init; } - public ParallelCorpusFilterDto? TargetFilter { get; init; } + public IReadOnlyList? SourceFilters { get; init; } + public IReadOnlyList? TargetFilters { get; init; } } diff --git a/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs b/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs index 852f2421..c8161a5f 100644 --- a/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusConfigDto.cs @@ -7,6 +7,6 @@ public record TrainingCorpusConfigDto public string? ScriptureRange { get; init; } public string? ParallelCorpusId { get; init; } - public ParallelCorpusFilterConfigDto? SourceFilter { get; init; } - public ParallelCorpusFilterConfigDto? TargetFilter { get; init; } + public IReadOnlyList? SourceFilters { get; init; } + public IReadOnlyList? TargetFilters { get; init; } } diff --git a/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusDto.cs b/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusDto.cs index 39065afd..f734f43b 100644 --- a/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/TrainingCorpusDto.cs @@ -7,6 +7,6 @@ public record TrainingCorpusDto public string? ScriptureRange { get; init; } public ResourceLinkDto? ParallelCorpus { get; init; } - public ParallelCorpusFilterDto? SourceFilter { get; init; } - public ParallelCorpusFilterDto? TargetFilter { get; init; } + public IReadOnlyList? SourceFilters { get; init; } + public IReadOnlyList? TargetFilters { get; init; } } diff --git a/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusCorpusDto.cs b/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusCorpusDto.cs new file mode 100644 index 00000000..3a5e29e6 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusCorpusDto.cs @@ -0,0 +1,9 @@ +namespace Serval.Translation.Contracts; + +public record TranslationParallelCorpusSubcorpusDto +{ + public required string Id { get; set; } + public required string Name { get; set; } + public required string Language { get; set; } + public required IReadOnlyList Files { get; set; } +} diff --git a/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusDto.cs b/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusDto.cs index bd3cc46a..4fae3ab3 100644 --- a/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusDto.cs +++ b/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusDto.cs @@ -6,11 +6,8 @@ public record TranslationParallelCorpusDto public required string Url { get; init; } public required ResourceLinkDto Engine { get; init; } - /// - /// The corpus name. - /// - public string? Name { get; init; } - - public required IReadOnlyList SourceCorporaRefs { get; init; } = new List(); - public required IReadOnlyList TargetCorporaRefs { get; init; } = new List(); + public required IReadOnlyList SourceCorpora { get; init; } = + new List(); + public required IReadOnlyList TargetCorpora { get; init; } = + new List(); } diff --git a/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusUpdateDto.cs b/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusUpdateDto.cs new file mode 100644 index 00000000..3a3025b2 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Contracts/TranslationParallelCorpusUpdateDto.cs @@ -0,0 +1,23 @@ +using System.ComponentModel.DataAnnotations; + +namespace Serval.Translation.Contracts; + +public record TranslationParallelCorpusUpdateConfigDto : IValidatableObject +{ + public IReadOnlyList? SourceCorpusRefs { get; init; } + + public IReadOnlyList? TargetCorpusRefs { get; init; } + + public IEnumerable Validate( + ValidationContext validationContext + ) + { + if (SourceCorpusRefs is null && TargetCorpusRefs is null) + { + yield return new System.ComponentModel.DataAnnotations.ValidationResult( + "At least one field must be specified.", + [nameof(SourceCorpusRefs), nameof(TargetCorpusRefs)] + ); + } + } +} diff --git a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs index acb76daa..466e954b 100644 --- a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs +++ b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs @@ -416,7 +416,7 @@ corpusConfig.TargetFiles is null /// /// The translation engine id /// - /// The files + /// The corpora /// The client is not authenticated /// The authenticated client cannot perform the operation or does not own the translation engine /// The engine does not exist @@ -504,6 +504,201 @@ CancellationToken cancellationToken return Ok(); } + /// + /// Add a corpus to a translation engine + /// + /// + /// ## Parameters + /// * **name**: A name to help identify and distinguish the corpus from other corpora + /// * The name does not have to be unique since the corpus is uniquely identified by an auto-generated id + /// * **sourceLanguage**: The source language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + /// * Normally, this is the same as the engine sourceLanguage. This may change for future engines as a means of transfer learning. + /// * **targetLanguage**: The target language code (See documentation on endpoint /translation/engines/ - "Create a new translation engine" for details on language codes). + /// * **SourceFiles**: The source files associated with the corpus + /// * **FileId**: The unique id referencing the uploaded file + /// * **TextId**: The client-defined name to associate source and target files. + /// * If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine. + /// * If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated. + /// * If a TextId is used more than once in SourceFiles, the sources will be randomly and evenly mixed for training. + /// * For pretranslating, multiple sources with the same TextId will be combined, but the first source will always take precedence (no random mixing). + /// * For Paratext projects, TextId will be ignored - multiple Paratext source projects will always be mixed (as if they have the same TextId). + /// * **TargetFiles**: The target files associated with the corpus + /// * Same as SourceFiles, except only a single instance of a TextID or a single paratext project is supported. There is no mixing or combining of multiple targets. + /// + /// The translation engine id + /// The corpus configuration (see remarks) + /// + /// + /// + /// The added corpus + /// Bad request + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the translation engine. + /// The engine does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.UpdateTranslationEngines)] + [HttpPost("{id}/parallel-corpora")] + [ProducesResponseType(StatusCodes.Status201Created)] + [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> AddParallelCorpusAsync( + [NotNull] string id, + [FromBody] TranslationCorpusConfigDto corpusConfig, + [FromServices] IRequestClient getDataFileClient, + [FromServices] IIdGenerator idGenerator, + CancellationToken cancellationToken + ) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + Corpus corpus = await MapAsync(getDataFileClient, idGenerator.GenerateId(), corpusConfig, cancellationToken); + await _engineService.AddCorpusAsync(id, corpus, cancellationToken); + TranslationCorpusDto dto = Map(id, corpus); + return Created(dto.Url, dto); + } + + /// + /// Update a corpus with a new set of files + /// + /// + /// Will completely replace corpus' file associations. Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete. + /// + /// The translation engine id + /// The corpus id + /// The corpus configuration + /// The data file client + /// + /// The corpus was updated successfully + /// Bad request + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the translation engine. + /// The engine or corpus does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.UpdateTranslationEngines)] + [HttpPatch("{id}/parallel-corpora/{corpusId}")] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> UpdateCorpusAsync( + [NotNull] string id, + [NotNull] string parallelCorpusId, + [FromBody] TranslationParallelCorpusUpdateConfigDto corpusConfig, + [FromServices] IRequestClient getCorpusClient, + CancellationToken cancellationToken + ) + { + await AuthorizeAsync(id, cancellationToken); + ParallelCorpus parallelCorpus = await _engineService.UpdateParallelCorpusAsync( + id, + parallelCorpusId, + corpusConfig.SourceCorpusRefs is null + ? null + : await MapAsync(getCorpusClient, corpusConfig.SourceCorpusRefs, cancellationToken), + corpusConfig.TargetCorpusRefs is null + ? null + : await MapAsync(getCorpusClient, corpusConfig.TargetCorpusRefs, cancellationToken), + cancellationToken + ); + return Ok(Map(id, parallelCorpus)); + } + + /// + /// Get all parallel corpora for a translation engine + /// + /// The translation engine id + /// + /// The parallel corpora + /// The client is not authenticated + /// The authenticated client cannot perform the operation or does not own the translation engine + /// The engine does not exist + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.ReadTranslationEngines)] + [HttpGet("{id}/parallel-corpora")] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task>> GetAllParallelCorporaAsync( + [NotNull] string id, + CancellationToken cancellationToken + ) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + return Ok(engine.ParallelCorpora.Select(c => Map(id, c))); + } + + /// + /// Get the configuration of a parallel corpus for a translation engine + /// + /// The translation engine id + /// The parallel corpus id + /// + /// The parallel corpus configuration + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the translation engine. + /// The engine or parallel corpus does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.ReadTranslationEngines)] + [HttpGet("{id}/parallel-corpora/{parallelCorpusId}", Name = Endpoints.GetParallelTranslationCorpus)] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> GetParallelCorpusAsync( + [NotNull] string id, + [NotNull] string parallelCorpusId, + CancellationToken cancellationToken + ) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + ParallelCorpus? corpus = engine.ParallelCorpora.FirstOrDefault(f => f.Id == parallelCorpusId); + if (corpus == null) + return NotFound(); + return Ok(Map(id, corpus)); + } + + /// + /// Remove a parallel corpus from a translation engine + /// + /// + /// Removing a parallel corpus will remove all pretranslations associated with that corpus. + /// + /// The translation engine id + /// The parallel corpus id + /// + /// The parallel corpus was deleted successfully. + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the translation engine. + /// The engine or parallel corpus does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.UpdateTranslationEngines)] + [HttpDelete("{id}/parallel-corpora/{parallelCorpusId}")] + [ProducesResponseType(typeof(void), StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task DeleteParallelCorpusAsync( + [NotNull] string id, + [NotNull] string parallelCorpusId, + CancellationToken cancellationToken + ) + { + await AuthorizeAsync(id, cancellationToken); + await _engineService.DeleteParallelCorpusAsync(id, parallelCorpusId, cancellationToken); + return Ok(); + } + /// /// Get all pretranslations in a corpus of a translation engine /// @@ -1014,6 +1209,47 @@ CancellationToken cancellationToken return files; } + private async Task> MapAsync( + IRequestClient getCorpusClient, + IEnumerable corpusIds, + CancellationToken cancellationToken + ) + { + var corpora = new List(); + foreach (string corpusId in corpusIds) + { + Response response = await getCorpusClient.GetResponse< + CorpusResult, + CorpusNotFound + >(new GetCorpus { CorpusId = corpusId, Owner = Owner }, cancellationToken); + if (response.Is(out Response? result)) + { + corpora.Add( + new ParallelCorpusSubcorpus + { + Id = corpusId, + Name = result.Message.Name ?? "", + Language = result.Message.Language, + Files = result + .Message.Files.Select(f => new CorpusFile + { + Id = f.File.DataFileId, + Filename = f.File.Filename, + Format = f.File.Format, + TextId = f.TextId + }) + .ToList(), + } + ); + } + else if (response.Is(out Response? _)) + { + throw new InvalidOperationException($"The corpus {corpusId} cannot be found."); + } + } + return corpora; + } + private Engine Map(TranslationEngineConfigDto source) { return new Engine @@ -1093,8 +1329,8 @@ private static Build Map(Engine engine, TranslationBuildConfigDto source) new PretranslateCorpus { ParallelCorpusRef = pcc.ParallelCorpusId, - SourceFilter = Map(pcc.SourceFilter), - TargetFilter = Map(pcc.TargetFilter) + SourceFilters = pcc.SourceFilters?.Select(Map).ToList(), + TargetFilters = pcc.TargetFilters?.Select(Map).ToList() } ); } @@ -1155,8 +1391,8 @@ private static Build Map(Engine engine, TranslationBuildConfigDto source) new TrainingCorpus { ParallelCorpusRef = tcc.ParallelCorpusId, - SourceFilter = Map(tcc.SourceFilter), - TargetFilter = Map(tcc.TargetFilter) + SourceFilters = tcc.SourceFilters?.Select(Map).ToList(), + TargetFilters = tcc.TargetFilters?.Select(Map).ToList() } ); } @@ -1164,11 +1400,8 @@ private static Build Map(Engine engine, TranslationBuildConfigDto source) return trainOnCorpora; } - private static ParallelCorpusFilter? Map(ParallelCorpusFilterConfigDto? source) + private static ParallelCorpusFilter Map(ParallelCorpusFilterConfigDto source) { - if (source is null) - return null; - if (source.TextIds != null && source.ScriptureRange != null) { throw new InvalidOperationException( @@ -1263,11 +1496,14 @@ private PretranslateCorpusDto Map(string engineId, PretranslateCorpus source) ? new ResourceLinkDto { Id = source.ParallelCorpusRef, - Url = _urlService.GetUrl(Endpoints.GetCorpus, new { id = source.ParallelCorpusRef }) //TODO not get corpus! + Url = _urlService.GetUrl( + Endpoints.GetParallelTranslationCorpus, + new { id = source.ParallelCorpusRef } + ) } : null, - SourceFilter = Map(source.SourceFilter), - TargetFilter = Map(source.TargetFilter) + SourceFilters = source.SourceFilters?.Select(Map).ToList(), + TargetFilters = source.TargetFilters?.Select(Map).ToList() }; } @@ -1293,19 +1529,19 @@ private TrainingCorpusDto Map(string engineId, TrainingCorpus source) ? new ResourceLinkDto { Id = source.ParallelCorpusRef, - Url = _urlService.GetUrl(Endpoints.GetCorpus, new { id = source.ParallelCorpusRef }) //TODO not get corpus! + Url = _urlService.GetUrl( + Endpoints.GetParallelTranslationCorpus, + new { id = source.ParallelCorpusRef } + ) } : null, - SourceFilter = Map(source.SourceFilter), - TargetFilter = Map(source.TargetFilter) + SourceFilters = source.SourceFilters?.Select(Map).ToList(), + TargetFilters = source.TargetFilters?.Select(Map).ToList() }; } - private ParallelCorpusFilterDto? Map(ParallelCorpusFilter? source) + private ParallelCorpusFilterDto Map(ParallelCorpusFilter source) { - if (source is null) - return null; - return new ParallelCorpusFilterDto { Corpus = new ResourceLinkDto @@ -1403,6 +1639,33 @@ private TranslationCorpusDto Map(string engineId, Corpus source) }; } + private TranslationParallelCorpusDto Map(string engineId, ParallelCorpus source) + { + return new TranslationParallelCorpusDto + { + Id = source.Id, + Url = _urlService.GetUrl(Endpoints.GetCorpus, new { id = engineId, corpusId = source.Id }), + Engine = new ResourceLinkDto + { + Id = engineId, + Url = _urlService.GetUrl(Endpoints.GetTranslationEngine, new { id = engineId }) + }, + SourceCorpora = source.SourceCorpora.Select(Map).ToList(), + TargetCorpora = source.TargetCorpora.Select(Map).ToList() + }; + } + + private TranslationParallelCorpusSubcorpusDto Map(ParallelCorpusSubcorpus source) + { + return new TranslationParallelCorpusSubcorpusDto + { + Id = source.Id, + Language = source.Language, + Name = source.Name, + Files = source.Files.Select(Map).ToList() + }; + } + private TranslationCorpusFileDto Map(CorpusFile source) { return new TranslationCorpusFileDto diff --git a/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs b/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs index 175d6153..56de5ac2 100644 --- a/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs +++ b/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs @@ -3,6 +3,6 @@ public record ParallelCorpus { public required string Id { get; set; } - public IReadOnlyList SourceCorporaRefs { get; set; } = new List(); - public IReadOnlyList TargetCorporaRefs { get; set; } = new List(); + public IReadOnlyList SourceCorpora { get; set; } = new List(); + public IReadOnlyList TargetCorpora { get; set; } = new List(); } diff --git a/src/Serval/src/Serval.Translation/Models/ParallelCorpusSubcorpus.cs b/src/Serval/src/Serval.Translation/Models/ParallelCorpusSubcorpus.cs new file mode 100644 index 00000000..e67b90e2 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Models/ParallelCorpusSubcorpus.cs @@ -0,0 +1,9 @@ +namespace Serval.Translation.Models; + +public record ParallelCorpusSubcorpus +{ + public required string Id { get; set; } + public required string Name { get; set; } + public required string Language { get; set; } + public required IReadOnlyList Files { get; set; } +} diff --git a/src/Serval/src/Serval.Translation/Models/PretranslateCorpus.cs b/src/Serval/src/Serval.Translation/Models/PretranslateCorpus.cs index 52105778..a44c52f7 100644 --- a/src/Serval/src/Serval.Translation/Models/PretranslateCorpus.cs +++ b/src/Serval/src/Serval.Translation/Models/PretranslateCorpus.cs @@ -7,6 +7,6 @@ public record PretranslateCorpus public string? ScriptureRange { get; set; } public string? ParallelCorpusRef { get; set; } - public ParallelCorpusFilter? SourceFilter { get; set; } - public ParallelCorpusFilter? TargetFilter { get; set; } + public IReadOnlyList? SourceFilters { get; set; } + public IReadOnlyList? TargetFilters { get; set; } //TODO is this needed? } diff --git a/src/Serval/src/Serval.Translation/Models/TrainingCorpus.cs b/src/Serval/src/Serval.Translation/Models/TrainingCorpus.cs index 695e7687..fc927406 100644 --- a/src/Serval/src/Serval.Translation/Models/TrainingCorpus.cs +++ b/src/Serval/src/Serval.Translation/Models/TrainingCorpus.cs @@ -7,6 +7,6 @@ public record TrainingCorpus public string? ScriptureRange { get; set; } public string? ParallelCorpusRef { get; set; } - public ParallelCorpusFilter? SourceFilter { get; set; } - public ParallelCorpusFilter? TargetFilter { get; set; } + public IReadOnlyList? SourceFilters { get; set; } + public IReadOnlyList? TargetFilters { get; set; } } diff --git a/src/Serval/src/Serval.Translation/Services/EngineService.cs b/src/Serval/src/Serval.Translation/Services/EngineService.cs index 07a25b7f..c1a4f305 100644 --- a/src/Serval/src/Serval.Translation/Services/EngineService.cs +++ b/src/Serval/src/Serval.Translation/Services/EngineService.cs @@ -1,5 +1,6 @@ using MassTransit.Mediator; using Serval.Translation.V1; +using SIL.Scripture; namespace Serval.Translation.Services; @@ -199,126 +200,85 @@ await _dataAccessContext.WithTransactionAsync( ); } + private Dictionary> GetChapters(string fileLocation, string scriptureRange) + { + try + { + return ScriptureRangeParser.GetChapters( + scriptureRange, + _scriptureDataFileService.GetParatextProjectSettings(fileLocation).Versification //TODO corpus.TargetFiles.First().Location + ); + } + catch (ArgumentException ae) + { + throw new InvalidOperationException($"The scripture range {scriptureRange} is not valid: {ae.Message}"); + } + } + public async Task StartBuildAsync(Build build, CancellationToken cancellationToken = default) { Engine engine = await GetAsync(build.EngineRef, cancellationToken); await _builds.InsertAsync(build, cancellationToken); + TranslationEngineApi.TranslationEngineApiClient client = + _grpcClientFactory.CreateClient(engine.Type); + try { - var pretranslate = build.Pretranslate?.ToDictionary(c => c.CorpusRef); - var trainOn = build.TrainOn?.ToDictionary(c => c.CorpusRef); - TranslationEngineApi.TranslationEngineApiClient client = - _grpcClientFactory.CreateClient(engine.Type); - Dictionary> GetChapters(V1.Corpus corpus, string scriptureRange) + StartBuildRequest request; + if (build.Pretranslate?.Select(p => p.CorpusRef).All(r => r != null) ?? false) { - try - { - return ScriptureRangeParser.GetChapters( - scriptureRange, - _scriptureDataFileService - .GetParatextProjectSettings(corpus.TargetFiles.First().Location) - .Versification - ); - } - catch (ArgumentException ae) + var pretranslate = build.Pretranslate?.ToDictionary(c => c.CorpusRef!); + var trainOn = build.TrainOn?.ToDictionary(c => c.CorpusRef!); + + request = new StartBuildRequest { - throw new InvalidOperationException( - $"The scripture range {scriptureRange} is not valid: {ae.Message}" - ); - } + EngineType = engine.Type, + EngineId = engine.Id, + BuildId = build.Id, + Corpora = + { + engine.Corpora.Select(c => + Map( + c, + trainOn?.GetValueOrDefault(c.Id), + pretranslate?.GetValueOrDefault(c.Id), + trainOnAll: trainOn == null + ) + ) + } + }; } - var request = new StartBuildRequest + else if (build.Pretranslate?.Select(p => p.ParallelCorpusRef).All(r => r != null) ?? false) { - EngineType = engine.Type, - EngineId = engine.Id, - BuildId = build.Id, - Corpora = + var pretranslate = build.Pretranslate?.ToDictionary(c => c.ParallelCorpusRef!); + List corporaPerTexts = GetTrainingCorporaPerTexts( + build.TrainOn ?? new List(), + engine.ParallelCorpora + ); + request = new StartBuildRequest { - engine.Corpora.Select(c => + EngineType = engine.Type, + EngineId = engine.Id, + BuildId = build.Id, + Corpora = { - V1.Corpus corpus = Map(c); - if (pretranslate?.TryGetValue(c.Id, out PretranslateCorpus? pretranslateCorpus) ?? false) - { - corpus.PretranslateAll = - pretranslateCorpus.TextIds is null && pretranslateCorpus.ScriptureRange is null; - if (pretranslateCorpus.TextIds is not null && pretranslateCorpus.ScriptureRange is not null) - { - throw new InvalidOperationException( - $"The corpus {c.Id} cannot specify both 'textIds' and 'scriptureRange' for 'pretranslate'." - ); - } - if (pretranslateCorpus.TextIds is not null) - corpus.PretranslateTextIds.Add(pretranslateCorpus.TextIds); - if (!string.IsNullOrEmpty(pretranslateCorpus.ScriptureRange)) - { - if ( - c.TargetFiles.Count > 1 - || c.TargetFiles[0].Format != Shared.Contracts.FileFormat.Paratext - ) - { - throw new InvalidOperationException( - $"The corpus {c.Id} is not compatible with using a scripture range" - ); - } - corpus.PretranslateChapters.Add( - GetChapters(corpus, pretranslateCorpus.ScriptureRange) - .Select( - (kvp) => - { - var scriptureChapters = new ScriptureChapters(); - scriptureChapters.Chapters.Add(kvp.Value); - return (kvp.Key, scriptureChapters); - } - ) - .ToDictionary() - ); - } - } - if (trainOn?.TryGetValue(c.Id, out TrainingCorpus? trainingCorpus) ?? false) - { - corpus.TrainOnAll = trainingCorpus.TextIds is null && trainingCorpus.ScriptureRange is null; - if (trainingCorpus.TextIds is not null && trainingCorpus.ScriptureRange is not null) - { - throw new InvalidOperationException( - $"The corpus {c.Id} cannot specify both 'textIds' and 'scriptureRange' for trainOn" - ); - } - if (trainingCorpus.TextIds is not null) - corpus.TrainOnTextIds.Add(trainingCorpus.TextIds); - if (!string.IsNullOrEmpty(trainingCorpus.ScriptureRange)) - { - if ( - c.TargetFiles.Count > 1 - || c.TargetFiles[0].Format != Shared.Contracts.FileFormat.Paratext - ) - { - throw new InvalidOperationException( - $"The corpus {c.Id} is not compatible with using a scripture range" - ); - } - corpus.TrainOnChapters.Add( - GetChapters(corpus, trainingCorpus.ScriptureRange) - .Select( - (kvp) => - { - var scriptureChapters = new ScriptureChapters(); - scriptureChapters.Chapters.Add(kvp.Value); - return (kvp.Key, scriptureChapters); - } - ) - .ToDictionary() - ); - } - } - else if (trainOn is null) - { - corpus.TrainOnAll = true; - } - return corpus; - }) - } - }; + engine.ParallelCorpora.SelectMany(c => + Map( + c, + corporaPerTexts, + pretranslate?.GetValueOrDefault(c.Id), + trainOnAll: build.TrainOn == null + ) + ) + } + }; + } + else + { + throw new InvalidOperationException($"No corpora associated with engine {engine.Id}"); + } + if (build.Options is not null) request.Options = JsonSerializer.Serialize(build.Options); @@ -349,7 +309,6 @@ Dictionary> GetChapters(V1.Corpus corpus, string scriptureRang _logger.LogInformation("Error parsing build request summary."); _logger.LogInformation("{request}", JsonSerializer.Serialize(request)); } - await client.StartBuildAsync(request, cancellationToken: cancellationToken); } catch @@ -359,6 +318,181 @@ Dictionary> GetChapters(V1.Corpus corpus, string scriptureRang } } + private record TrainingSubcorpus + { + public required Dictionary> Chapters { get; init; } + public required List SourceCorporaRefs { get; init; } + public required List TargetCorporaRefs { get; init; } + } + + private enum CorpusType + { + Source = 0, + Target = 1 + } + + private List GetTrainingCorporaPerTexts( + IReadOnlyList trainingCorpora, + IReadOnlyList parallelCorpora + ) + { + Dictionary fileLocations = parallelCorpora + .SelectMany(pc => pc.SourceCorpora.Concat(pc.TargetCorpora)) + .Where(c => c.Files.Count > 0) + .Select(c => (c.Id, Path.Combine(_dataFileOptions.CurrentValue.FilesDirectory, c.Files[0].Filename))) + .ToDictionary(); + + Dictionary<(string Book, int Chapter), (string CorpusRef, CorpusType CorpusType)> sourceCorporaPerChapters = + trainingCorpora + .Where(tc => tc.SourceFilters != null) + .SelectMany(tc => tc.SourceFilters!) + .SelectMany(sf => + { + if (sf.TextIds is not null && sf.ScriptureRange is not null) + { + throw new InvalidOperationException( + $"Cannot specify both 'textIds' and 'scriptureRange' for 'pretranslate'." + ); + } + var bookChapters = new List<(string, int)>(); + if (fileLocations.TryGetValue(sf.CorpusRef, out string? fileLocation)) + { + ScrVers versification = _scriptureDataFileService + .GetParatextProjectSettings(fileLocation) + .Versification; + if (sf.TextIds != null) + { + bookChapters = sf.TextIds.SelectMany(id => + Enumerable + .Range(1, versification.GetLastChapter(Canon.BookIdToNumber(id))) + .Select(chpt => (id, chpt)) + ) + .ToList(); + } + else if (!string.IsNullOrEmpty(sf.ScriptureRange)) + { + bookChapters = GetChapters(sf.ScriptureRange, fileLocation) + .Select(kvp => + ( + kvp.Key, + kvp.Value.Count == 0 + ? Enumerable.Range( + 1, + versification.GetLastChapter(Canon.BookIdToNumber(kvp.Key)) + ) + : kvp.Value + ) + ) + .SelectMany(tup => tup.Item2.Select(chpt => (tup.Item1, chpt))) + .ToList(); + } + } + return bookChapters.Select(bc => ((bc.Item1, bc.Item2), (sf.CorpusRef, CorpusType.Source))); + }) + .ToDictionary(); + + Dictionary<(string Book, int Chapter), (string CorpusRef, CorpusType CorpusType)> targetCorporaPerChapters = + trainingCorpora + .Where(tc => tc.TargetFilters != null) + .SelectMany(tc => tc.TargetFilters!) + .SelectMany(tf => + { + var bookChapters = new List<(string, int)>(); + if (fileLocations.TryGetValue(tf.CorpusRef, out string? fileLocation)) + { + ScrVers versification = _scriptureDataFileService + .GetParatextProjectSettings(fileLocation) + .Versification; + if (tf.TextIds != null) + { + bookChapters = tf.TextIds.SelectMany(id => + Enumerable + .Range(1, versification.GetLastChapter(Canon.BookIdToNumber(id))) + .Select(chpt => (id, chpt)) + ) + .ToList(); + } + else if (!string.IsNullOrEmpty(tf.ScriptureRange)) + { + bookChapters = GetChapters(tf.ScriptureRange, fileLocation) + .Select(kvp => + ( + kvp.Key, + kvp.Value.Count == 0 + ? Enumerable.Range( + 1, + versification.GetLastChapter(Canon.BookIdToNumber(kvp.Key)) + ) + : kvp.Value + ) + ) + .SelectMany(tup => tup.Item2.Select(chpt => (tup.Item1, chpt))) + .ToList(); + } + } + return bookChapters.Select(bc => ((bc.Item1, bc.Item2), (tf.CorpusRef, CorpusType.Source))); + }) + .ToDictionary(); + + return sourceCorporaPerChapters + .Concat(targetCorporaPerChapters) + .Aggregate( + new Dictionary<(string Book, int Chapter), List<(string CorpusRef, CorpusType CorpusType)>>(), + (dict, kvp) => + { + if (dict.TryGetValue(kvp.Key, out List<(string, CorpusType)>? corporaList)) + { + corporaList.Add(kvp.Value); + } + else + { + dict[kvp.Key] = new() { kvp.Value }; + } + return dict; + } + ) + .Aggregate( + new Dictionary, Dictionary>>(), + (dict, kvp) => + { + if (dict.TryGetValue(kvp.Value, out Dictionary>? chaptersPerBook)) + { + if (chaptersPerBook.TryGetValue(kvp.Key.Book, out List? chapters)) + { + if (!chapters.Contains(kvp.Key.Chapter)) + chapters.Add(kvp.Key.Chapter); + } + else + { + chaptersPerBook[kvp.Key.Book] = new List(); + } + } + else + { + dict[kvp.Value] = new Dictionary>() + { + { + kvp.Key.Book, + new() { kvp.Key.Chapter } + } + }; + } + return dict; + } + ) + .Select(kvp => new TrainingSubcorpus() + { + SourceCorporaRefs = kvp.Key.Where(tup => tup.CorpusType == CorpusType.Source) + .Select(tup => tup.CorpusRef) + .ToList(), + TargetCorporaRefs = kvp.Key.Where(tup => tup.CorpusType == CorpusType.Target) + .Select(tup => tup.CorpusRef) + .ToList(), + Chapters = kvp.Value + }) + .ToList(); + } + public async Task CancelBuildAsync(string engineId, CancellationToken cancellationToken = default) { Engine? engine = await GetAsync(engineId, cancellationToken); @@ -487,8 +621,8 @@ public Task AddParallelCorpus(string engineId, ParallelCorpus corpus, Cancellati public async Task UpdateParallelCorpusAsync( string engineId, string parallelCorpusId, - IReadOnlyList? sourceCorpusRefs, - IReadOnlyList? targetCorpusRefs, + IReadOnlyList? sourceCorpora, + IReadOnlyList? targetCorpora, CancellationToken cancellationToken = default ) { @@ -496,10 +630,10 @@ public async Task UpdateParallelCorpusAsync( e => e.Id == engineId && e.ParallelCorpora.Any(c => c.Id == parallelCorpusId), u => { - if (sourceCorpusRefs is not null) - u.Set(c => c.ParallelCorpora[ArrayPosition.FirstMatching].SourceCorporaRefs, sourceCorpusRefs); - if (targetCorpusRefs is not null) - u.Set(c => c.ParallelCorpora[ArrayPosition.FirstMatching].TargetCorporaRefs, targetCorpusRefs); + if (sourceCorpora is not null) + u.Set(c => c.ParallelCorpora[ArrayPosition.FirstMatching].SourceCorpora, sourceCorpora); + if (targetCorpora is not null) + u.Set(c => c.ParallelCorpora[ArrayPosition.FirstMatching].TargetCorpora, targetCorpora); }, cancellationToken: cancellationToken ); @@ -513,7 +647,7 @@ public async Task UpdateParallelCorpusAsync( return engine.ParallelCorpora.First(c => c.Id == parallelCorpusId); } - public async Task DeleteParallelCorpus( + public async Task DeleteParallelCorpusAsync( string engineId, string parallelCorpusId, CancellationToken cancellationToken = default @@ -647,9 +781,14 @@ private Models.WordGraphArc Map(V1.WordGraphArc source) }; } - private V1.Corpus Map(Models.Corpus source) + private V1.Corpus Map( + Models.Corpus source, + TrainingCorpus? trainingCorpus, + PretranslateCorpus? pretranslateCorpus, + bool trainOnAll = false + ) { - return new V1.Corpus + var corpus = new V1.Corpus { Id = source.Id, SourceLanguage = source.SourceLanguage, @@ -657,6 +796,155 @@ private V1.Corpus Map(Models.Corpus source) SourceFiles = { source.SourceFiles.Select(Map) }, TargetFiles = { source.TargetFiles.Select(Map) } }; + if (pretranslateCorpus != null) + { + corpus.PretranslateAll = pretranslateCorpus.TextIds is null && pretranslateCorpus.ScriptureRange is null; + if (pretranslateCorpus.TextIds is not null && pretranslateCorpus.ScriptureRange is not null) + { + throw new InvalidOperationException( + $"The corpus {source.Id} cannot specify both 'textIds' and 'scriptureRange' for 'pretranslate'." + ); + } + if (pretranslateCorpus.TextIds is not null) + corpus.PretranslateTextIds.Add(pretranslateCorpus.TextIds); + if (!string.IsNullOrEmpty(pretranslateCorpus.ScriptureRange)) + { + if ( + source.TargetFiles.Count > 1 + || source.TargetFiles[0].Format != Shared.Contracts.FileFormat.Paratext + ) + { + throw new InvalidOperationException( + $"The corpus {source.Id} is not compatible with using a scripture range" + ); + } + corpus.PretranslateChapters.Add( + GetChapters(corpus.TargetFiles[0].Location, pretranslateCorpus.ScriptureRange) + .Select( + (kvp) => + { + var scriptureChapters = new ScriptureChapters(); + scriptureChapters.Chapters.Add(kvp.Value); + return (kvp.Key, scriptureChapters); + } + ) + .ToDictionary() + ); + } + } + if (trainingCorpus != null) + { + corpus.TrainOnAll = trainingCorpus.TextIds is null && trainingCorpus.ScriptureRange is null; + if (trainingCorpus.TextIds is not null && trainingCorpus.ScriptureRange is not null) + { + throw new InvalidOperationException( + $"The corpus {source.Id} cannot specify both 'textIds' and 'scriptureRange' for trainOn" + ); + } + if (trainingCorpus.TextIds is not null) + corpus.TrainOnTextIds.Add(trainingCorpus.TextIds); + if (!string.IsNullOrEmpty(trainingCorpus.ScriptureRange)) + { + if ( + source.TargetFiles.Count > 1 + || source.TargetFiles[0].Format != Shared.Contracts.FileFormat.Paratext + ) + { + throw new InvalidOperationException( + $"The corpus {source.Id} is not compatible with using a scripture range" + ); + } + corpus.TrainOnChapters.Add( + GetChapters(corpus.TargetFiles[0].Location, trainingCorpus.ScriptureRange) + .Select( + (kvp) => + { + var scriptureChapters = new ScriptureChapters(); + scriptureChapters.Chapters.Add(kvp.Value); + return (kvp.Key, scriptureChapters); + } + ) + .ToDictionary() + ); + } + } + corpus.TrainOnAll = trainOnAll; + return corpus; + } + + private IEnumerable Map( + ParallelCorpus source, + List trainingCorpora, + PretranslateCorpus? pretranslateCorpus, + bool trainOnAll = false + ) + { + if (pretranslateCorpus != null) + { + var corpus = new V1.Corpus + { + Id = source.Id, + SourceLanguage = source.SourceCorpora[0].Language, + TargetLanguage = source.TargetCorpora[0].Language, + SourceFiles = { source.SourceCorpora.SelectMany(c => c.Files.Select(Map)) }, + TargetFiles = { source.TargetCorpora.SelectMany(c => c.Files.Select(Map)) } + }; + yield return corpus; + corpus.PretranslateChapters.Clear(); + } + if (!trainOnAll && trainingCorpora != null) + { + foreach (TrainingSubcorpus trainingCorpus in trainingCorpora) + { + var corpus = new V1.Corpus + { + Id = source.Id, + SourceLanguage = source.SourceCorpora[0].Language, + TargetLanguage = source.TargetCorpora[0].Language, + SourceFiles = + { + source + .SourceCorpora.Where(sc => trainingCorpus.SourceCorporaRefs.Contains(sc.Id)) + .SelectMany(sc => sc.Files) + .Select(Map) + }, + TargetFiles = + { + source + .TargetCorpora.Where(sc => trainingCorpus.TargetCorporaRefs.Contains(sc.Id)) + .SelectMany(sc => sc.Files) + .Select(Map) + } + }; + corpus.TrainOnChapters.Add( + trainingCorpus + .Chapters.Select( + (kvp) => + { + var scriptureChapters = new ScriptureChapters(); + scriptureChapters.Chapters.Add(kvp.Value); + return (kvp.Key, scriptureChapters); + } + ) + .ToDictionary() + ); + + yield return corpus; + corpus.TrainOnChapters.Clear(); + } + } + else + { + yield return new V1.Corpus + { + Id = source.Id, + SourceLanguage = source.SourceCorpora[0].Language, + TargetLanguage = source.TargetCorpora[0].Language, + SourceFiles = { source.SourceCorpora.SelectMany(c => c.Files.Select(Map)) }, + TargetFiles = { source.TargetCorpora.SelectMany(c => c.Files.Select(Map)) }, + TrainOnAll = trainOnAll + }; + } } private V1.CorpusFile Map(Models.CorpusFile source) diff --git a/src/Serval/src/Serval.Translation/Services/IEngineService.cs b/src/Serval/src/Serval.Translation/Services/IEngineService.cs index 25db9acd..82fc5df4 100644 --- a/src/Serval/src/Serval.Translation/Services/IEngineService.cs +++ b/src/Serval/src/Serval.Translation/Services/IEngineService.cs @@ -56,11 +56,15 @@ Task DeleteCorpusAsync( Task UpdateParallelCorpusAsync( string engineId, string parallelCorpusId, - IReadOnlyList? sourceCorpusRefs, - IReadOnlyList? targetCorpusRefs, + IReadOnlyList? sourceCorpora, + IReadOnlyList? targetCorpora, + CancellationToken cancellationToken = default + ); + Task DeleteParallelCorpusAsync( + string engineId, + string parallelCorpusId, CancellationToken cancellationToken = default ); - Task DeleteParallelCorpus(string engineId, string parallelCorpusId, CancellationToken cancellationToken = default); Task DeleteAllCorpusFilesAsync(string dataFileId, CancellationToken cancellationToken = default);