From de529ceaef6f323e6ad19c8f0618cbf707fa0969 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Mon, 2 Dec 2024 13:43:25 -0500 Subject: [PATCH] Sync corpora and data files in translation engine Use mongoDB strings directly for Mongo array filtering (hacky solution - need to fix later https://github.com/sillsdev/serval/issues/553). Broken implementation for MemoryRepository - https://github.com/sillsdev/serval/issues/554 --- docker-compose.mongo.yml | 2 +- .../src/SIL.DataAccess/ArrayPosition.cs | 1 + .../DataAccessFieldDefinition.cs | 4 + .../src/SIL.DataAccess/IRepository.cs | 8 ++ .../src/SIL.DataAccess/MemoryRepository.cs | 11 ++ .../src/SIL.DataAccess/MemoryUpdateBuilder.cs | 2 + .../src/SIL.DataAccess/MongoRepository.cs | 25 +++- .../src/SIL.DataAccess/MongoUpdateBuilder.cs | 22 +-- .../DistributedReaderWriterLockFactory.cs | 6 +- src/Serval/src/Serval.Client/Client.g.cs | 60 ++++---- .../Consumers/GetCorpusConsumer.cs | 28 ++-- .../Contracts/CorpusFileDto.cs | 2 +- .../Controllers/CorporaController.cs | 16 +-- .../src/Serval.DataFiles/Models/CorpusFile.cs | 2 +- .../Services/CorpusService.cs | 57 ++++++-- .../Services/DataFileService.cs | 1 + .../Serval.Shared/Contracts/CorpusUpdated.cs | 7 + .../Contracts/DataFileUpdated.cs | 1 + ...iatorRegistrationConfiguratorExtensions.cs | 2 + .../Consumers/CorpusUpdatedConsumer.cs | 26 ++++ .../Consumers/DataFileUpdatedConsumer.cs | 15 ++ .../Services/EngineService.cs | 77 +++++++++- .../Services/IEngineService.cs | 12 ++ .../TranslationEngineTests.cs | 135 ++++++++++++++---- .../Services/CorpusServiceTests.cs | 18 ++- 25 files changed, 426 insertions(+), 114 deletions(-) create mode 100644 src/Serval/src/Serval.Shared/Contracts/CorpusUpdated.cs create mode 100644 src/Serval/src/Serval.Translation/Consumers/CorpusUpdatedConsumer.cs create mode 100644 src/Serval/src/Serval.Translation/Consumers/DataFileUpdatedConsumer.cs diff --git a/docker-compose.mongo.yml b/docker-compose.mongo.yml index 1f1bbec9..b99403b9 100644 --- a/docker-compose.mongo.yml +++ b/docker-compose.mongo.yml @@ -15,5 +15,5 @@ services: [ '/bin/sh', '-c', - 'mongod --quiet --replSet myRS --bind_ip 0.0.0.0 & sleep 2s; mongosh --host localhost:27017 --eval '' config = { "_id" : "myRS", "members" : [{"_id" : 0,"host" : "mongo:27017"}] }; rs.initiate(config, { force: true }); '' ; sleep infinity' + 'mongod --profile=2 --replSet myRS --bind_ip 0.0.0.0 & sleep 2s; mongosh --host localhost:27017 --eval '' config = { "_id" : "myRS", "members" : [{"_id" : 0,"host" : "mongo:27017"}] }; rs.initiate(config, { force: true }); '' ; sleep infinity' ] diff --git a/src/DataAccess/src/SIL.DataAccess/ArrayPosition.cs b/src/DataAccess/src/SIL.DataAccess/ArrayPosition.cs index 674972cf..d47cb47a 100644 --- a/src/DataAccess/src/SIL.DataAccess/ArrayPosition.cs +++ b/src/DataAccess/src/SIL.DataAccess/ArrayPosition.cs @@ -4,4 +4,5 @@ public static class ArrayPosition { public const int FirstMatching = int.MaxValue; public const int All = int.MaxValue - 1; + public const int ArrayFilter = int.MaxValue - 2; } diff --git a/src/DataAccess/src/SIL.DataAccess/DataAccessFieldDefinition.cs b/src/DataAccess/src/SIL.DataAccess/DataAccessFieldDefinition.cs index b7be7638..e400de08 100644 --- a/src/DataAccess/src/SIL.DataAccess/DataAccessFieldDefinition.cs +++ b/src/DataAccess/src/SIL.DataAccess/DataAccessFieldDefinition.cs @@ -17,6 +17,10 @@ LinqProvider linqProvider linqProvider ); string fieldName = rendered.FieldName.Replace(ArrayPosition.All.ToString(CultureInfo.InvariantCulture), "$[]"); + fieldName = fieldName.Replace( + ArrayPosition.ArrayFilter.ToString(CultureInfo.InvariantCulture), + "$[arrayFilter]" + ); fieldName = fieldName.Replace(ArrayPosition.FirstMatching.ToString(CultureInfo.InvariantCulture), "$"); if (fieldName != rendered.FieldName) { diff --git a/src/DataAccess/src/SIL.DataAccess/IRepository.cs b/src/DataAccess/src/SIL.DataAccess/IRepository.cs index 95539b96..bc454bd8 100644 --- a/src/DataAccess/src/SIL.DataAccess/IRepository.cs +++ b/src/DataAccess/src/SIL.DataAccess/IRepository.cs @@ -16,9 +16,17 @@ public interface IRepository bool returnOriginal = false, CancellationToken cancellationToken = default ); + Task UpdateAllAsync( + Expression> filter, + Action> update, + string jsonArrayFilterDefinition, + CancellationToken cancellationToken = default + ); + Task UpdateAllAsync( Expression> filter, Action> update, + UpdateOptions? updateOptions = null, CancellationToken cancellationToken = default ); Task DeleteAsync(Expression> filter, CancellationToken cancellationToken = default); diff --git a/src/DataAccess/src/SIL.DataAccess/MemoryRepository.cs b/src/DataAccess/src/SIL.DataAccess/MemoryRepository.cs index 93c0ca47..c0d062e4 100644 --- a/src/DataAccess/src/SIL.DataAccess/MemoryRepository.cs +++ b/src/DataAccess/src/SIL.DataAccess/MemoryRepository.cs @@ -233,9 +233,20 @@ public async Task InsertAllAsync(IReadOnlyCollection entities, CancellationTo return returnOriginal ? original : entity; } + public async Task UpdateAllAsync( + Expression> filter, + Action> update, + string jsonArrayFilterDefinition, + CancellationToken cancellationToken = default + ) + { + return await UpdateAllAsync(filter, update, null, cancellationToken); + } + public async Task UpdateAllAsync( Expression> filter, Action> update, + UpdateOptions? updateOptions = null, CancellationToken cancellationToken = default ) { diff --git a/src/DataAccess/src/SIL.DataAccess/MemoryUpdateBuilder.cs b/src/DataAccess/src/SIL.DataAccess/MemoryUpdateBuilder.cs index 78e00924..fca815e7 100644 --- a/src/DataAccess/src/SIL.DataAccess/MemoryUpdateBuilder.cs +++ b/src/DataAccess/src/SIL.DataAccess/MemoryUpdateBuilder.cs @@ -224,6 +224,8 @@ Expression> field } break; case ArrayPosition.All: + // This doesn't filter as it should - but it's good enough for unit testing. + case ArrayPosition.ArrayFilter: newOwners.AddRange(((IEnumerable)owner).Cast()); break; default: diff --git a/src/DataAccess/src/SIL.DataAccess/MongoRepository.cs b/src/DataAccess/src/SIL.DataAccess/MongoRepository.cs index 8ba08dde..92d521ea 100644 --- a/src/DataAccess/src/SIL.DataAccess/MongoRepository.cs +++ b/src/DataAccess/src/SIL.DataAccess/MongoRepository.cs @@ -151,9 +151,24 @@ await _collection return entity; } + public async Task UpdateAllAsync( + Expression> filter, + Action> update, + string jsonArrayFilterDefinition, + CancellationToken cancellationToken = default + ) + { + var updateOptions = new UpdateOptions + { + ArrayFilters = [new JsonArrayFilterDefinition(jsonArrayFilterDefinition)] + }; + return await UpdateAllAsync(filter, update, updateOptions, cancellationToken).ConfigureAwait(false); + } + public async Task UpdateAllAsync( Expression> filter, Action> update, + UpdateOptions? updateOptions = null, CancellationToken cancellationToken = default ) { @@ -167,13 +182,19 @@ public async Task UpdateAllAsync( if (_context.Session is not null) { result = await _collection - .UpdateManyAsync(_context.Session, filter, updateDef, cancellationToken: cancellationToken) + .UpdateManyAsync( + _context.Session, + filter, + updateDef, + updateOptions, + cancellationToken: cancellationToken + ) .ConfigureAwait(false); } else { result = await _collection - .UpdateManyAsync(filter, updateDef, cancellationToken: cancellationToken) + .UpdateManyAsync(filter, updateDef, updateOptions, cancellationToken: cancellationToken) .ConfigureAwait(false); } } diff --git a/src/DataAccess/src/SIL.DataAccess/MongoUpdateBuilder.cs b/src/DataAccess/src/SIL.DataAccess/MongoUpdateBuilder.cs index 91506f4d..e684563f 100644 --- a/src/DataAccess/src/SIL.DataAccess/MongoUpdateBuilder.cs +++ b/src/DataAccess/src/SIL.DataAccess/MongoUpdateBuilder.cs @@ -3,36 +3,38 @@ namespace SIL.DataAccess; public class MongoUpdateBuilder : IUpdateBuilder where T : IEntity { - private readonly UpdateDefinitionBuilder _builder; + private readonly UpdateDefinitionBuilder _updateBuilder; + private readonly FilterDefinitionBuilder _filterBuilder; private readonly List> _defs; public MongoUpdateBuilder() { - _builder = Builders.Update; + _updateBuilder = Builders.Update; + _filterBuilder = Builders.Filter; _defs = new List>(); } public IUpdateBuilder Set(Expression> field, TField value) { - _defs.Add(_builder.Set(ToFieldDefinition(field), value)); + _defs.Add(_updateBuilder.Set(ToFieldDefinition(field), value)); return this; } public IUpdateBuilder SetOnInsert(Expression> field, TField value) { - _defs.Add(_builder.SetOnInsert(ToFieldDefinition(field), value)); + _defs.Add(_updateBuilder.SetOnInsert(ToFieldDefinition(field), value)); return this; } public IUpdateBuilder Unset(Expression> field) { - _defs.Add(_builder.Unset(ToFieldDefinition(field))); + _defs.Add(_updateBuilder.Unset(ToFieldDefinition(field))); return this; } public IUpdateBuilder Inc(Expression> field, int value = 1) { - _defs.Add(_builder.Inc(ToFieldDefinition(field), value)); + _defs.Add(_updateBuilder.Inc(ToFieldDefinition(field), value)); return this; } @@ -41,19 +43,19 @@ public IUpdateBuilder RemoveAll( Expression> predicate ) { - _defs.Add(_builder.PullFilter(ToFieldDefinition(field), Builders.Filter.Where(predicate))); + _defs.Add(_updateBuilder.PullFilter(ToFieldDefinition(field), Builders.Filter.Where(predicate))); return this; } public IUpdateBuilder Remove(Expression?>> field, TItem value) { - _defs.Add(_builder.Pull(ToFieldDefinition(field), value)); + _defs.Add(_updateBuilder.Pull(ToFieldDefinition(field), value)); return this; } public IUpdateBuilder Add(Expression?>> field, TItem value) { - _defs.Add(_builder.Push(ToFieldDefinition(field), value)); + _defs.Add(_updateBuilder.Push(ToFieldDefinition(field), value)); return this; } @@ -61,7 +63,7 @@ public UpdateDefinition Build() { if (_defs.Count == 1) return _defs.Single(); - return _builder.Combine(_defs); + return _updateBuilder.Combine(_defs); } private static FieldDefinition ToFieldDefinition(Expression> field) diff --git a/src/Machine/src/Serval.Machine.Shared/Services/DistributedReaderWriterLockFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/DistributedReaderWriterLockFactory.cs index e0d44795..484c1eda 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/DistributedReaderWriterLockFactory.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/DistributedReaderWriterLockFactory.cs @@ -55,7 +55,7 @@ private async Task ReleaseAllWriterLocksAsync(CancellationToken cancellationToke await _locks.UpdateAllAsync( rwl => rwl.WriterLock != null && rwl.WriterLock.HostId == _serviceOptions.ServiceId, u => u.Unset(rwl => rwl.WriterLock), - cancellationToken + cancellationToken: cancellationToken ); } @@ -64,7 +64,7 @@ private async Task ReleaseAllReaderLocksAsync(CancellationToken cancellationToke await _locks.UpdateAllAsync( rwl => rwl.ReaderLocks.Any(l => l.HostId == _serviceOptions.ServiceId), u => u.RemoveAll(rwl => rwl.ReaderLocks, l => l.HostId == _serviceOptions.ServiceId), - cancellationToken + cancellationToken: cancellationToken ); } @@ -73,7 +73,7 @@ private async Task RemoveAllWaitersAsync(CancellationToken cancellationToken) await _locks.UpdateAllAsync( rwl => rwl.WriterQueue.Any(l => l.HostId == _serviceOptions.ServiceId), u => u.RemoveAll(rwl => rwl.WriterQueue, l => l.HostId == _serviceOptions.ServiceId), - cancellationToken + cancellationToken: cancellationToken ); } } diff --git a/src/Serval/src/Serval.Client/Client.g.cs b/src/Serval/src/Serval.Client/Client.g.cs index f7645763..b0edfda4 100644 --- a/src/Serval/src/Serval.Client/Client.g.cs +++ b/src/Serval/src/Serval.Client/Client.g.cs @@ -7073,9 +7073,37 @@ public partial class Corpus [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class CorpusFile { - [Newtonsoft.Json.JsonProperty("file", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string FileId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? TextId { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class CorpusConfig + { + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Language { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required] - public DataFile File { get; set; } = new DataFile(); + public System.Collections.Generic.IList Files { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class CorpusFileConfig + { + [Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string FileId { get; set; } = default!; [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? TextId { get; set; } = default!; @@ -7118,34 +7146,6 @@ public enum FileFormat } - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class CorpusConfig - { - [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? Name { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Language { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList Files { get; set; } = new System.Collections.ObjectModel.Collection(); - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class CorpusFileConfig - { - [Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string FileId { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? TextId { get; set; } = default!; - - } - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class TranslationEngine { diff --git a/src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs b/src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs index c369d528..02e7d3c3 100644 --- a/src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs +++ b/src/Serval/src/Serval.DataFiles/Consumers/GetCorpusConsumer.cs @@ -1,8 +1,9 @@ namespace Serval.DataFiles.Consumers; -public class GetCorpusConsumer(ICorpusService corpusService) : IConsumer +public class GetCorpusConsumer(ICorpusService corpusService, IDataFileService dataFileService) : IConsumer { private readonly ICorpusService _corpusService = corpusService; + private readonly IDataFileService _dataFileService = dataFileService; public async Task Consume(ConsumeContext context) { @@ -19,19 +20,13 @@ await context.RespondAsync( CorpusId = corpus.Id, Name = corpus.Name, Language = corpus.Language, - Files = corpus - .Files.Select(f => new CorpusFileResult + Files = await Task.WhenAll( + corpus.Files.Select(async f => new CorpusFileResult { TextId = f.TextId!, - File = new DataFileResult - { - DataFileId = f.File.Id, - Filename = f.File.Filename, - Format = f.File.Format, - Name = f.File.Name - } + File = Map(await _dataFileService.GetAsync(f.FileId)) }) - .ToList() + ) } ); } @@ -42,4 +37,15 @@ await context.RespondAsync( ); } } + + private static DataFileResult Map(DataFile dataFile) + { + return new DataFileResult + { + DataFileId = dataFile.Id, + Name = dataFile.Name, + Filename = dataFile.Filename, + Format = dataFile.Format, + }; + } } diff --git a/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs b/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs index d2d175be..0efeaac9 100644 --- a/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs +++ b/src/Serval/src/Serval.DataFiles/Contracts/CorpusFileDto.cs @@ -2,6 +2,6 @@ namespace Serval.DataFiles.Contracts; public record CorpusFileDto { - public required DataFileDto File { get; init; } + public required string FileId { get; init; } public string? TextId { get; init; } } diff --git a/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs b/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs index 29bf041e..c30a5ed3 100644 --- a/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs +++ b/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs @@ -177,7 +177,7 @@ CancellationToken cancellationToken DataFile? dataFile = await _dataFileService.GetAsync(file.FileId, cancellationToken); if (dataFile == null) throw new InvalidOperationException($"DataFile with id {file.FileId} does not exist."); - dataFiles.Add(new CorpusFile { File = dataFile, TextId = file.TextId }); + dataFiles.Add(new CorpusFile { FileId = file.FileId, TextId = file.TextId }); } return dataFiles; } @@ -197,18 +197,6 @@ private CorpusDto Map(Corpus source) private CorpusFileDto Map(CorpusFile source) { - return new CorpusFileDto { File = Map(source.File), TextId = source.TextId }; - } - - private DataFileDto Map(DataFile source) - { - return new DataFileDto - { - Id = source.Id, - Url = _urlService.GetUrl(Endpoints.GetDataFile, new { id = source.Id }), - Name = source.Name, - Format = source.Format, - Revision = source.Revision - }; + return new CorpusFileDto { FileId = source.FileId, TextId = source.TextId }; } } diff --git a/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs b/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs index a4311e39..277c9ccd 100644 --- a/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs +++ b/src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs @@ -2,6 +2,6 @@ namespace Serval.DataFiles.Models; public record CorpusFile { - public required DataFile File { get; init; } + public required string FileId { get; init; } public string? TextId { get; init; } } diff --git a/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs b/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs index f5b8e4b6..61622a93 100644 --- a/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs +++ b/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs @@ -1,7 +1,17 @@ namespace Serval.DataFiles.Services; -public class CorpusService(IRepository corpora) : OwnedEntityServiceBase(corpora), ICorpusService +public class CorpusService( + IRepository corpora, + IDataAccessContext dataAccessContext, + IDataFileService dataFileService, + IScopedMediator mediator +) : OwnedEntityServiceBase(corpora), ICorpusService { + private readonly IDataAccessContext _dataAccessContext = dataAccessContext; + private readonly IDataFileService _dataFileService = dataFileService; + + private readonly IScopedMediator _mediator = mediator; + public async Task GetAsync(string id, string owner, CancellationToken cancellationToken = default) { Corpus? corpus = await Entities.GetAsync(c => c.Id == id && c.Owner == owner, cancellationToken); @@ -16,13 +26,44 @@ public async Task UpdateAsync( CancellationToken cancellationToken = default ) { - Corpus? corpus = await Entities.UpdateAsync( - c => c.Id == id, - u => u.Set(c => c.Files, files), - cancellationToken: cancellationToken + return await _dataAccessContext.WithTransactionAsync( + async (ct) => + { + Corpus? corpus = await Entities.UpdateAsync( + c => c.Id == id, + u => u.Set(c => c.Files, files), + cancellationToken: cancellationToken + ); + if (corpus is null) + throw new EntityNotFoundException($"Could not find Corpus '{id}."); + await _mediator.Publish( + new CorpusUpdated + { + CorpusId = corpus.Id, + Files = await Task.WhenAll( + corpus.Files.Select(async f => new CorpusFileResult + { + TextId = f.TextId!, + File = Map(await _dataFileService.GetAsync(f.FileId)) + }) + ) + }, + ct + ); + return corpus; + }, + cancellationToken ); - if (corpus is null) - throw new EntityNotFoundException($"Could not find Corpus '{id}."); - return corpus; + } + + private static DataFileResult Map(DataFile dataFile) + { + return new DataFileResult + { + DataFileId = dataFile.Id, + Name = dataFile.Name, + Filename = dataFile.Filename, + Format = dataFile.Format, + }; } } diff --git a/src/Serval/src/Serval.DataFiles/Services/DataFileService.cs b/src/Serval/src/Serval.DataFiles/Services/DataFileService.cs index 6df3a01f..b7810f1c 100644 --- a/src/Serval/src/Serval.DataFiles/Services/DataFileService.cs +++ b/src/Serval/src/Serval.DataFiles/Services/DataFileService.cs @@ -89,6 +89,7 @@ await _deletedFiles.InsertAsync( cancellationToken: ct ); } + await _mediator.Publish(new DataFileUpdated { DataFileId = id, Filename = filename }, ct); }, cancellationToken: cancellationToken ); diff --git a/src/Serval/src/Serval.Shared/Contracts/CorpusUpdated.cs b/src/Serval/src/Serval.Shared/Contracts/CorpusUpdated.cs new file mode 100644 index 00000000..402658f3 --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/CorpusUpdated.cs @@ -0,0 +1,7 @@ +namespace Serval.Shared.Contracts; + +public record CorpusUpdated +{ + public required string CorpusId { get; init; } + public required IReadOnlyList Files { get; init; } +} diff --git a/src/Serval/src/Serval.Shared/Contracts/DataFileUpdated.cs b/src/Serval/src/Serval.Shared/Contracts/DataFileUpdated.cs index 1d32e196..7968f28a 100644 --- a/src/Serval/src/Serval.Shared/Contracts/DataFileUpdated.cs +++ b/src/Serval/src/Serval.Shared/Contracts/DataFileUpdated.cs @@ -3,4 +3,5 @@ public record DataFileUpdated { public required string DataFileId { get; init; } + public required string Filename { get; init; } } diff --git a/src/Serval/src/Serval.Translation/Configuration/IMediatorRegistrationConfiguratorExtensions.cs b/src/Serval/src/Serval.Translation/Configuration/IMediatorRegistrationConfiguratorExtensions.cs index 1397ed4d..d9b91b0f 100644 --- a/src/Serval/src/Serval.Translation/Configuration/IMediatorRegistrationConfiguratorExtensions.cs +++ b/src/Serval/src/Serval.Translation/Configuration/IMediatorRegistrationConfiguratorExtensions.cs @@ -7,6 +7,8 @@ this IMediatorRegistrationConfigurator configurator ) { configurator.AddConsumer(); + configurator.AddConsumer(); + configurator.AddConsumer(); return configurator; } } diff --git a/src/Serval/src/Serval.Translation/Consumers/CorpusUpdatedConsumer.cs b/src/Serval/src/Serval.Translation/Consumers/CorpusUpdatedConsumer.cs new file mode 100644 index 00000000..366e8099 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Consumers/CorpusUpdatedConsumer.cs @@ -0,0 +1,26 @@ +namespace Serval.Translation.Consumers; + +public class CorpusUpdatedConsumer(IEngineService engineService) : IConsumer +{ + private readonly IEngineService _engineService = engineService; + + public async Task Consume(ConsumeContext context) + { + await _engineService.UpdateCorpusFilesAsync( + context.Message.CorpusId, + context.Message.Files.Select(Map).ToList(), + context.CancellationToken + ); + } + + private static CorpusFile Map(CorpusFileResult corpusFile) + { + return new CorpusFile + { + Id = corpusFile.File.DataFileId, + TextId = corpusFile.TextId, + Filename = corpusFile.File.Filename, + Format = corpusFile.File.Format, + }; + } +} diff --git a/src/Serval/src/Serval.Translation/Consumers/DataFileUpdatedConsumer.cs b/src/Serval/src/Serval.Translation/Consumers/DataFileUpdatedConsumer.cs new file mode 100644 index 00000000..b75572f7 --- /dev/null +++ b/src/Serval/src/Serval.Translation/Consumers/DataFileUpdatedConsumer.cs @@ -0,0 +1,15 @@ +namespace Serval.Translation.Consumers; + +public class DataFileUpdatedConsumer(IEngineService engineService) : IConsumer +{ + private readonly IEngineService _engineService = engineService; + + public async Task Consume(ConsumeContext context) + { + await _engineService.UpdateDataFileFilenameFilesAsync( + context.Message.DataFileId, + context.Message.Filename, + context.CancellationToken + ); + } +} diff --git a/src/Serval/src/Serval.Translation/Services/EngineService.cs b/src/Serval/src/Serval.Translation/Services/EngineService.cs index 5e653059..53932848 100644 --- a/src/Serval/src/Serval.Translation/Services/EngineService.cs +++ b/src/Serval/src/Serval.Translation/Services/EngineService.cs @@ -561,11 +561,84 @@ public Task DeleteAllCorpusFilesAsync(string dataFileId, CancellationToken cance e => e.Corpora.Any(c => c.SourceFiles.Any(f => f.Id == dataFileId) || c.TargetFiles.Any(f => f.Id == dataFileId) + ) + || e.ParallelCorpora.Any(c => + c.SourceCorpora.Any(mc => mc.Files.Any(f => f.Id == dataFileId)) + || c.TargetCorpora.Any(mc => mc.Files.Any(f => f.Id == dataFileId)) ), u => u.RemoveAll(e => e.Corpora[ArrayPosition.All].SourceFiles, f => f.Id == dataFileId) - .RemoveAll(e => e.Corpora[ArrayPosition.All].TargetFiles, f => f.Id == dataFileId), - cancellationToken + .RemoveAll(e => e.Corpora[ArrayPosition.All].TargetFiles, f => f.Id == dataFileId) + .RemoveAll( + e => e.ParallelCorpora[ArrayPosition.All].SourceCorpora[ArrayPosition.All].Files, + f => f.Id == dataFileId + ) + .RemoveAll( + e => e.ParallelCorpora[ArrayPosition.All].TargetCorpora[ArrayPosition.All].Files, + f => f.Id == dataFileId + ), + cancellationToken: cancellationToken + ); + } + + public Task UpdateDataFileFilenameFilesAsync( + string dataFileId, + string filename, + CancellationToken cancellationToken = default + ) + { + return Entities.UpdateAllAsync( + e => + e.Corpora.Any(c => + c.SourceFiles.Any(f => f.Id == dataFileId) || c.TargetFiles.Any(f => f.Id == dataFileId) + ) + || e.ParallelCorpora.Any(c => + c.SourceCorpora.Any(mc => mc.Files.Any(f => f.Id == dataFileId)) + || c.TargetCorpora.Any(mc => mc.Files.Any(f => f.Id == dataFileId)) + ), + u => + u.Set(e => e.Corpora[ArrayPosition.All].SourceFiles[ArrayPosition.ArrayFilter].Filename, filename) + .Set(e => e.Corpora[ArrayPosition.All].TargetFiles[ArrayPosition.ArrayFilter].Filename, filename) + .Set( + e => + e.ParallelCorpora[ArrayPosition.All] + .SourceCorpora[ArrayPosition.All] + .Files[ArrayPosition.ArrayFilter] + .Filename, + filename + ) + .Set( + e => + e.ParallelCorpora[ArrayPosition.All] + .TargetCorpora[ArrayPosition.All] + .Files[ArrayPosition.ArrayFilter] + .Filename, + filename + ), + jsonArrayFilterDefinition: $"{{ \"arrayFilter._id\": {{$eq: ObjectId(\"{dataFileId}\") }} }}", + cancellationToken: cancellationToken + ); + } + + public Task UpdateCorpusFilesAsync( + string corpusId, + IReadOnlyList files, + CancellationToken cancellationToken = default + ) + { + return Entities.UpdateAllAsync( + e => + e.ParallelCorpora.Any(c => + c.SourceCorpora.Any(mc => mc.Id == corpusId) || c.TargetCorpora.Any(mc => mc.Id == corpusId) + ), + u => + u.Set(e => e.ParallelCorpora[ArrayPosition.All].SourceCorpora[ArrayPosition.ArrayFilter].Files, files) + .Set( + e => e.ParallelCorpora[ArrayPosition.All].TargetCorpora[ArrayPosition.ArrayFilter].Files, + files + ), + jsonArrayFilterDefinition: $"{{ \"arrayFilter._id\": {{$eq: ObjectId(\"{corpusId}\") }} }}", + cancellationToken: cancellationToken ); } diff --git a/src/Serval/src/Serval.Translation/Services/IEngineService.cs b/src/Serval/src/Serval.Translation/Services/IEngineService.cs index 6497ac1a..c25d522c 100644 --- a/src/Serval/src/Serval.Translation/Services/IEngineService.cs +++ b/src/Serval/src/Serval.Translation/Services/IEngineService.cs @@ -68,6 +68,18 @@ Task DeleteParallelCorpusAsync( Task DeleteAllCorpusFilesAsync(string dataFileId, CancellationToken cancellationToken = default); + Task UpdateDataFileFilenameFilesAsync( + string dataFileId, + string filename, + CancellationToken cancellationToken = default + ); + + Task UpdateCorpusFilesAsync( + string corpusId, + IReadOnlyList files, + CancellationToken cancellationToken = default + ); + Task GetQueueAsync(string engineType, CancellationToken cancellationToken = default); Task GetLanguageInfoAsync( diff --git a/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs b/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs index d66b3557..f0cdbdff 100644 --- a/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs +++ b/src/Serval/test/Serval.ApiServer.IntegrationTests/TranslationEngineTests.cs @@ -19,11 +19,11 @@ public class TranslationEngineTests TargetLanguage = "en", SourceFiles = { - new TranslationCorpusFileConfig { FileId = FILE1_ID, TextId = "all" } + new TranslationCorpusFileConfig { FileId = FILE1_SRC_ID, TextId = "all" } }, TargetFiles = { - new TranslationCorpusFileConfig { FileId = FILE2_ID, TextId = "all" } + new TranslationCorpusFileConfig { FileId = FILE2_TRG_ID, TextId = "all" } } }; private static readonly TranslationParallelCorpusConfig TestParallelCorpusConfig = @@ -49,11 +49,11 @@ public class TranslationEngineTests TargetLanguage = "es", SourceFiles = { - new TranslationCorpusFileConfig { FileId = FILE1_ID, TextId = "all" } + new TranslationCorpusFileConfig { FileId = FILE1_SRC_ID, TextId = "all" } }, TargetFiles = { - new TranslationCorpusFileConfig { FileId = FILE2_ID, TextId = "all" } + new TranslationCorpusFileConfig { FileId = FILE2_TRG_ID, TextId = "all" } } }; @@ -63,8 +63,8 @@ public class TranslationEngineTests Name = "TestCorpus", SourceLanguage = "en", TargetLanguage = "en", - SourceFiles = { new TranslationCorpusFileConfig { FileId = FILE3_ID } }, - TargetFiles = { new TranslationCorpusFileConfig { FileId = FILE4_ID } } + SourceFiles = { new TranslationCorpusFileConfig { FileId = FILE3_SRC_ZIP_ID } }, + TargetFiles = { new TranslationCorpusFileConfig { FileId = FILE4_TRG_ZIP_ID } } }; private const string ECHO_ENGINE1_ID = "e00000000000000000000001"; @@ -72,13 +72,13 @@ public class TranslationEngineTests private const string ECHO_ENGINE3_ID = "e00000000000000000000003"; private const string SMT_ENGINE1_ID = "be0000000000000000000001"; private const string NMT_ENGINE1_ID = "ce0000000000000000000001"; - private const string FILE1_ID = "f00000000000000000000001"; + private const string FILE1_SRC_ID = "f00000000000000000000001"; private const string FILE1_FILENAME = "file_a"; - private const string FILE2_ID = "f00000000000000000000002"; + private const string FILE2_TRG_ID = "f00000000000000000000002"; private const string FILE2_FILENAME = "file_b"; - private const string FILE3_ID = "f00000000000000000000003"; + private const string FILE3_SRC_ZIP_ID = "f00000000000000000000003"; private const string FILE3_FILENAME = "file_c"; - private const string FILE4_ID = "f00000000000000000000004"; + private const string FILE4_TRG_ZIP_ID = "f00000000000000000000004"; private const string FILE4_FILENAME = "file_d"; private const string SOURCE_CORPUS_ID_1 = "cc0000000000000000000001"; private const string SOURCE_CORPUS_ID_2 = "cc0000000000000000000002"; @@ -147,7 +147,7 @@ public async Task SetUp() var srcFile = new DataFiles.Models.DataFile { - Id = FILE1_ID, + Id = FILE1_SRC_ID, Owner = "client1", Name = "src.txt", Filename = FILE1_FILENAME, @@ -155,7 +155,7 @@ public async Task SetUp() }; var trgFile = new DataFiles.Models.DataFile { - Id = FILE2_ID, + Id = FILE2_TRG_ID, Owner = "client1", Name = "trg.txt", Filename = FILE2_FILENAME, @@ -163,7 +163,7 @@ public async Task SetUp() }; var srcParatextFile = new DataFiles.Models.DataFile { - Id = FILE3_ID, + Id = FILE3_SRC_ZIP_ID, Owner = "client1", Name = "src.zip", Filename = FILE3_FILENAME, @@ -171,7 +171,7 @@ public async Task SetUp() }; var trgParatextFile = new DataFiles.Models.DataFile { - Id = FILE4_ID, + Id = FILE4_TRG_ZIP_ID, Owner = "client1", Name = "trg.zip", Filename = FILE4_FILENAME, @@ -184,21 +184,21 @@ public async Task SetUp() Id = SOURCE_CORPUS_ID_1, Language = "en", Owner = "client1", - Files = [new() { File = srcFile, TextId = "all" }] + Files = [new() { FileId = srcFile.Id, TextId = "all" }] }; var srcCorpus2 = new DataFiles.Models.Corpus { Id = SOURCE_CORPUS_ID_2, Language = "en", Owner = "client1", - Files = [new() { File = srcFile, TextId = "all" }] + Files = [new() { FileId = srcFile.Id, TextId = "all" }] }; var trgCorpus = new DataFiles.Models.Corpus { Id = TARGET_CORPUS_ID, Language = "en", Owner = "client1", - Files = [new() { File = trgFile, TextId = "all" }] + Files = [new() { FileId = trgFile.Id, TextId = "all" }] }; await _env.Corpora.InsertAllAsync([srcCorpus, srcCorpus2, trgCorpus]); } @@ -594,8 +594,8 @@ public async Task AddCorpusToEngineByIdAsync(IEnumerable scope, int expe Assert.Multiple(() => { Assert.That(result.Name, Is.EqualTo("TestCorpus")); - Assert.That(result.SourceFiles.First().File.Id, Is.EqualTo(FILE1_ID)); - Assert.That(result.TargetFiles.First().File.Id, Is.EqualTo(FILE2_ID)); + Assert.That(result.SourceFiles.First().File.Id, Is.EqualTo(FILE1_SRC_ID)); + Assert.That(result.TargetFiles.First().File.Id, Is.EqualTo(FILE2_TRG_ID)); }); Engine? engine = await _env.Engines.GetAsync(engineId); Assert.That(engine, Is.Not.Null); @@ -651,11 +651,11 @@ string engineId TranslationCorpus result = await client.AddCorpusAsync(engineId, TestCorpusConfig); TranslationCorpusFileConfig[] src = new[] { - new TranslationCorpusFileConfig { FileId = FILE2_ID, TextId = "all" } + new TranslationCorpusFileConfig { FileId = FILE2_TRG_ID, TextId = "all" } }; TranslationCorpusFileConfig[] trg = new[] { - new TranslationCorpusFileConfig { FileId = FILE1_ID, TextId = "all" } + new TranslationCorpusFileConfig { FileId = FILE1_SRC_ID, TextId = "all" } }; var updateConfig = new TranslationCorpusUpdateConfig { SourceFiles = src, TargetFiles = trg }; await client.UpdateCorpusAsync(engineId, result.Id, updateConfig); @@ -675,11 +675,11 @@ string engineId { TranslationCorpusFileConfig[] src = new[] { - new TranslationCorpusFileConfig { FileId = FILE2_ID, TextId = "all" } + new TranslationCorpusFileConfig { FileId = FILE2_TRG_ID, TextId = "all" } }; TranslationCorpusFileConfig[] trg = new[] { - new TranslationCorpusFileConfig { FileId = FILE1_ID, TextId = "all" } + new TranslationCorpusFileConfig { FileId = FILE1_SRC_ID, TextId = "all" } }; var updateConfig = new TranslationCorpusUpdateConfig { SourceFiles = src, TargetFiles = trg }; await client.UpdateCorpusAsync(engineId, DOES_NOT_EXIST_CORPUS_ID, updateConfig); @@ -2031,6 +2031,56 @@ public void GetLanguageInfo_Error() Assert.That(ex.StatusCode, Is.EqualTo(403)); } + [Test] + public async Task DataFileUpdate_Propagated() + { + TranslationEnginesClient translationClient = _env.CreateTranslationEnginesClient(); + DataFilesClient dataFilesClient = _env.CreateDataFilesClient(); + CorporaClient corporaClient = _env.CreateCorporaClient(); + await translationClient.AddCorpusAsync(ECHO_ENGINE1_ID, TestCorpusConfig); + await translationClient.AddParallelCorpusAsync(ECHO_ENGINE2_ID, TestParallelCorpusConfig); + + // Get the original files + DataFile orgFileFromClient = await dataFilesClient.GetAsync(FILE1_SRC_ID); + DataFiles.Models.DataFile orgFileFromRepo = (await _env.DataFiles.GetAsync(FILE1_SRC_ID))!; + DataFiles.Models.Corpus orgCorpusFromRepo = (await _env.Corpora.GetAsync(TARGET_CORPUS_ID))!; + Assert.That(orgFileFromClient.Name, Is.EqualTo(orgFileFromRepo.Name)); + Assert.That(orgCorpusFromRepo.Files[0].FileId, Is.EqualTo(FILE2_TRG_ID)); + + // Update the file + await dataFilesClient.UpdateAsync( + FILE1_SRC_ID, + new FileParameter(new MemoryStream(new byte[] { 1, 2, 3 }), "test.txt") + ); + await corporaClient.UpdateAsync( + TARGET_CORPUS_ID, + [new CorpusFileConfig { FileId = FILE4_TRG_ZIP_ID, TextId = "all" }] + ); + + // Confirm the change is propagated everywhere + DataFiles.Models.DataFile newFileFromRepo = (await _env.DataFiles.GetAsync(FILE1_SRC_ID))!; + Assert.That(newFileFromRepo.Filename, Is.Not.EqualTo(orgFileFromRepo.Filename)); + + Engine newEngine1 = (await _env.Engines.GetAsync(ECHO_ENGINE1_ID))!; + Engine newEngine2 = (await _env.Engines.GetAsync(ECHO_ENGINE2_ID))!; + + // Updated (legacy) Corpus file filename + Assert.That(newEngine1.Corpora[0].SourceFiles[0].Filename, Is.EqualTo(newFileFromRepo.Filename)); + Assert.That(newEngine1.Corpora[0].TargetFiles[0].Filename, Is.EqualTo(FILE2_FILENAME)); + + // Updated parallel corpus file filename + Assert.That( + newEngine2.ParallelCorpora[0].SourceCorpora[0].Files[0].Filename, + Is.EqualTo(newFileFromRepo.Filename) + ); + + // Updated set of new corpus files + Assert.That(newEngine2.ParallelCorpora[0].TargetCorpora[0].Id, Is.EqualTo(TARGET_CORPUS_ID)); + Assert.That(newEngine2.ParallelCorpora[0].TargetCorpora[0].Files[0].Id, Is.EqualTo(FILE4_TRG_ZIP_ID)); + Assert.That(newEngine2.ParallelCorpora[0].TargetCorpora[0].Files[0].Filename, Is.EqualTo(FILE4_FILENAME)); + Assert.That(newEngine2.ParallelCorpora[0].TargetCorpora[0].Files.Count, Is.EqualTo(1)); + } + [TearDown] public void TearDown() { @@ -2235,13 +2285,13 @@ public TestEnvironment() public TranslationEnginesClient CreateTranslationEnginesClient(IEnumerable? scope = null) { - scope ??= new[] - { + scope ??= + [ Scopes.CreateTranslationEngines, Scopes.ReadTranslationEngines, Scopes.UpdateTranslationEngines, Scopes.DeleteTranslationEngines - }; + ]; HttpClient httpClient = Factory .WithWebHostBuilder(builder => { @@ -2265,13 +2315,13 @@ public TranslationEnginesClient CreateTranslationEnginesClient(IEnumerable? scope = null) { - scope ??= new[] - { + scope ??= + [ Scopes.CreateTranslationEngines, Scopes.ReadTranslationEngines, Scopes.UpdateTranslationEngines, Scopes.DeleteTranslationEngines - }; + ]; HttpClient httpClient = Factory .WithWebHostBuilder(builder => { @@ -2300,6 +2350,32 @@ public TranslationEngineTypesClient CreateTranslationEngineTypesClient(IEnumerab return new TranslationEngineTypesClient(httpClient); } + public DataFilesClient CreateDataFilesClient() + { + IEnumerable scope = [Scopes.DeleteFiles, Scopes.ReadFiles, Scopes.UpdateFiles, Scopes.CreateFiles]; + HttpClient httpClient = Factory + .WithWebHostBuilder(builder => + { + builder.ConfigureTestServices(services => + { + services.AddTransient(CreateFileSystem); + }); + }) + .CreateClient(); + if (scope is not null) + httpClient.DefaultRequestHeaders.Add("Scope", string.Join(" ", scope)); + return new DataFilesClient(httpClient); + } + + public CorporaClient CreateCorporaClient() + { + IEnumerable scope = [Scopes.DeleteFiles, Scopes.ReadFiles, Scopes.UpdateFiles, Scopes.CreateFiles]; + HttpClient httpClient = Factory.WithWebHostBuilder(_ => { }).CreateClient(); + if (scope is not null) + httpClient.DefaultRequestHeaders.Add("Scope", string.Join(" ", scope)); + return new CorporaClient(httpClient); + } + public void ResetDatabases() { _mongoClient.DropDatabase("serval_test"); @@ -2337,6 +2413,7 @@ private static IFileSystem CreateFileSystem(IServiceProvider sp) target.EntryExists("MATTRG.SFM").Returns(false); return target; }); + fileSystem.OpenWrite(Arg.Any()).Returns(ci => new MemoryStream()); return fileSystem; } diff --git a/src/Serval/test/Serval.DataFiles.Tests/Services/CorpusServiceTests.cs b/src/Serval/test/Serval.DataFiles.Tests/Services/CorpusServiceTests.cs index 22cdd14e..a9e498b4 100644 --- a/src/Serval/test/Serval.DataFiles.Tests/Services/CorpusServiceTests.cs +++ b/src/Serval/test/Serval.DataFiles.Tests/Services/CorpusServiceTests.cs @@ -21,7 +21,7 @@ public class CorpusServiceTests Owner = "owner1", Name = "corpus1", Language = "en", - Files = new List() { new() { File = DefaultDataFile } } + Files = new List() { new() { FileId = DefaultDataFile.Id } } }; [Test] @@ -47,11 +47,25 @@ private class TestEnvironment public TestEnvironment() { Corpora = new MemoryRepository(); - Service = new CorpusService(Corpora); + DataAccessContext = Substitute.For(); + DataAccessContext + .WithTransactionAsync(Arg.Any>>(), Arg.Any()) + .Returns(x => + { + return ((Func>)x[0])((CancellationToken)x[1]); + }); + Service = new CorpusService( + Corpora, + DataAccessContext, + Substitute.For(), + Substitute.For() + ); } public MemoryRepository Corpora { get; } public CorpusService Service { get; } + + public IDataAccessContext DataAccessContext { get; } } }