From 8e0d367b9900b017d3eeaa4a606daed4a737fb01 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 21 Nov 2024 22:08:03 -0500 Subject: [PATCH] Lots of broken. --- .../appsettings.json | 3 + .../Serval.Machine.JobServer/appsettings.json | 3 + .../IMachineBuilderExtensions.cs | 39 +++++--- .../IServiceCollectionExtensions.cs | 1 + ...tions.cs => WordAlignmentEngineOptions.cs} | 4 +- .../WordAlignmentModelOptions.cs | 14 +++ .../Serval.Machine.Shared.csproj | 2 + .../Services/IModelFactory.cs | 15 +++ .../Services/IWordAlignmentModelFactory.cs | 16 +++ .../Services/ModelFactoryBase.cs | 74 ++++++++++++++ .../ServalWordAlignmentEngineServiceV1.cs | 4 +- .../Services/StatisticalEngineService.cs | 16 +-- .../StatisticalPostprocessBuildJob.cs | 4 +- .../Services/ThotSmtModelFactory.cs | 62 +----------- .../Services/WordAlignmentEngineState.cs | 99 +++++++++++++++++++ .../WordAlignmentEngineStateService.cs | 78 +++++++++++++++ .../Services/WordAlignmentModelFactory.cs | 49 +++++++++ .../src/Serval.Machine.Shared/Usings.cs | 1 + 18 files changed, 397 insertions(+), 87 deletions(-) rename src/Machine/src/Serval.Machine.Shared/Configuration/{StatisticalEngineOptions.cs => WordAlignmentEngineOptions.cs} (82%) create mode 100644 src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentModelOptions.cs create mode 100644 src/Machine/src/Serval.Machine.Shared/Services/IModelFactory.cs create mode 100644 src/Machine/src/Serval.Machine.Shared/Services/IWordAlignmentModelFactory.cs create mode 100644 src/Machine/src/Serval.Machine.Shared/Services/ModelFactoryBase.cs create mode 100644 src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineState.cs create mode 100644 src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineStateService.cs create mode 100644 src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentModelFactory.cs diff --git a/src/Machine/src/Serval.Machine.EngineServer/appsettings.json b/src/Machine/src/Serval.Machine.EngineServer/appsettings.json index f6dc8eb0..03344cef 100644 --- a/src/Machine/src/Serval.Machine.EngineServer/appsettings.json +++ b/src/Machine/src/Serval.Machine.EngineServer/appsettings.json @@ -38,6 +38,9 @@ "SmtTransferEngine": { "EnginesDir": "/var/lib/machine/engines" }, + "WordAlignmentEngine": { + "EnginesDir": "/var/lib/machine/engines" + }, "ClearML": { "BuildPollingEnabled": true }, diff --git a/src/Machine/src/Serval.Machine.JobServer/appsettings.json b/src/Machine/src/Serval.Machine.JobServer/appsettings.json index e6c1dda9..95df90ca 100644 --- a/src/Machine/src/Serval.Machine.JobServer/appsettings.json +++ b/src/Machine/src/Serval.Machine.JobServer/appsettings.json @@ -39,6 +39,9 @@ "SmtTransferEngine": { "EnginesDir": "/var/lib/machine/engines" }, + "WordAlignmentEngine": { + "EnginesDir": "/var/lib/machine/engines" + }, "ClearML": { "BuildPollingEnabled": false }, diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs index 062da3ec..c6100b47 100644 --- a/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs @@ -18,6 +18,12 @@ public static IMachineBuilder AddSmtTransferEngineOptions(this IMachineBuilder b return builder; } + public static IMachineBuilder AddWordAlignmentEngineOptions(this IMachineBuilder builder, IConfiguration config) + { + builder.Services.Configure(config); + return builder; + } + public static IMachineBuilder AddClearMLOptions(this IMachineBuilder builder, IConfiguration config) { builder.Services.Configure(config); @@ -178,6 +184,8 @@ public static IMachineBuilder AddHangfireJobServer( switch (engineType) { case EngineType.SmtTransfer: + builder.Services.AddSingleton(); + builder.Services.AddHostedService(); builder.AddThot(); queues.Add("smt_transfer"); break; @@ -185,6 +193,8 @@ public static IMachineBuilder AddHangfireJobServer( queues.Add("nmt"); break; case EngineType.Statistical: + builder.Services.AddSingleton(); + builder.Services.AddHostedService(); builder.AddThot(); queues.Add("statistical"); break; @@ -454,6 +464,8 @@ public static IMachineBuilder AddServalWordAlignmentEngineService( switch (engineType) { case EngineType.Statistical: + builder.Services.AddSingleton(); + builder.Services.AddHostedService(); builder.AddThot(); builder.Services.AddScoped(); break; @@ -469,8 +481,6 @@ public static IMachineBuilder AddThot(this IMachineBuilder builder) { try { - builder.Services.AddSingleton(); - builder.Services.AddHostedService(); builder.AddThotSmtModel().AddTransferEngine().AddUnigramTruecaser(); } catch (ArgumentException) @@ -480,7 +490,7 @@ public static IMachineBuilder AddThot(this IMachineBuilder builder) return builder; } - public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, string? smtTransferEngineDir = null) + public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder) { builder.Services.AddScoped, TranslationBuildJobService>(); builder.Services.AddScoped, BuildJobService>(); @@ -499,21 +509,22 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, s builder.Services.AddScoped(); builder.Services.AddScoped(); - if (smtTransferEngineDir is null) - { - var smtTransferEngineOptions = new SmtTransferEngineOptions(); - builder.Configuration.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions); - smtTransferEngineDir = smtTransferEngineOptions.EnginesDir; - } - string? driveLetter = Path.GetPathRoot(smtTransferEngineDir)?[..1]; - if (driveLetter is null) - throw new InvalidOperationException("SMT Engine directory is required"); + var smtTransferEngineOptions = new SmtTransferEngineOptions(); + builder.Configuration.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions); + string? smtDriveLetter = Path.GetPathRoot(smtTransferEngineOptions.EnginesDir)?[..1]; + var statisticsEngineOptions = new WordAlignmentEngineOptions(); + builder.Configuration.GetSection(WordAlignmentEngineOptions.Key).Bind(statisticsEngineOptions); + string? statisticsDriveLetter = Path.GetPathRoot(statisticsEngineOptions.EnginesDir)?[..1]; + if (smtDriveLetter is null || statisticsDriveLetter is null) + throw new InvalidOperationException("SMT Engine and Statistical directory is required"); + if (smtDriveLetter != statisticsDriveLetter) + throw new InvalidOperationException("SMT Engine and Statistical directory must be on the same drive"); // add health check for disk storage capacity builder .Services.AddHealthChecks() .AddDiskStorageHealthCheck( - x => x.AddDrive(driveLetter, 1_000), // 1GB - "SMT Engine Storage Capacity", + x => x.AddDrive(smtDriveLetter, 1_000), // 1GB + "SMT and Statistical Engine Storage Capacity", HealthStatus.Degraded ); diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs index c72302b9..ee4f26ee 100644 --- a/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs @@ -25,6 +25,7 @@ public static IMachineBuilder AddMachine(this IServiceCollection services, IConf builder.AddServiceOptions(configuration.GetSection(ServiceOptions.Key)); builder.AddSharedFileOptions(configuration.GetSection(SharedFileOptions.Key)); builder.AddSmtTransferEngineOptions(configuration.GetSection(SmtTransferEngineOptions.Key)); + builder.AddWordAlignmentEngineOptions(configuration.GetSection(WordAlignmentEngineOptions.Key)); builder.AddClearMLOptions(configuration.GetSection(ClearMLOptions.Key)); builder.AddDistributedReaderWriterLockOptions(configuration.GetSection(DistributedReaderWriterLockOptions.Key)); builder.AddBuildJobOptions(configuration.GetSection(BuildJobOptions.Key)); diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/StatisticalEngineOptions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentEngineOptions.cs similarity index 82% rename from src/Machine/src/Serval.Machine.Shared/Configuration/StatisticalEngineOptions.cs rename to src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentEngineOptions.cs index 68254e4c..fd692afb 100644 --- a/src/Machine/src/Serval.Machine.Shared/Configuration/StatisticalEngineOptions.cs +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentEngineOptions.cs @@ -1,8 +1,8 @@ namespace Serval.Machine.Shared.Configuration; -public class StatisticalEngineOptions +public class WordAlignmentEngineOptions { - public const string Key = "StatisticalEngine"; + public const string Key = "WordAlignmentEngine"; public string EnginesDir { get; set; } = "word_alignment_engines"; public TimeSpan EngineCommitFrequency { get; set; } = TimeSpan.FromMinutes(5); diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentModelOptions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentModelOptions.cs new file mode 100644 index 00000000..ec0223df --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentModelOptions.cs @@ -0,0 +1,14 @@ +namespace Serval.Machine.Shared.Configuration; + +public class WordAlignmentModelOptions +{ + public const string Key = "WordAlignmentModel"; + + public WordAlignmentModelOptions() + { + string installDir = Path.GetDirectoryName(Assembly.GetEntryAssembly()!.Location)!; + NewModelFile = Path.Combine(installDir, "thot-new-model.zip"); + } + + public string NewModelFile { get; set; } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj index b9985198..874cb0cd 100644 --- a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj +++ b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj @@ -39,6 +39,7 @@ + @@ -49,6 +50,7 @@ + diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IModelFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/IModelFactory.cs new file mode 100644 index 00000000..61ec1489 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/IModelFactory.cs @@ -0,0 +1,15 @@ +namespace Serval.Machine.Shared.Services; + +public interface IModelFactory +{ + ITrainer CreateTrainer( + string engineDir, + IRangeTokenizer tokenizer, + IParallelTextCorpus corpus + ); + + void InitNew(string engineDir); + void Cleanup(string engineDir); + Task UpdateEngineFromAsync(string engineDir, Stream source, CancellationToken cancellationToken = default); + Task SaveEngineToAsync(string engineDir, Stream destination, CancellationToken cancellationToken = default); +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IWordAlignmentModelFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/IWordAlignmentModelFactory.cs new file mode 100644 index 00000000..e66914d6 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/IWordAlignmentModelFactory.cs @@ -0,0 +1,16 @@ +namespace Serval.Machine.Shared.Services; + +public interface IWordAlignmentModelFactory +{ + IWordAlignmentModel Create(string engineDir, string modelType); + ITrainer CreateTrainer( + string engineDir, + string modelType, + ITokenizer tokenizer, + IParallelTextCorpus corpus + ); + void InitNew(string engineDir); + void Cleanup(string engineDir); + Task UpdateEngineFromAsync(string engineDir, Stream source, CancellationToken cancellationToken = default); + Task SaveEngineToAsync(string engineDir, Stream destination, CancellationToken cancellationToken = default); +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ModelFactoryBase.cs b/src/Machine/src/Serval.Machine.Shared/Services/ModelFactoryBase.cs new file mode 100644 index 00000000..8fea2fbb --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/ModelFactoryBase.cs @@ -0,0 +1,74 @@ +namespace Serval.Machine.Shared.Services; + +public abstract class ModelFactoryBase : IModelFactory +{ + public virtual ITrainer CreateTrainer( + string engineDir, + IRangeTokenizer tokenizer, + IParallelTextCorpus corpus + ) + { + throw new NotImplementedException(); + } + + public virtual void InitNew(string engineDir) + { + throw new NotImplementedException(); + } + + public void Cleanup(string engineDir) + { + if (!Directory.Exists(engineDir)) + return; + DirectoryHelper.DeleteDirectoryRobust(Path.Combine(engineDir, "lm")); + DirectoryHelper.DeleteDirectoryRobust(Path.Combine(engineDir, "tm")); + string smtConfigFileName = Path.Combine(engineDir, "smt.cfg"); + if (File.Exists(smtConfigFileName)) + File.Delete(smtConfigFileName); + if (!Directory.EnumerateFileSystemEntries(engineDir).Any()) + Directory.Delete(engineDir); + } + + public async Task UpdateEngineFromAsync( + string engineDir, + Stream source, + CancellationToken cancellationToken = default + ) + { + if (!Directory.Exists(engineDir)) + Directory.CreateDirectory(engineDir); + + await using MemoryStream memoryStream = new(); + await using (GZipStream gzipStream = new(source, CompressionMode.Decompress)) + { + await gzipStream.CopyToAsync(memoryStream, cancellationToken); + } + memoryStream.Seek(0, SeekOrigin.Begin); + await TarFile.ExtractToDirectoryAsync( + memoryStream, + engineDir, + overwriteFiles: true, + cancellationToken: cancellationToken + ); + } + + public async Task SaveEngineToAsync( + string engineDir, + Stream destination, + CancellationToken cancellationToken = default + ) + { + // create zip archive in memory stream + // This cannot be created directly to the shared stream because it all needs to be written at once + await using MemoryStream memoryStream = new(); + await TarFile.CreateFromDirectoryAsync( + engineDir, + memoryStream, + includeBaseDirectory: false, + cancellationToken: cancellationToken + ); + memoryStream.Seek(0, SeekOrigin.Begin); + await using GZipStream gzipStream = new(destination, CompressionMode.Compress); + await memoryStream.CopyToAsync(gzipStream, cancellationToken); + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentEngineServiceV1.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentEngineServiceV1.cs index a412006b..6023a47e 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentEngineServiceV1.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentEngineServiceV1.cs @@ -116,9 +116,9 @@ private static EngineType GetEngineType(string engineTypeStr) ); } - private static WordAlignmentResult Map(TranslationResult source) + private static WordAlignment.V1.WordAlignmentResult Map(TranslationResult source) { - return new WordAlignmentResult + return new WordAlignment.V1.WordAlignmentResult { SourceTokens = { source.SourceTokens }, TargetTokens = { source.TargetTokens }, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/StatisticalEngineService.cs b/src/Machine/src/Serval.Machine.Shared/Services/StatisticalEngineService.cs index 9e65398f..3781acd6 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/StatisticalEngineService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/StatisticalEngineService.cs @@ -5,7 +5,7 @@ public class StatisticalEngineService( IEnumerable platformServices, IDataAccessContext dataAccessContext, IRepository engines, - SmtTransferEngineStateService stateService, + WordAlignmentEngineStateService stateService, IBuildJobService buildJobService, IClearMLQueueService clearMLQueueService ) : IWordAlignmentEngineService @@ -16,7 +16,7 @@ IClearMLQueueService clearMLQueueService ); private readonly IDataAccessContext _dataAccessContext = dataAccessContext; private readonly IRepository _engines = engines; - private readonly SmtTransferEngineStateService _stateService = stateService; + private readonly WordAlignmentEngineStateService _stateService = stateService; private readonly IBuildJobService _buildJobService = buildJobService; private readonly IClearMLQueueService _clearMLQueueService = clearMLQueueService; @@ -47,7 +47,7 @@ public async Task CreateAsync( cancellationToken: cancellationToken ); - SmtTransferEngineState state = _stateService.Get(engineId); + WordAlignmentEngineState state = _stateService.Get(engineId); state.InitNew(); return wordAlignmentEngine; } @@ -60,13 +60,13 @@ public async Task GetBestPhraseAlignmentAsync( ) { WordAlignmentEngine engine = await GetBuiltEngineAsync(engineId, cancellationToken); - SmtTransferEngineState state = _stateService.Get(engineId); + WordAlignmentEngineState state = _stateService.Get(engineId); IDistributedReaderWriterLock @lock = await _lockFactory.CreateAsync(engineId, cancellationToken); TranslationResult result = await @lock.ReaderLockAsync( async ct => { - HybridTranslationEngine hybridEngine = await state.GetHybridEngineAsync(engine.BuildRevision, ct); + HybridTranslationEngine hybridEngine = await state.GetEngineAsync(engine.BuildRevision, ct); // there is no way to cancel this call return hybridEngine.GetBestPhraseAlignment(sourceSegment, targetSegment); }, @@ -92,7 +92,7 @@ await _dataAccessContext.WithTransactionAsync( ); await _buildJobService.DeleteEngineAsync(engineId, CancellationToken.None); - SmtTransferEngineState state = _stateService.Get(engineId); + WordAlignmentEngineState state = _stateService.Get(engineId); _stateService.Remove(engineId); // there is no way to cancel this call state.DeleteData(); @@ -122,7 +122,7 @@ public async Task StartBuildAsync( if (building) throw new InvalidOperationException("The engine is already building or in the process of canceling."); - SmtTransferEngineState state = _stateService.Get(engineId); + WordAlignmentEngineState state = _stateService.Get(engineId); state.Touch(); } @@ -132,7 +132,7 @@ public async Task CancelBuildAsync(string engineId, CancellationToken cancellati if (!building) throw new InvalidOperationException("The engine is not currently building."); - SmtTransferEngineState state = _stateService.Get(engineId); + WordAlignmentEngineState state = _stateService.Get(engineId); state.Touch(); } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/StatisticalPostprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/StatisticalPostprocessBuildJob.cs index 3a7960e9..cfe8354b 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/StatisticalPostprocessBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/StatisticalPostprocessBuildJob.cs @@ -10,7 +10,7 @@ public class StatisticalPostprocessBuildJob( IDistributedReaderWriterLockFactory lockFactory, ISmtModelFactory smtModelFactory, IOptionsMonitor buildOptions, - IOptionsMonitor engineOptions + IOptionsMonitor engineOptions ) : PostprocessBuildJob( platformServices.First(ps => ps.EngineGroup == EngineGroup.WordAlignment), @@ -23,7 +23,7 @@ IOptionsMonitor engineOptions ) { private readonly ISmtModelFactory _smtModelFactory = smtModelFactory; - private readonly IOptionsMonitor _engineOptions = engineOptions; + private readonly IOptionsMonitor _engineOptions = engineOptions; private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory; protected override async Task DoWorkAsync( diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ThotSmtModelFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/ThotSmtModelFactory.cs index 03f4ab5d..b856a86a 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ThotSmtModelFactory.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ThotSmtModelFactory.cs @@ -1,6 +1,6 @@ namespace Serval.Machine.Shared.Services; -public class ThotSmtModelFactory(IOptionsMonitor options) : ISmtModelFactory +public class ThotSmtModelFactory(IOptionsMonitor options) : ModelFactoryBase, ISmtModelFactory { private readonly IOptionsMonitor _options = options; @@ -24,7 +24,7 @@ ITruecaser truecaser return model; } - public ITrainer CreateTrainer( + public override ITrainer CreateTrainer( string engineDir, IRangeTokenizer tokenizer, IParallelTextCorpus corpus @@ -41,66 +41,10 @@ IParallelTextCorpus corpus return trainer; } - public void InitNew(string engineDir) + public override void InitNew(string engineDir) { if (!Directory.Exists(engineDir)) Directory.CreateDirectory(engineDir); ZipFile.ExtractToDirectory(_options.CurrentValue.NewModelFile, engineDir); } - - public void Cleanup(string engineDir) - { - if (!Directory.Exists(engineDir)) - return; - DirectoryHelper.DeleteDirectoryRobust(Path.Combine(engineDir, "lm")); - DirectoryHelper.DeleteDirectoryRobust(Path.Combine(engineDir, "tm")); - string smtConfigFileName = Path.Combine(engineDir, "smt.cfg"); - if (File.Exists(smtConfigFileName)) - File.Delete(smtConfigFileName); - if (!Directory.EnumerateFileSystemEntries(engineDir).Any()) - Directory.Delete(engineDir); - } - - public async Task UpdateEngineFromAsync( - string engineDir, - Stream source, - CancellationToken cancellationToken = default - ) - { - if (!Directory.Exists(engineDir)) - Directory.CreateDirectory(engineDir); - - await using MemoryStream memoryStream = new(); - await using (GZipStream gzipStream = new(source, CompressionMode.Decompress)) - { - await gzipStream.CopyToAsync(memoryStream, cancellationToken); - } - memoryStream.Seek(0, SeekOrigin.Begin); - await TarFile.ExtractToDirectoryAsync( - memoryStream, - engineDir, - overwriteFiles: true, - cancellationToken: cancellationToken - ); - } - - public async Task SaveEngineToAsync( - string engineDir, - Stream destination, - CancellationToken cancellationToken = default - ) - { - // create zip archive in memory stream - // This cannot be created directly to the shared stream because it all needs to be written at once - await using MemoryStream memoryStream = new(); - await TarFile.CreateFromDirectoryAsync( - engineDir, - memoryStream, - includeBaseDirectory: false, - cancellationToken: cancellationToken - ); - memoryStream.Seek(0, SeekOrigin.Begin); - await using GZipStream gzipStream = new(destination, CompressionMode.Compress); - await memoryStream.CopyToAsync(gzipStream, cancellationToken); - } } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineState.cs b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineState.cs new file mode 100644 index 00000000..0f55feb7 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineState.cs @@ -0,0 +1,99 @@ +using SIL.ObjectModel; + +namespace Serval.Machine.Shared.Services; + +public class WordAlignmentEngineState( + IWordAlignmentModelFactory wordAlignmentModelFactory, + IOptionsMonitor options, + string engineId +) : DisposableBase +{ + private readonly IWordAlignmentModelFactory _wordAlignmentModelFactory = wordAlignmentModelFactory; + private readonly IOptionsMonitor _options = options; + private readonly AsyncLock _lock = new(); + + private IWordAlignmentEngine? _wordAlignmentEngine; + + public string EngineId { get; } = engineId; + + public bool IsUpdated { get; set; } + public int CurrentBuildRevision { get; set; } = -1; + public DateTime LastUsedTime { get; private set; } = DateTime.UtcNow; + public bool IsLoaded => _wordAlignmentEngine != null; + + private string EngineDir => Path.Combine(_options.CurrentValue.EnginesDir, EngineId); + + public void InitNew() + { + _wordAlignmentModelFactory.InitNew(EngineDir); + } + + public async Task GetEngineAsync( + int buildRevision, + CancellationToken cancellationToken = default + ) + { + using (await _lock.LockAsync(cancellationToken)) + { + if (_wordAlignmentEngine is not null && CurrentBuildRevision != -1 && buildRevision != CurrentBuildRevision) + { + IsUpdated = false; + Unload(); + } + + if (_wordAlignmentEngine is null) + { + LatinWordTokenizer tokenizer = new(); + LatinWordDetokenizer detokenizer = new(); + _wordAlignmentEngine = _wordAlignmentModelFactory.Create(EngineDir, tokenizer, detokenizer); + } + CurrentBuildRevision = buildRevision; + return _wordAlignmentEngine; + } + } + + public void DeleteData() + { + Unload(); + _wordAlignmentModelFactory.Cleanup(EngineDir); + } + + public void Commit(int buildRevision, TimeSpan inactiveTimeout) + { + if (_wordAlignmentEngine is null) + return; + + if (CurrentBuildRevision == -1) + CurrentBuildRevision = buildRevision; + if (buildRevision != CurrentBuildRevision) + { + Unload(); + CurrentBuildRevision = buildRevision; + } + else if (DateTime.UtcNow - LastUsedTime > inactiveTimeout) + { + Unload(); + } + } + + public void Touch() + { + LastUsedTime = DateTime.UtcNow; + } + + private void Unload() + { + if (_wordAlignmentEngine is null) + return; + + _wordAlignmentEngine.Dispose(); + + _wordAlignmentEngine = null; + CurrentBuildRevision = -1; + } + + protected override void DisposeManagedResources() + { + Unload(); + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineStateService.cs b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineStateService.cs new file mode 100644 index 00000000..b80a782e --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineStateService.cs @@ -0,0 +1,78 @@ +using SIL.ObjectModel; + +namespace Serval.Machine.Shared.Services; + +public class WordAlignmentEngineStateService( + ISmtModelFactory smtModelFactory, + ITransferEngineFactory transferEngineFactory, + IOptionsMonitor options, + ILogger logger +) : DisposableBase +{ + private readonly ISmtModelFactory _smtModelFactory = smtModelFactory; + private readonly ITransferEngineFactory _transferEngineFactory = transferEngineFactory; + private readonly IOptionsMonitor _options = options; + private readonly ILogger _logger = logger; + + private readonly ConcurrentDictionary _engineStates = + new ConcurrentDictionary(); + + public WordAlignmentEngineState Get(string engineId) + { + return _engineStates.GetOrAdd(engineId, CreateState); + } + + public void Remove(string engineId) + { + _engineStates.TryRemove(engineId, out _); + } + + public async Task CommitAsync( + IDistributedReaderWriterLockFactory lockFactory, + IRepository engines, + TimeSpan inactiveTimeout, + CancellationToken cancellationToken = default + ) + { + foreach (SmtTransferEngineState state in _engineStates.Values) + { + try + { + IDistributedReaderWriterLock @lock = await lockFactory.CreateAsync(state.EngineId, cancellationToken); + await @lock.WriterLockAsync( + async ct => + { + TranslationEngine? engine = await engines.GetAsync(state.EngineId, ct); + if (engine is not null && !(engine.CollectTrainSegmentPairs ?? false)) + // there is no way to cancel this call + state.Commit(engine.BuildRevision, inactiveTimeout); + }, + _options.CurrentValue.EngineCommitTimeout, + cancellationToken: cancellationToken + ); + } + catch (Exception e) + { + _logger.LogError(e, "Error occurred while committing SMT transfer engine {EngineId}.", state.EngineId); + } + } + } + + private SmtTransferEngineState CreateState(string engineId) + { + return new SmtTransferEngineState( + _smtModelFactory, + _transferEngineFactory, + _truecaserFactory, + _options, + engineId + ); + } + + protected override void DisposeManagedResources() + { + foreach (SmtTransferEngineState state in _engineStates.Values) + state.Dispose(); + _engineStates.Clear(); + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentModelFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentModelFactory.cs new file mode 100644 index 00000000..e28bb2d7 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentModelFactory.cs @@ -0,0 +1,49 @@ +namespace Serval.Machine.Shared.Services; + +public class WordAlignmentModelFactory(IOptionsMonitor options) + : ModelFactoryBase, + IWordAlignmentModelFactory +{ + private readonly IOptionsMonitor _options = options; + + public IWordAlignmentModel Create(string engineDir, string modelType) + { + var modelPath = Path.Combine(engineDir, "tm", "src_trg"); + ThotWordAlignmentModelType thotModelType = ThotWordAlignmentHelpers.GetThotWordAlignmentModelType(modelType); + var directModel = ThotWordAlignmentModel.Create(thotModelType); + directModel.Load(modelPath + "_invswm"); + + var inverseModel = ThotWordAlignmentModel.Create(thotModelType); + inverseModel.Load(modelPath + "_swm"); + + return new SymmetrizedWordAlignmentModel(directModel, inverseModel); + } + + public ITrainer CreateTrainer( + string engineDir, + string modelType, + ITokenizer tokenizer, + IParallelTextCorpus corpus + ) + { + var modelPath = Path.Combine(engineDir, "tm", "src_trg"); + ThotWordAlignmentModelType thotModelType = ThotWordAlignmentHelpers.GetThotWordAlignmentModelType(modelType); + var directModel = ThotWordAlignmentModel.Create(thotModelType); + directModel.Load(modelPath + "_invswm"); + + var inverseModel = ThotWordAlignmentModel.Create(thotModelType); + inverseModel.Load(modelPath + "_swm"); + + ITrainer directTrainer = directModel.CreateTrainer(corpus, tokenizer); + ITrainer inverseTrainer = inverseModel.CreateTrainer(corpus.Invert(), tokenizer); + + return new SymmetrizedWordAlignmentModelTrainer(directTrainer, inverseTrainer); + } + + public override void InitNew(string engineDir) + { + if (!Directory.Exists(engineDir)) + Directory.CreateDirectory(engineDir); + ZipFile.ExtractToDirectory(_options.CurrentValue.NewModelFile, engineDir); + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Usings.cs b/src/Machine/src/Serval.Machine.Shared/Usings.cs index ea49e89d..80f64e3f 100644 --- a/src/Machine/src/Serval.Machine.Shared/Usings.cs +++ b/src/Machine/src/Serval.Machine.Shared/Usings.cs @@ -48,6 +48,7 @@ global using Serval.Machine.Shared.Services; global using Serval.Machine.Shared.Utils; global using SIL.DataAccess; +global using SIL.Machine; global using SIL.Machine.Corpora; global using SIL.Machine.Morphology.HermitCrab; global using SIL.Machine.Tokenization;