From 5a5dfab194473ac1fd156e54511910d06ed0213d Mon Sep 17 00:00:00 2001 From: John Lambert Date: Fri, 7 Jun 2024 15:32:55 -0400 Subject: [PATCH] ClearML sometimes returns numbers, not only strings in the Realtime stats. Preserve changes from https://github.com/sillsdev/machine/pull/205. Fixes found during integration testing. --- .../Models/ClearMLTask.cs | 74 +++++++++++++++++ .../Services/NmtPreprocessBuildJob.cs | 5 +- .../Services/PostprocessBuildJob.cs | 4 +- .../Services/PreprocessBuildJob.cs | 18 ++++- .../SmtTransferPostprocessBuildJob.cs | 4 +- .../appsettings.json | 2 +- .../appsettings.json | 2 +- .../Services/NmtEngineServiceTests.cs | 2 +- .../Services/PreprocessBuildJobTests.cs | 81 +++++++++++++------ .../Services/SmtTransferEngineServiceTests.cs | 2 +- 10 files changed, 156 insertions(+), 38 deletions(-) diff --git a/src/SIL.Machine.AspNetCore/Models/ClearMLTask.cs b/src/SIL.Machine.AspNetCore/Models/ClearMLTask.cs index dbb998952..be9887d20 100644 --- a/src/SIL.Machine.AspNetCore/Models/ClearMLTask.cs +++ b/src/SIL.Machine.AspNetCore/Models/ClearMLTask.cs @@ -29,5 +29,79 @@ public required IReadOnlyDictionary< string, IReadOnlyDictionary > LastMetrics { get; init; } + + [JsonConverter(typeof(DictionaryStringStringConverter))] public required IReadOnlyDictionary Runtime { get; init; } } + +internal sealed class DictionaryStringStringConverter : JsonConverter> +{ + public override IReadOnlyDictionary Read( + ref Utf8JsonReader reader, + Type typeToConvert, + JsonSerializerOptions options + ) + { + if (reader.TokenType != JsonTokenType.StartObject) + { + throw new JsonException($"JsonTokenType was of type {reader.TokenType}, only objects are supported"); + } + + var dictionary = new Dictionary(); + while (reader.Read()) + { + if (reader.TokenType == JsonTokenType.EndObject) + { + return dictionary; + } + + if (reader.TokenType != JsonTokenType.PropertyName) + { + throw new JsonException("JsonTokenType was not PropertyName"); + } + + var propertyName = 
reader.GetString(); + + if (string.IsNullOrWhiteSpace(propertyName)) + { + throw new JsonException("Failed to get property name"); + } + + reader.Read(); + + dictionary.Add(propertyName!, ExtractValue(ref reader)); + } + + return dictionary; + } + + public override void Write( + Utf8JsonWriter writer, + IReadOnlyDictionary value, + JsonSerializerOptions options + ) + { + JsonSerializer.Serialize(writer, value, options); + } + + private static string ExtractValue(ref Utf8JsonReader reader) + { + switch (reader.TokenType) + { + case JsonTokenType.String: + return reader.GetString() ?? "Error Reading String."; + case JsonTokenType.False: + return "false"; + case JsonTokenType.True: + return "true"; + case JsonTokenType.Null: + return "null"; + case JsonTokenType.Number: + if (reader.TryGetDouble(out var result)) + return result.ToString(CultureInfo.InvariantCulture); + return "Error Reading Number."; + default: + throw new JsonException($"'{reader.TokenType}' is not supported"); + } + } +} diff --git a/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs b/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs index 1dd9c38f5..5ba4d99d1 100644 --- a/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs +++ b/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs @@ -13,9 +13,8 @@ ILanguageTagService languageTagService { private readonly ILanguageTagService _languageTagService = languageTagService; - protected override string ResolveLanguageCode(string languageCode) + protected override bool ResolveLanguageCodeForBaseModel(string languageCode, out string resolvedCode) { - _languageTagService.ConvertToFlores200Code(languageCode, out string resolvedCode); - return resolvedCode; + return _languageTagService.ConvertToFlores200Code(languageCode, out resolvedCode); } } diff --git a/src/SIL.Machine.AspNetCore/Services/PostprocessBuildJob.cs b/src/SIL.Machine.AspNetCore/Services/PostprocessBuildJob.cs index 1bf7a4389..f5d7c2c98 100644 --- 
a/src/SIL.Machine.AspNetCore/Services/PostprocessBuildJob.cs +++ b/src/SIL.Machine.AspNetCore/Services/PostprocessBuildJob.cs @@ -30,7 +30,7 @@ CancellationToken cancellationToken await using (await @lock.WriterLockAsync(cancellationToken: CancellationToken.None)) { - int additionalCorpusSize = await SaveModelAsync(engineId, buildId); + int additionalCorpusSize = await SaveModelAsync(engineId); await PlatformService.BuildCompletedAsync( buildId, corpusSize + additionalCorpusSize, @@ -43,7 +43,7 @@ await PlatformService.BuildCompletedAsync( Logger.LogInformation("Build completed ({0}).", buildId); } - protected virtual Task SaveModelAsync(string engineId, string buildId) + protected virtual Task SaveModelAsync(string engineId) { return Task.FromResult(0); } diff --git a/src/SIL.Machine.AspNetCore/Services/PreprocessBuildJob.cs b/src/SIL.Machine.AspNetCore/Services/PreprocessBuildJob.cs index 5fd1614c4..c5d011fe0 100644 --- a/src/SIL.Machine.AspNetCore/Services/PreprocessBuildJob.cs +++ b/src/SIL.Machine.AspNetCore/Services/PreprocessBuildJob.cs @@ -70,10 +70,19 @@ CancellationToken cancellationToken if (engine is null) throw new OperationCanceledException($"Engine {engineId} does not exist. 
Build canceled."); - buildPreprocessSummary.Add("SourceLanguageResolved", ResolveLanguageCode(engine.SourceLanguage)); - buildPreprocessSummary.Add("TargetLanguageResolved", ResolveLanguageCode(engine.TargetLanguage)); + bool sourceTagInBaseModel = ResolveLanguageCodeForBaseModel(engine.SourceLanguage, out string srcLang); + buildPreprocessSummary.Add("SourceLanguageResolved", srcLang); + bool targetTagInBaseModel = ResolveLanguageCodeForBaseModel(engine.TargetLanguage, out string trgLang); + buildPreprocessSummary.Add("TargetLanguageResolved", trgLang); Logger.LogInformation("{summary}", buildPreprocessSummary.ToJsonString()); + if (trainCount == 0 && (!sourceTagInBaseModel || !targetTagInBaseModel)) + { + throw new InvalidOperationException( + $"At least one language code in build {buildId} is unknown to the base model, and the data specified for training was empty. Build canceled." + ); + } + cancellationToken.ThrowIfCancellationRequested(); await using (await @lock.WriterLockAsync(cancellationToken: cancellationToken)) @@ -418,8 +427,9 @@ private record Row( int RowCount ); - protected virtual string ResolveLanguageCode(string languageCode) + protected virtual bool ResolveLanguageCodeForBaseModel(string languageCode, out string resolvedCode) { - return languageCode; + resolvedCode = languageCode; + return true; } } diff --git a/src/SIL.Machine.AspNetCore/Services/SmtTransferPostprocessBuildJob.cs b/src/SIL.Machine.AspNetCore/Services/SmtTransferPostprocessBuildJob.cs index 38c65de64..435d82263 100644 --- a/src/SIL.Machine.AspNetCore/Services/SmtTransferPostprocessBuildJob.cs +++ b/src/SIL.Machine.AspNetCore/Services/SmtTransferPostprocessBuildJob.cs @@ -18,11 +18,11 @@ IOptionsMonitor options private readonly IRepository _trainSegmentPairs = trainSegmentPairs; private readonly IOptionsMonitor _options = options; - protected override async Task SaveModelAsync(string engineId, string buildId) + protected override async Task SaveModelAsync(string engineId) { await using
( Stream engineStream = await SharedFileService.OpenReadAsync( - $"builds/{buildId}/model.zip", + $"models/{engineId}.zip", CancellationToken.None ) ) diff --git a/src/SIL.Machine.Serval.EngineServer/appsettings.json b/src/SIL.Machine.Serval.EngineServer/appsettings.json index 828d2a41e..12f4a051c 100644 --- a/src/SIL.Machine.Serval.EngineServer/appsettings.json +++ b/src/SIL.Machine.Serval.EngineServer/appsettings.json @@ -20,7 +20,7 @@ }, { "TranslationEngineType": "SmtTransfer", - "ModelType": "hmm", + "ModelType": "thot", "Queue": "cpu_only", "DockerImage": "ghcr.io/sillsdev/machine.py:latest" } diff --git a/src/SIL.Machine.Serval.JobServer/appsettings.json b/src/SIL.Machine.Serval.JobServer/appsettings.json index 2e83382ba..4ff49d691 100644 --- a/src/SIL.Machine.Serval.JobServer/appsettings.json +++ b/src/SIL.Machine.Serval.JobServer/appsettings.json @@ -20,7 +20,7 @@ }, { "TranslationEngineType": "SmtTransfer", - "ModelType": "hmm", + "ModelType": "thot", "Queue": "jobs_backlog", "DockerImage": "ghcr.io/sillsdev/machine.py:latest" } diff --git a/tests/SIL.Machine.AspNetCore.Tests/Services/NmtEngineServiceTests.cs b/tests/SIL.Machine.AspNetCore.Tests/Services/NmtEngineServiceTests.cs index 292f0c35c..33d25da3b 100644 --- a/tests/SIL.Machine.AspNetCore.Tests/Services/NmtEngineServiceTests.cs +++ b/tests/SIL.Machine.AspNetCore.Tests/Services/NmtEngineServiceTests.cs @@ -139,7 +139,7 @@ public TestEnvironment() new ClearMLBuildQueue() { TranslationEngineType = TranslationEngineType.SmtTransfer, - ModelType = "hmm", + ModelType = "thot", DockerImage = "default", Queue = "default" } diff --git a/tests/SIL.Machine.AspNetCore.Tests/Services/PreprocessBuildJobTests.cs b/tests/SIL.Machine.AspNetCore.Tests/Services/PreprocessBuildJobTests.cs index 0b8b6551c..122e4388f 100644 --- a/tests/SIL.Machine.AspNetCore.Tests/Services/PreprocessBuildJobTests.cs +++ b/tests/SIL.Machine.AspNetCore.Tests/Services/PreprocessBuildJobTests.cs @@ -223,6 +223,15 @@ public void 
RunAsync_UnknownLanguageTagsNoData() }); } + [Test] + public async Task RunAsync_UnknownLanguageTagsNoDataSmtTransfer() + { + using TestEnvironment env = new(); + Corpus corpus1 = env.DefaultTextFileCorpus with { SourceLanguage = "xxx", TargetLanguage = "zzz" }; + + await env.RunBuildJobAsync(corpus1, engineId: "engine2", engineType: TranslationEngineType.SmtTransfer); + } + private class TestEnvironment : ObjectModel.DisposableBase { private static readonly string TestDataPath = Path.Combine( @@ -242,9 +251,7 @@ private class TestEnvironment : ObjectModel.DisposableBase public MemoryRepository Engines { get; } public IDistributedReaderWriterLockFactory LockFactory { get; } public IBuildJobService BuildJobService { get; } - public ILogger Logger { get; } public IClearMLService ClearMLService { get; } - public PreprocessBuildJob BuildJob { get; } public IOptionsMonitor BuildJobOptions { get; } public Corpus DefaultTextFileCorpus { get; } @@ -399,7 +406,7 @@ public TestEnvironment() new ClearMLBuildQueue() { TranslationEngineType = TranslationEngineType.SmtTransfer, - ModelType = "hmm", + ModelType = "thot", DockerImage = "default", Queue = "default" } @@ -426,7 +433,6 @@ public TestEnvironment() ) .Returns(Task.FromResult("job1")); SharedFileService = new SharedFileService(Substitute.For()); - Logger = Substitute.For>(); BuildJobService = new BuildJobService( [ [ @@ -450,29 +456,58 @@ [new NmtHangfireBuildJobFactory()] ], Engines ); - BuildJob = new PreprocessBuildJob( - PlatformService, - Engines, - LockFactory, - Logger, - BuildJobService, - SharedFileService, - CorpusService - ) + } + + public PreprocessBuildJob GetBuildJob(TranslationEngineType engineType) + { + switch (engineType) { - Seed = 1234 - }; + case TranslationEngineType.Nmt: + { + return new NmtPreprocessBuildJob( + PlatformService, + Engines, + LockFactory, + Substitute.For>(), + BuildJobService, + SharedFileService, + CorpusService, + new LanguageTagService() + ) + { + Seed = 1234 + }; + } + case 
TranslationEngineType.SmtTransfer: + { + return new PreprocessBuildJob( + PlatformService, + Engines, + LockFactory, + Substitute.For>(), + BuildJobService, + SharedFileService, + CorpusService + ) + { + Seed = 1234 + }; + } + default: + throw new InvalidOperationException("Unknown engine type."); + } + ; } - public Task RunBuildJobAsync(Corpus corpus, bool useKeyTerms = true, string engineId = "engine1") + public Task RunBuildJobAsync( + Corpus corpus, + bool useKeyTerms = true, + string engineId = "engine1", + TranslationEngineType engineType = TranslationEngineType.Nmt + ) { - return BuildJob.RunAsync( - engineId, - "build1", - [corpus], - useKeyTerms ? null : "{\"use_key_terms\":false}", - default - ); + return GetBuildJob(engineType) + .RunAsync(engineId, "build1", [corpus], useKeyTerms ? null : "{\"use_key_terms\":false}", default); } public async Task<(int Source1Count, int Source2Count, int TargetCount, int TermCount)> GetTrainCountAsync() diff --git a/tests/SIL.Machine.AspNetCore.Tests/Services/SmtTransferEngineServiceTests.cs b/tests/SIL.Machine.AspNetCore.Tests/Services/SmtTransferEngineServiceTests.cs index 648a28796..799dcbae1 100644 --- a/tests/SIL.Machine.AspNetCore.Tests/Services/SmtTransferEngineServiceTests.cs +++ b/tests/SIL.Machine.AspNetCore.Tests/Services/SmtTransferEngineServiceTests.cs @@ -273,7 +273,7 @@ public TestEnvironment(BuildJobRunnerType trainJobRunnerType = BuildJobRunnerTyp new ClearMLBuildQueue() { TranslationEngineType = TranslationEngineType.SmtTransfer, - ModelType = "hmm", + ModelType = "thot", DockerImage = "default", Queue = "default" }