Skip to content

Commit

Permalink
ClearML sometimes returns numbers, not only strings in the Realtime s…
Browse files Browse the repository at this point in the history
…tats.

Preserve changes from #205.
Fixes found during integration testing.
  • Loading branch information
johnml1135 committed Jun 7, 2024
1 parent 791cd4a commit 5a5dfab
Show file tree
Hide file tree
Showing 10 changed files with 156 additions and 38 deletions.
74 changes: 74 additions & 0 deletions src/SIL.Machine.AspNetCore/Models/ClearMLTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,79 @@ public required IReadOnlyDictionary<
string,
IReadOnlyDictionary<string, ClearMLMetricsEvent>
> LastMetrics { get; init; }

[JsonConverter(typeof(DictionaryStringStringConverter))]
public required IReadOnlyDictionary<string, string> Runtime { get; init; }
}

/// <summary>
/// Reads a flat JSON object into a string-to-string dictionary, accepting string, number,
/// boolean and null values and converting each of them to its textual form.
/// Nested objects and arrays are not supported and cause a <see cref="JsonException"/>.
/// </summary>
internal sealed class DictionaryStringStringConverter : JsonConverter<IReadOnlyDictionary<string, string>>
{
    /// <summary>
    /// Reads the JSON object the reader is positioned on into a dictionary of strings.
    /// </summary>
    /// <param name="reader">Reader positioned on the <see cref="JsonTokenType.StartObject"/> token.</param>
    /// <param name="typeToConvert">Not used.</param>
    /// <param name="options">Not used.</param>
    /// <returns>The property names mapped to the textual form of their values.</returns>
    /// <exception cref="JsonException">
    /// The current token is not an object start, a property name is missing or blank,
    /// a property has no value token, or a value is an object or array.
    /// </exception>
    public override IReadOnlyDictionary<string, string> Read(
        ref Utf8JsonReader reader,
        Type typeToConvert,
        JsonSerializerOptions options
    )
    {
        if (reader.TokenType != JsonTokenType.StartObject)
        {
            throw new JsonException($"JsonTokenType was of type {reader.TokenType}, only objects are supported");
        }

        var dictionary = new Dictionary<string, string>();
        while (reader.Read())
        {
            if (reader.TokenType == JsonTokenType.EndObject)
            {
                return dictionary;
            }

            if (reader.TokenType != JsonTokenType.PropertyName)
            {
                throw new JsonException("JsonTokenType was not PropertyName");
            }

            var propertyName = reader.GetString();

            if (string.IsNullOrWhiteSpace(propertyName))
            {
                throw new JsonException("Failed to get property name");
            }

            // Advance to the value token; fail with a clear message instead of
            // handing the stale PropertyName token to ExtractValue.
            if (!reader.Read())
            {
                throw new JsonException($"No value found for property '{propertyName}'");
            }

            dictionary.Add(propertyName!, ExtractValue(ref reader));
        }

        // The reader ran out of tokens before EndObject; return what was read so far.
        return dictionary;
    }

    /// <summary>
    /// Writes the dictionary by delegating to <see cref="JsonSerializer"/> with the supplied options.
    /// </summary>
    /// <param name="writer">Destination writer.</param>
    /// <param name="value">Dictionary to serialize.</param>
    /// <param name="options">Options passed through to the serializer.</param>
    public override void Write(
        Utf8JsonWriter writer,
        IReadOnlyDictionary<string, string> value,
        JsonSerializerOptions options
    )
    {
        JsonSerializer.Serialize(writer, value, options);
    }

    /// <summary>
    /// Converts the scalar token the reader is positioned on to a string.
    /// </summary>
    /// <param name="reader">Reader positioned on a value token.</param>
    /// <returns>
    /// The string value, <c>"true"</c>/<c>"false"</c>, <c>"null"</c>, or the invariant-culture
    /// text of a number.
    /// </returns>
    /// <exception cref="JsonException">The token is not a string, boolean, null or number.</exception>
    private static string ExtractValue(ref Utf8JsonReader reader)
    {
        switch (reader.TokenType)
        {
            case JsonTokenType.String:
                return reader.GetString() ?? "Error Reading String.";
            case JsonTokenType.False:
                return "false";
            case JsonTokenType.True:
                return "true";
            case JsonTokenType.Null:
                return "null";
            case JsonTokenType.Number:
                // Whole numbers are formatted from Int64 so large values keep every digit
                // and are never rendered in exponent notation, as they would be via double.
                if (reader.TryGetInt64(out long integer))
                    return integer.ToString(CultureInfo.InvariantCulture);
                if (reader.TryGetDouble(out double result))
                    return result.ToString(CultureInfo.InvariantCulture);
                return "Error Reading Number.";
            default:
                throw new JsonException($"'{reader.TokenType}' is not supported");
        }
    }
}
5 changes: 2 additions & 3 deletions src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ ILanguageTagService languageTagService
{
private readonly ILanguageTagService _languageTagService = languageTagService;

protected override string ResolveLanguageCode(string languageCode)
protected override bool ResolveLanguageCodeForBaseModel(string languageCode, out string resolvedCode)
{
_languageTagService.ConvertToFlores200Code(languageCode, out string resolvedCode);
return resolvedCode;
return _languageTagService.ConvertToFlores200Code(languageCode, out resolvedCode);
}
}
4 changes: 2 additions & 2 deletions src/SIL.Machine.AspNetCore/Services/PostprocessBuildJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ CancellationToken cancellationToken

await using (await @lock.WriterLockAsync(cancellationToken: CancellationToken.None))
{
int additionalCorpusSize = await SaveModelAsync(engineId, buildId);
int additionalCorpusSize = await SaveModelAsync(engineId);
await PlatformService.BuildCompletedAsync(
buildId,
corpusSize + additionalCorpusSize,
Expand All @@ -43,7 +43,7 @@ await PlatformService.BuildCompletedAsync(
Logger.LogInformation("Build completed ({0}).", buildId);
}

protected virtual Task<int> SaveModelAsync(string engineId, string buildId)
protected virtual Task<int> SaveModelAsync(string engineId)
{
return Task.FromResult(0);
}
Expand Down
18 changes: 14 additions & 4 deletions src/SIL.Machine.AspNetCore/Services/PreprocessBuildJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,19 @@ CancellationToken cancellationToken
if (engine is null)
throw new OperationCanceledException($"Engine {engineId} does not exist. Build canceled.");

buildPreprocessSummary.Add("SourceLanguageResolved", ResolveLanguageCode(engine.SourceLanguage));
buildPreprocessSummary.Add("TargetLanguageResolved", ResolveLanguageCode(engine.TargetLanguage));
bool sourceTagInBaseModel = ResolveLanguageCodeForBaseModel(engine.SourceLanguage, out string srcLang);
buildPreprocessSummary.Add("SourceLanguageResolved", srcLang);
bool targetTagInBaseModel = ResolveLanguageCodeForBaseModel(engine.TargetLanguage, out string trgLang);
buildPreprocessSummary.Add("TargetLanguageResolved", trgLang);
Logger.LogInformation("{summary}", buildPreprocessSummary.ToJsonString());

if (trainCount == 0 && (!sourceTagInBaseModel || !targetTagInBaseModel))
{
throw new InvalidOperationException(
$"Neither language code in build {buildId} are known to the base model, and the data specified for training was empty. Build canceled."
);
}

cancellationToken.ThrowIfCancellationRequested();

await using (await @lock.WriterLockAsync(cancellationToken: cancellationToken))
Expand Down Expand Up @@ -418,8 +427,9 @@ private record Row(
int RowCount
);

protected virtual string ResolveLanguageCode(string languageCode)
protected virtual bool ResolveLanguageCodeForBaseModel(string languageCode, out string resolvedCode)
{
return languageCode;
resolvedCode = languageCode;
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ IOptionsMonitor<SmtTransferEngineOptions> options
private readonly IRepository<TrainSegmentPair> _trainSegmentPairs = trainSegmentPairs;
private readonly IOptionsMonitor<SmtTransferEngineOptions> _options = options;

protected override async Task<int> SaveModelAsync(string engineId, string buildId)
protected override async Task<int> SaveModelAsync(string engineId)
{
await using (
Stream engineStream = await SharedFileService.OpenReadAsync(
$"builds/{buildId}/model.zip",
$"models/{engineId}.zip",
CancellationToken.None
)
)
Expand Down
2 changes: 1 addition & 1 deletion src/SIL.Machine.Serval.EngineServer/appsettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
},
{
"TranslationEngineType": "SmtTransfer",
"ModelType": "hmm",
"ModelType": "thot",
"Queue": "cpu_only",
"DockerImage": "ghcr.io/sillsdev/machine.py:latest"
}
Expand Down
2 changes: 1 addition & 1 deletion src/SIL.Machine.Serval.JobServer/appsettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
},
{
"TranslationEngineType": "SmtTransfer",
"ModelType": "hmm",
"ModelType": "thot",
"Queue": "jobs_backlog",
"DockerImage": "ghcr.io/sillsdev/machine.py:latest"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ public TestEnvironment()
new ClearMLBuildQueue()
{
TranslationEngineType = TranslationEngineType.SmtTransfer,
ModelType = "hmm",
ModelType = "thot",
DockerImage = "default",
Queue = "default"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,15 @@ public void RunAsync_UnknownLanguageTagsNoData()
});
}

// Runs a build job for an SmtTransfer engine whose corpus uses the language codes
// "xxx" and "zzz". There are no assertions: the test passes when the job completes
// without throwing.
[Test]
public async Task RunAsync_UnknownLanguageTagsNoDataSmtTransfer()
{
    using var env = new TestEnvironment();
    Corpus unknownTagsCorpus = env.DefaultTextFileCorpus with
    {
        SourceLanguage = "xxx",
        TargetLanguage = "zzz"
    };

    await env.RunBuildJobAsync(
        unknownTagsCorpus,
        engineId: "engine2",
        engineType: TranslationEngineType.SmtTransfer
    );
}

private class TestEnvironment : ObjectModel.DisposableBase
{
private static readonly string TestDataPath = Path.Combine(
Expand All @@ -242,9 +251,7 @@ private class TestEnvironment : ObjectModel.DisposableBase
public MemoryRepository<TranslationEngine> Engines { get; }
public IDistributedReaderWriterLockFactory LockFactory { get; }
public IBuildJobService BuildJobService { get; }
public ILogger<PreprocessBuildJob> Logger { get; }
public IClearMLService ClearMLService { get; }
public PreprocessBuildJob BuildJob { get; }
public IOptionsMonitor<BuildJobOptions> BuildJobOptions { get; }

public Corpus DefaultTextFileCorpus { get; }
Expand Down Expand Up @@ -399,7 +406,7 @@ public TestEnvironment()
new ClearMLBuildQueue()
{
TranslationEngineType = TranslationEngineType.SmtTransfer,
ModelType = "hmm",
ModelType = "thot",
DockerImage = "default",
Queue = "default"
}
Expand All @@ -426,7 +433,6 @@ public TestEnvironment()
)
.Returns(Task.FromResult("job1"));
SharedFileService = new SharedFileService(Substitute.For<ILoggerFactory>());
Logger = Substitute.For<ILogger<NmtPreprocessBuildJob>>();
BuildJobService = new BuildJobService(
[
[
Expand All @@ -450,29 +456,58 @@ [new NmtHangfireBuildJobFactory()]
],
Engines

Check failure on line 457 in tests/SIL.Machine.AspNetCore.Tests/Services/PreprocessBuildJobTests.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

Syntax error, ']' expected

Check failure on line 457 in tests/SIL.Machine.AspNetCore.Tests/Services/PreprocessBuildJobTests.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

Syntax error, ']' expected

Check failure on line 457 in tests/SIL.Machine.AspNetCore.Tests/Services/PreprocessBuildJobTests.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

Syntax error, ']' expected

Check failure on line 457 in tests/SIL.Machine.AspNetCore.Tests/Services/PreprocessBuildJobTests.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

Syntax error, ']' expected
);
BuildJob = new PreprocessBuildJob(
PlatformService,
Engines,
LockFactory,
Logger,
BuildJobService,
SharedFileService,
CorpusService
)
}

public PreprocessBuildJob GetBuildJob(TranslationEngineType engineType)
{
switch (engineType)
{
Seed = 1234
};
case TranslationEngineType.Nmt:
{
return new NmtPreprocessBuildJob(
PlatformService,
Engines,
LockFactory,
Substitute.For<ILogger<NmtPreprocessBuildJob>>(),
BuildJobService,
SharedFileService,
CorpusService,
new LanguageTagService()
)
{
Seed = 1234
};
}
case TranslationEngineType.SmtTransfer:
{
return new PreprocessBuildJob(
PlatformService,
Engines,
LockFactory,
Substitute.For<ILogger<PreprocessBuildJob>>(),
BuildJobService,
SharedFileService,
CorpusService
)
{
Seed = 1234
};
}
default:
throw new InvalidOperationException("Unknown engine type.");
}
;
}

public Task RunBuildJobAsync(Corpus corpus, bool useKeyTerms = true, string engineId = "engine1")
public Task RunBuildJobAsync(
Corpus corpus,
bool useKeyTerms = true,
string engineId = "engine1",
TranslationEngineType engineType = TranslationEngineType.Nmt
)
{
return BuildJob.RunAsync(
engineId,
"build1",
[corpus],
useKeyTerms ? null : "{\"use_key_terms\":false}",
default
);
return GetBuildJob(engineType)
.RunAsync(engineId, "build1", [corpus], useKeyTerms ? null : "{\"use_key_terms\":false}", default);
}

public async Task<(int Source1Count, int Source2Count, int TargetCount, int TermCount)> GetTrainCountAsync()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ public TestEnvironment(BuildJobRunnerType trainJobRunnerType = BuildJobRunnerTyp
new ClearMLBuildQueue()
{
TranslationEngineType = TranslationEngineType.SmtTransfer,
ModelType = "hmm",
ModelType = "thot",
DockerImage = "default",
Queue = "default"
}
Expand Down

0 comments on commit 5a5dfab

Please sign in to comment.