Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Smt on clearml #200

Merged
merged 4 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ csharp_new_line_before_finally = true
csharp_new_line_before_members_in_object_initializers = true
csharp_new_line_before_members_in_anonymous_types = true

# Indentation settings
csharp_indent_case_contents_when_block = false

# Namespace settings
csharp_style_namespace_declarations = file_scoped

Expand Down
3 changes: 1 addition & 2 deletions src/SIL.Machine.AspNetCore/Configuration/BuildJobOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,5 @@ public class BuildJobOptions
{
public const string Key = "BuildJob";

public Dictionary<BuildJobType, BuildJobRunner> Runners { get; set; } =
new() { { BuildJobType.Cpu, BuildJobRunner.Hangfire }, { BuildJobType.Gpu, BuildJobRunner.ClearML } };
public IList<ClearMLBuildQueue> ClearML { get; set; } = new List<ClearMLBuildQueue>();
}
9 changes: 9 additions & 0 deletions src/SIL.Machine.AspNetCore/Configuration/ClearMLBuildQueue.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
namespace SIL.Machine.AspNetCore.Configuration;

/// <summary>
/// One ClearML queue entry in the build job configuration: a translation engine type
/// together with the model type, queue name, and Docker image configured for it.
/// </summary>
/// <remarks>
/// NOTE(review): presumably each entry selects the ClearML queue used for builds of the
/// given engine type; confirm against the code that consumes these entries.
/// </remarks>
public class ClearMLBuildQueue
{
    /// <summary>The translation engine type this entry is configured for.</summary>
    public TranslationEngineType TranslationEngineType { get; set; }

    /// <summary>The model type; an empty string unless configured.</summary>
    public string ModelType { get; set; } = string.Empty;

    /// <summary>The queue name; "default" unless configured.</summary>
    public string Queue { get; set; } = "default";

    /// <summary>The Docker image; an empty string unless configured.</summary>
    public string DockerImage { get; set; } = string.Empty;
}
3 changes: 0 additions & 3 deletions src/SIL.Machine.AspNetCore/Configuration/ClearMLOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,10 @@ public class ClearMLOptions
{
public const string Key = "ClearML";

public string Queue { get; set; } = "default";
public string AccessKey { get; set; } = "";
public string SecretKey { get; set; } = "";
public bool BuildPollingEnabled { get; set; } = false;
public TimeSpan BuildPollingTimeout { get; set; } = TimeSpan.FromSeconds(10);
public string ModelType { get; set; } = "huggingface";
public string RootProject { get; set; } = "Machine";
public string Project { get; set; } = "dev";
public string DockerImage { get; set; } = "";
}
120 changes: 43 additions & 77 deletions src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,21 @@ public static IMachineBuilder AddSharedFileOptions(this IMachineBuilder builder,
return builder;
}

/// <summary>
/// Registers a delegate that configures <see cref="BuildJobOptions"/> in the builder's
/// service collection.
/// </summary>
/// <param name="builder">The machine builder whose services receive the registration.</param>
/// <param name="configureOptions">The action applied to the <see cref="BuildJobOptions"/> instance.</param>
/// <returns>The same <paramref name="builder"/>, so calls can be chained.</returns>
public static IMachineBuilder AddBuildJobOptions(
    this IMachineBuilder builder,
    Action<BuildJobOptions> configureOptions
)
{
    var services = builder.Services;
    services.Configure<BuildJobOptions>(configureOptions);
    return builder;
}

/// <summary>
/// Registers <see cref="BuildJobOptions"/> in the builder's service collection, bound to
/// the supplied configuration.
/// </summary>
/// <param name="builder">The machine builder whose services receive the registration.</param>
/// <param name="config">The configuration that <see cref="BuildJobOptions"/> is bound against.</param>
/// <returns>The same <paramref name="builder"/>, so calls can be chained.</returns>
public static IMachineBuilder AddBuildJobOptions(this IMachineBuilder builder, IConfiguration config)
{
    var services = builder.Services;
    services.Configure<BuildJobOptions>(config);
    return builder;
}

public static IMachineBuilder AddThotSmtModel(this IMachineBuilder builder)
{
if (builder.Configuration is null)
Expand Down Expand Up @@ -131,26 +146,6 @@ public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, st
return builder;
}

private static IMachineBuilder AddClearMLBuildJobRunner(this IMachineBuilder builder)
{
builder.Services.AddScoped<IBuildJobRunner, ClearMLBuildJobRunner>();
builder.Services.AddScoped<IClearMLBuildJobFactory, NmtClearMLBuildJobFactory>();
builder.Services.AddSingleton<ClearMLMonitorService>();
builder.Services.AddHostedService(p => p.GetRequiredService<ClearMLMonitorService>());

return builder;
}

private static IMachineBuilder AddHangfireBuildJobRunner(this IMachineBuilder builder)
{
builder.Services.AddScoped<IBuildJobRunner, HangfireBuildJobRunner>();

builder.Services.AddScoped<IHangfireBuildJobFactory, SmtTransferHangfireBuildJobFactory>();
builder.Services.AddScoped<IHangfireBuildJobFactory, NmtHangfireBuildJobFactory>();

return builder;
}

private static MongoStorageOptions GetMongoStorageOptions()
{
var mongoStorageOptions = new MongoStorageOptions
Expand Down Expand Up @@ -200,6 +195,7 @@ public static IMachineBuilder AddHangfireJobServer(
switch (engineType)
{
case TranslationEngineType.SmtTransfer:
builder.Services.AddSingleton<SmtTransferEngineStateService>();
builder.AddThotSmtModel().AddTransferEngine().AddUnigramTruecaser();
queues.Add("smt_transfer");
break;
Expand Down Expand Up @@ -252,7 +248,7 @@ await c.Indexes.CreateOrUpdateAsync(
);
await c.Indexes.CreateOrUpdateAsync(
new CreateIndexModel<TranslationEngine>(
Builders<TranslationEngine>.IndexKeys.Ascending(e => e.CurrentBuild!.JobRunner)
Builders<TranslationEngine>.IndexKeys.Ascending(e => e.CurrentBuild!.BuildJobRunner)
)
);
}
Expand Down Expand Up @@ -360,49 +356,38 @@ public static IMachineBuilder AddServalTranslationEngineService(
return builder;
}

public static IMachineBuilder AddBuildJobService(
this IMachineBuilder builder,
Action<BuildJobOptions> configureOptions
)
public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, string? smtTransferEngineDir = null)
{
builder.Services.Configure(configureOptions);
var options = new BuildJobOptions();
configureOptions(options);
return builder.AddBuildJobService(options);
}
builder.Services.AddScoped<IBuildJobService, BuildJobService>();

public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, IConfiguration config)
{
builder.Services.Configure<BuildJobOptions>(config);
var buildJobOptions = new BuildJobOptions();
config.GetSection(BuildJobOptions.Key).Bind(buildJobOptions);
return builder.AddBuildJobService(buildJobOptions);
}
builder.Services.AddScoped<IBuildJobRunner, ClearMLBuildJobRunner>();
builder.Services.AddScoped<IClearMLBuildJobFactory, NmtClearMLBuildJobFactory>();
builder.Services.AddScoped<IClearMLBuildJobFactory, SmtTransferClearMLBuildJobFactory>();
builder.Services.AddSingleton<ClearMLMonitorService>();
builder.Services.AddSingleton<IClearMLQueueService>(x => x.GetRequiredService<ClearMLMonitorService>());
builder.Services.AddHostedService(p => p.GetRequiredService<ClearMLMonitorService>());

public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder)
{
if (builder.Configuration is null)
{
builder.AddBuildJobService(o => { });
}
else
{
builder.AddBuildJobService(builder.Configuration.GetSection(BuildJobOptions.Key));
builder.Services.AddScoped<IBuildJobRunner, HangfireBuildJobRunner>();
builder.Services.AddScoped<IHangfireBuildJobFactory, NmtHangfireBuildJobFactory>();
builder.Services.AddScoped<IHangfireBuildJobFactory, SmtTransferHangfireBuildJobFactory>();

if (smtTransferEngineDir is null)
{
var smtTransferEngineOptions = new SmtTransferEngineOptions();
builder.Configuration.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions);
string? driveLetter = Path.GetPathRoot(smtTransferEngineOptions.EnginesDir)?[..1];
if (driveLetter is null)
throw new InvalidOperationException("SMT Engine directory is required");
// add health check for disk storage capacity
builder
.Services.AddHealthChecks()
.AddDiskStorageHealthCheck(
x => x.AddDrive(driveLetter, 1_000), // 1GB
"SMT Engine Storage Capacity",
HealthStatus.Degraded
);
builder.Configuration?.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions);
smtTransferEngineDir = smtTransferEngineOptions.EnginesDir;
}
string? driveLetter = Path.GetPathRoot(smtTransferEngineDir)?[..1];
if (driveLetter is null)
throw new InvalidOperationException("SMT Engine directory is required");
// add health check for disk storage capacity
builder
.Services.AddHealthChecks()
.AddDiskStorageHealthCheck(
x => x.AddDrive(driveLetter, 1_000), // 1GB
"SMT Engine Storage Capacity",
HealthStatus.Degraded
);

return builder;
}
Expand All @@ -412,23 +397,4 @@ public static IMachineBuilder AddModelCleanupService(this IMachineBuilder builde
builder.Services.AddHostedService<ModelCleanupService>();
return builder;
}

private static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, BuildJobOptions options)
{
builder.Services.AddScoped<IBuildJobService, BuildJobService>();

foreach (BuildJobRunner runnerType in options.Runners.Values.Distinct())
{
switch (runnerType)
{
case BuildJobRunner.ClearML:
builder.AddClearMLBuildJobRunner();
break;
case BuildJobRunner.Hangfire:
builder.AddHangfireBuildJobRunner();
break;
}
}
return builder;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ public static IMachineBuilder AddMachine(this IServiceCollection services, IConf
builder.AddSharedFileOptions(o => { });
builder.AddSmtTransferEngineOptions(o => { });
builder.AddClearMLOptions(o => { });
builder.AddBuildJobOptions(o => { });
}
else
{
builder.AddServiceOptions(configuration.GetSection(ServiceOptions.Key));
builder.AddSharedFileOptions(configuration.GetSection(SharedFileOptions.Key));
builder.AddSmtTransferEngineOptions(configuration.GetSection(SmtTransferEngineOptions.Key));
builder.AddClearMLOptions(configuration.GetSection(ClearMLOptions.Key));
builder.AddBuildJobOptions(configuration.GetSection(BuildJobOptions.Key));
}
return builder;
}
Expand Down
13 changes: 10 additions & 3 deletions src/SIL.Machine.AspNetCore/Models/Build.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,25 @@ public enum BuildJobState
Canceling
}

public enum BuildJobRunner
public enum BuildJobRunnerType
{
Hangfire,
ClearML
}

/// <summary>
/// The stages a build can be in. The numeric values are spelled out and equal the
/// values the compiler would assign implicitly.
/// </summary>
public enum BuildStage
{
    /// <summary>First stage (value 0).</summary>
    Preprocess = 0,

    /// <summary>Second stage (value 1).</summary>
    Train = 1,

    /// <summary>Third stage (value 2).</summary>
    Postprocess = 2
}

public record Build
{
public required string BuildId { get; init; }
public required BuildJobState JobState { get; init; }
public required string JobId { get; init; }
public required BuildJobRunner JobRunner { get; init; }
public required string Stage { get; init; }
public required BuildJobRunnerType BuildJobRunner { get; init; }
public required BuildStage Stage { get; init; }
public string? Options { get; set; }
}
3 changes: 3 additions & 0 deletions src/SIL.Machine.AspNetCore/Models/ClearMLTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,7 @@ public required IReadOnlyDictionary<
string,
IReadOnlyDictionary<string, ClearMLMetricsEvent>
> LastMetrics { get; init; }

[JsonConverter(typeof(DictionaryStringStringConverter))]
public required IReadOnlyDictionary<string, string> Runtime { get; init; }
}
1 change: 1 addition & 0 deletions src/SIL.Machine.AspNetCore/Models/TranslationEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ public record TranslationEngine : IEntity
public string Id { get; set; } = "";
public int Revision { get; set; } = 1;
public required string EngineId { get; init; }
public required TranslationEngineType Type { get; init; }
public required string SourceLanguage { get; init; }
public required string TargetLanguage { get; init; }
public required bool IsModelPersisted { get; init; }
Expand Down
1 change: 1 addition & 0 deletions src/SIL.Machine.AspNetCore/SIL.Machine.AspNetCore.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
<PackageReference Include="AspNetCore.HealthChecks.MongoDb" Version="6.0.2" />
<PackageReference Include="AspNetCore.HealthChecks.System" Version="6.0.2" />
<PackageReference Include="AWSSDK.S3" Version="3.7.205.8" />
<PackageReference Include="CommunityToolkit.HighPerformance" Version="8.2.2" />
<PackageReference Include="Grpc.AspNetCore" Version="2.57.0" />
<PackageReference Include="Grpc.AspNetCore.HealthChecks" Version="2.57.0" />
<PackageReference Include="HangFire" Version="1.8.5" />
Expand Down
Loading
Loading