Skip to content

Commit

Permalink
Lots of broken.
Browse files Browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Nov 22, 2024
1 parent ad946b2 commit 8e0d367
Show file tree
Hide file tree
Showing 18 changed files with 397 additions and 87 deletions.
3 changes: 3 additions & 0 deletions src/Machine/src/Serval.Machine.EngineServer/appsettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
"SmtTransferEngine": {
"EnginesDir": "/var/lib/machine/engines"
},
"WordAlignmentEngine": {
"EnginesDir": "/var/lib/machine/engines"
},
"ClearML": {
"BuildPollingEnabled": true
},
Expand Down
3 changes: 3 additions & 0 deletions src/Machine/src/Serval.Machine.JobServer/appsettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
"SmtTransferEngine": {
"EnginesDir": "/var/lib/machine/engines"
},
"WordAlignmentEngine": {
"EnginesDir": "/var/lib/machine/engines"
},
"ClearML": {
"BuildPollingEnabled": false
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ public static IMachineBuilder AddSmtTransferEngineOptions(this IMachineBuilder b
return builder;
}

/// <summary>
/// Registers <see cref="WordAlignmentEngineOptions"/> with the builder's service collection, using the
/// supplied configuration as the source of its values.
/// </summary>
/// <param name="builder">The machine builder whose services receive the options registration.</param>
/// <param name="config">The configuration passed to <c>Configure</c> for these options.</param>
/// <returns>The same <paramref name="builder"/>, so calls can be chained.</returns>
public static IMachineBuilder AddWordAlignmentEngineOptions(this IMachineBuilder builder, IConfiguration config)
{
    var services = builder.Services;
    services.Configure<WordAlignmentEngineOptions>(config);

    return builder;
}

public static IMachineBuilder AddClearMLOptions(this IMachineBuilder builder, IConfiguration config)
{
builder.Services.Configure<ClearMLOptions>(config);
Expand Down Expand Up @@ -178,13 +184,17 @@ public static IMachineBuilder AddHangfireJobServer(
switch (engineType)
{
case EngineType.SmtTransfer:
builder.Services.AddSingleton<SmtTransferEngineStateService>();
builder.Services.AddHostedService<SmtTransferEngineCommitService>();
builder.AddThot();
queues.Add("smt_transfer");
break;
case EngineType.Nmt:
queues.Add("nmt");
break;
case EngineType.Statistical:
builder.Services.AddSingleton<WordAlignmentEngineStateService>();
builder.Services.AddHostedService<StatisticalEngineCommitService>();

Check failure on line 197 in src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs

View workflow job for this annotation

GitHub Actions / Build

The type or namespace name 'StatisticalEngineCommitService' could not be found (are you missing a using directive or an assembly reference?)
builder.AddThot();
queues.Add("statistical");
break;
Expand Down Expand Up @@ -454,6 +464,8 @@ public static IMachineBuilder AddServalWordAlignmentEngineService(
switch (engineType)
{
case EngineType.Statistical:
builder.Services.AddSingleton<WordAlignmentEngineStateService>();
builder.Services.AddHostedService<StatisticalEngineCommitService>();

Check failure on line 468 in src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs

View workflow job for this annotation

GitHub Actions / Build

The type or namespace name 'StatisticalEngineCommitService' could not be found (are you missing a using directive or an assembly reference?)
builder.AddThot();
builder.Services.AddScoped<IWordAlignmentEngineService, StatisticalEngineService>();
break;
Expand All @@ -469,8 +481,6 @@ public static IMachineBuilder AddThot(this IMachineBuilder builder)
{
try
{
builder.Services.AddSingleton<SmtTransferEngineStateService>();
builder.Services.AddHostedService<SmtTransferEngineCommitService>();
builder.AddThotSmtModel().AddTransferEngine().AddUnigramTruecaser();
}
catch (ArgumentException)
Expand All @@ -480,7 +490,7 @@ public static IMachineBuilder AddThot(this IMachineBuilder builder)
return builder;
}

public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, string? smtTransferEngineDir = null)
public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder)
{
builder.Services.AddScoped<IBuildJobService<TranslationEngine>, TranslationBuildJobService>();
builder.Services.AddScoped<IBuildJobService<WordAlignmentEngine>, BuildJobService<WordAlignmentEngine>>();
Expand All @@ -499,21 +509,22 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, s
builder.Services.AddScoped<IHangfireBuildJobFactory, SmtTransferHangfireBuildJobFactory>();
builder.Services.AddScoped<IHangfireBuildJobFactory, StatisticalHangfireBuildJobFactory>();

if (smtTransferEngineDir is null)
{
var smtTransferEngineOptions = new SmtTransferEngineOptions();
builder.Configuration.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions);
smtTransferEngineDir = smtTransferEngineOptions.EnginesDir;
}
string? driveLetter = Path.GetPathRoot(smtTransferEngineDir)?[..1];
if (driveLetter is null)
throw new InvalidOperationException("SMT Engine directory is required");
var smtTransferEngineOptions = new SmtTransferEngineOptions();
builder.Configuration.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions);
string? smtDriveLetter = Path.GetPathRoot(smtTransferEngineOptions.EnginesDir)?[..1];
var statisticsEngineOptions = new WordAlignmentEngineOptions();
builder.Configuration.GetSection(WordAlignmentEngineOptions.Key).Bind(statisticsEngineOptions);
string? statisticsDriveLetter = Path.GetPathRoot(statisticsEngineOptions.EnginesDir)?[..1];
if (smtDriveLetter is null || statisticsDriveLetter is null)
throw new InvalidOperationException("SMT Engine and Statistical directory is required");
if (smtDriveLetter != statisticsDriveLetter)
throw new InvalidOperationException("SMT Engine and Statistical directory must be on the same drive");
// add health check for disk storage capacity
builder
.Services.AddHealthChecks()
.AddDiskStorageHealthCheck(
x => x.AddDrive(driveLetter, 1_000), // 1GB
"SMT Engine Storage Capacity",
x => x.AddDrive(smtDriveLetter, 1_000), // 1GB
"SMT and Statistical Engine Storage Capacity",
HealthStatus.Degraded
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ public static IMachineBuilder AddMachine(this IServiceCollection services, IConf
builder.AddServiceOptions(configuration.GetSection(ServiceOptions.Key));
builder.AddSharedFileOptions(configuration.GetSection(SharedFileOptions.Key));
builder.AddSmtTransferEngineOptions(configuration.GetSection(SmtTransferEngineOptions.Key));
builder.AddWordAlignmentEngineOptions(configuration.GetSection(WordAlignmentEngineOptions.Key));
builder.AddClearMLOptions(configuration.GetSection(ClearMLOptions.Key));
builder.AddDistributedReaderWriterLockOptions(configuration.GetSection(DistributedReaderWriterLockOptions.Key));
builder.AddBuildJobOptions(configuration.GetSection(BuildJobOptions.Key));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
namespace Serval.Machine.Shared.Configuration;

public class StatisticalEngineOptions
public class WordAlignmentEngineOptions
{
public const string Key = "StatisticalEngine";
public const string Key = "WordAlignmentEngine";

public string EnginesDir { get; set; } = "word_alignment_engines";
public TimeSpan EngineCommitFrequency { get; set; } = TimeSpan.FromMinutes(5);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
namespace Serval.Machine.Shared.Configuration;

/// <summary>
/// Options for the word alignment model.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="Key"/> is presumably the configuration section these options are bound from,
/// as with the other options classes in this project — confirm where it is registered.
/// </remarks>
public class WordAlignmentModelOptions
{
    public const string Key = "WordAlignmentModel";

    /// <summary>
    /// Initializes <see cref="NewModelFile"/> to "thot-new-model.zip" inside the application's install directory.
    /// </summary>
    public WordAlignmentModelOptions()
    {
        // Assembly.GetEntryAssembly() returns null when the process is started from unmanaged code, and
        // Assembly.Location is an empty string for single-file deployments. Either case used to surface as
        // an exception here, so fall back to the application base directory instead.
        string? entryLocation = Assembly.GetEntryAssembly()?.Location;
        string? installDir = string.IsNullOrEmpty(entryLocation) ? null : Path.GetDirectoryName(entryLocation);
        if (string.IsNullOrEmpty(installDir))
            installDir = System.AppContext.BaseDirectory;

        NewModelFile = Path.Combine(installDir, "thot-new-model.zip");
    }

    /// <summary>
    /// Path of the "new model" archive; defaults to a file next to the entry assembly.
    /// </summary>
    public string NewModelFile { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
<PackageReference Include="SIL.Machine" Version="3.4.1" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
<PackageReference Include="SIL.Machine.Morphology.HermitCrab" Version="3.4.1" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine.Morphology.HermitCrab\SIL.Machine.Morphology.HermitCrab.csproj')" />
<PackageReference Include="SIL.Machine.Translation.Thot" Version="3.4.1" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine.Translation.Thot\SIL.Machine.Translation.Thot.csproj')" />
<PackageReference Include="SIL.Machine.Tool" Version="3.4.1" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine.Tool\SIL.Machine.Tool.csproj')" />
<PackageReference Include="SIL.WritingSystems" Version="14.1.1" />
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
</ItemGroup>
Expand All @@ -49,6 +50,7 @@
<ProjectReference Include="..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj" Condition="Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
<ProjectReference Include="..\..\..\..\..\machine\src\SIL.Machine.Morphology.HermitCrab\SIL.Machine.Morphology.HermitCrab.csproj" Condition="Exists('..\..\..\..\..\machine\src\SIL.Machine.Morphology.HermitCrab\SIL.Machine.Morphology.HermitCrab.csproj')" />
<ProjectReference Include="..\..\..\..\..\machine\src\SIL.Machine.Translation.Thot\SIL.Machine.Translation.Thot.csproj" Condition="Exists('..\..\..\..\..\machine\src\SIL.Machine.Translation.Thot\SIL.Machine.Translation.Thot.csproj')" />
<ProjectReference Include="..\..\..\..\..\machine\src\SIL.Machine.Tool\SIL.Machine.Tool.csproj" Condition="Exists('..\..\..\..\..\machine\src\SIL.Machine.Tool\SIL.Machine.Tool.csproj')" />
<ProjectReference Include="..\..\..\ServiceToolkit\src\SIL.ServiceToolkit\SIL.ServiceToolkit.csproj" />
<EmbeddedResource Include="data\flores200languages.csv" />
</ItemGroup>
Expand Down
15 changes: 15 additions & 0 deletions src/Machine/src/Serval.Machine.Shared/Services/IModelFactory.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
namespace Serval.Machine.Shared.Services;

/// <summary>
/// Factory contract for working with the model data stored in an engine directory: creating a trainer,
/// initializing and cleaning up the directory, and importing/exporting its contents through streams.
/// </summary>
/// <remarks>
/// ModelFactoryBase provides the shared implementation of <see cref="Cleanup"/>,
/// <see cref="UpdateEngineFromAsync"/> and <see cref="SaveEngineToAsync"/>.
/// </remarks>
public interface IModelFactory
{
/// <summary>Creates a trainer for the engine located in <paramref name="engineDir"/>.</summary>
/// <param name="engineDir">Directory holding the engine's files.</param>
/// <param name="tokenizer">Tokenizer handed to the trainer.</param>
/// <param name="corpus">Parallel corpus handed to the trainer.</param>
ITrainer CreateTrainer(
string engineDir,
IRangeTokenizer<string, int, string> tokenizer,
IParallelTextCorpus corpus
);

/// <summary>Initializes a new engine in <paramref name="engineDir"/>.</summary>
// NOTE(review): what "new" consists of is implementation-specific; ModelFactoryBase leaves it unimplemented.
void InitNew(string engineDir);
/// <summary>Removes the engine's files from <paramref name="engineDir"/>.</summary>
// In ModelFactoryBase this deletes the "lm" and "tm" subdirectories and "smt.cfg", then the directory
// itself if nothing else is left in it.
void Cleanup(string engineDir);
/// <summary>Replaces/updates the contents of <paramref name="engineDir"/> from <paramref name="source"/>.</summary>
// In ModelFactoryBase the stream is read as a gzip-compressed tar archive and extracted over the directory.
Task UpdateEngineFromAsync(string engineDir, Stream source, CancellationToken cancellationToken = default);
/// <summary>Writes the contents of <paramref name="engineDir"/> to <paramref name="destination"/>.</summary>
// In ModelFactoryBase the directory is written as a gzip-compressed tar archive.
Task SaveEngineToAsync(string engineDir, Stream destination, CancellationToken cancellationToken = default);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
namespace Serval.Machine.Shared.Services;

/// <summary>
/// Factory contract for word alignment models stored in an engine directory. Compared with IModelFactory it
/// adds <see cref="Create"/>, takes a model type string, and accepts a plain ITokenizer for training.
/// </summary>
// NOTE(review): no implementation is visible alongside this interface; the member notes below describe the
// signatures only — confirm the details against the implementing class.
public interface IWordAlignmentModelFactory
{
/// <summary>Creates a word alignment model of the given type for the engine in <paramref name="engineDir"/>.</summary>
// NOTE(review): the accepted <paramref name="modelType"/> values are not defined here — confirm with the implementation.
IWordAlignmentModel Create(string engineDir, string modelType);
/// <summary>Creates a trainer for a model of the given type in <paramref name="engineDir"/>.</summary>
/// <param name="engineDir">Directory holding the engine's files.</param>
/// <param name="modelType">Identifier of the model type to train.</param>
/// <param name="tokenizer">Tokenizer handed to the trainer.</param>
/// <param name="corpus">Parallel corpus handed to the trainer.</param>
ITrainer CreateTrainer(
string engineDir,
string modelType,
ITokenizer<string, int, string> tokenizer,
IParallelTextCorpus corpus
);
/// <summary>Initializes a new engine in <paramref name="engineDir"/>.</summary>
void InitNew(string engineDir);
/// <summary>Removes the engine's files from <paramref name="engineDir"/>.</summary>
void Cleanup(string engineDir);
/// <summary>Updates the contents of <paramref name="engineDir"/> from <paramref name="source"/>.</summary>
// NOTE(review): presumably the same gzip-compressed tar format ModelFactoryBase uses — confirm.
Task UpdateEngineFromAsync(string engineDir, Stream source, CancellationToken cancellationToken = default);
/// <summary>Writes the contents of <paramref name="engineDir"/> to <paramref name="destination"/>.</summary>
Task SaveEngineToAsync(string engineDir, Stream destination, CancellationToken cancellationToken = default);
}
74 changes: 74 additions & 0 deletions src/Machine/src/Serval.Machine.Shared/Services/ModelFactoryBase.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
namespace Serval.Machine.Shared.Services;

/// <summary>
/// Base <see cref="IModelFactory"/> that implements the file-system side of the contract (cleanup and
/// gzip-compressed tar import/export of an engine directory). Trainer creation and initialization are
/// left to subclasses; the defaults here throw <see cref="NotImplementedException"/>.
/// </summary>
public abstract class ModelFactoryBase : IModelFactory
{
    /// <summary>Not provided by the base class; subclasses override this to build a trainer.</summary>
    /// <exception cref="NotImplementedException">Always, unless overridden.</exception>
    public virtual ITrainer CreateTrainer(
        string engineDir,
        IRangeTokenizer<string, int, string> tokenizer,
        IParallelTextCorpus corpus
    ) => throw new NotImplementedException();

    /// <summary>Not provided by the base class; subclasses override this to set up a new engine.</summary>
    /// <exception cref="NotImplementedException">Always, unless overridden.</exception>
    public virtual void InitNew(string engineDir) => throw new NotImplementedException();

    /// <summary>
    /// Deletes the "lm" and "tm" subdirectories and the "smt.cfg" file from <paramref name="engineDir"/>,
    /// then removes the directory itself when nothing else remains in it. Does nothing if the directory
    /// does not exist.
    /// </summary>
    public void Cleanup(string engineDir)
    {
        if (Directory.Exists(engineDir))
        {
            foreach (string modelSubDir in new[] { "lm", "tm" })
                DirectoryHelper.DeleteDirectoryRobust(Path.Combine(engineDir, modelSubDir));

            string configPath = Path.Combine(engineDir, "smt.cfg");
            if (File.Exists(configPath))
                File.Delete(configPath);

            // Only remove the engine directory when the cleanup above left it empty.
            bool hasRemainingEntries = Directory.EnumerateFileSystemEntries(engineDir).Any();
            if (!hasRemainingEntries)
                Directory.Delete(engineDir);
        }
    }

    /// <summary>
    /// Reads <paramref name="source"/> as a gzip-compressed tar archive and extracts it into
    /// <paramref name="engineDir"/>, creating the directory if needed and overwriting existing files.
    /// </summary>
    public async Task UpdateEngineFromAsync(
        string engineDir,
        Stream source,
        CancellationToken cancellationToken = default
    )
    {
        if (!Directory.Exists(engineDir))
            Directory.CreateDirectory(engineDir);

        await using var tarBuffer = new MemoryStream();

        // Decompress completely into memory first; the gzip stream (and with it the source stream)
        // is disposed before extraction begins.
        await using (var decompressor = new GZipStream(source, CompressionMode.Decompress))
        {
            await decompressor.CopyToAsync(tarBuffer, cancellationToken);
        }

        tarBuffer.Position = 0;
        await TarFile.ExtractToDirectoryAsync(
            tarBuffer,
            engineDir,
            overwriteFiles: true,
            cancellationToken: cancellationToken
        );
    }

    /// <summary>
    /// Packs the contents of <paramref name="engineDir"/> (without the base directory itself) into a tar
    /// archive and writes it gzip-compressed to <paramref name="destination"/>.
    /// </summary>
    public async Task SaveEngineToAsync(
        string engineDir,
        Stream destination,
        CancellationToken cancellationToken = default
    )
    {
        // The tar archive is assembled in memory rather than streamed straight to the destination,
        // because the shared stream needs the data written in a single pass.
        await using var tarBuffer = new MemoryStream();
        await TarFile.CreateFromDirectoryAsync(
            engineDir,
            tarBuffer,
            includeBaseDirectory: false,
            cancellationToken: cancellationToken
        );
        tarBuffer.Position = 0;

        await using var compressor = new GZipStream(destination, CompressionMode.Compress);
        await tarBuffer.CopyToAsync(compressor, cancellationToken);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ private static EngineType GetEngineType(string engineTypeStr)
);
}

private static WordAlignmentResult Map(TranslationResult source)
private static WordAlignment.V1.WordAlignmentResult Map(TranslationResult source)
{
return new WordAlignmentResult
return new WordAlignment.V1.WordAlignmentResult
{
SourceTokens = { source.SourceTokens },
TargetTokens = { source.TargetTokens },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ public class StatisticalEngineService(
IEnumerable<IPlatformService> platformServices,
IDataAccessContext dataAccessContext,
IRepository<WordAlignmentEngine> engines,
SmtTransferEngineStateService stateService,
WordAlignmentEngineStateService stateService,
IBuildJobService<WordAlignmentEngine> buildJobService,
IClearMLQueueService clearMLQueueService
) : IWordAlignmentEngineService
Expand All @@ -16,7 +16,7 @@ IClearMLQueueService clearMLQueueService
);
private readonly IDataAccessContext _dataAccessContext = dataAccessContext;
private readonly IRepository<WordAlignmentEngine> _engines = engines;
private readonly SmtTransferEngineStateService _stateService = stateService;
private readonly WordAlignmentEngineStateService _stateService = stateService;
private readonly IBuildJobService<WordAlignmentEngine> _buildJobService = buildJobService;
private readonly IClearMLQueueService _clearMLQueueService = clearMLQueueService;

Expand Down Expand Up @@ -47,7 +47,7 @@ public async Task<WordAlignmentEngine> CreateAsync(
cancellationToken: cancellationToken
);

SmtTransferEngineState state = _stateService.Get(engineId);
WordAlignmentEngineState state = _stateService.Get(engineId);
state.InitNew();
return wordAlignmentEngine;
}
Expand All @@ -60,13 +60,13 @@ public async Task<TranslationResult> GetBestPhraseAlignmentAsync(
)
{
WordAlignmentEngine engine = await GetBuiltEngineAsync(engineId, cancellationToken);
SmtTransferEngineState state = _stateService.Get(engineId);
WordAlignmentEngineState state = _stateService.Get(engineId);

IDistributedReaderWriterLock @lock = await _lockFactory.CreateAsync(engineId, cancellationToken);
TranslationResult result = await @lock.ReaderLockAsync(
async ct =>
{
HybridTranslationEngine hybridEngine = await state.GetHybridEngineAsync(engine.BuildRevision, ct);
HybridTranslationEngine hybridEngine = await state.GetEngineAsync(engine.BuildRevision, ct);

Check failure on line 69 in src/Machine/src/Serval.Machine.Shared/Services/StatisticalEngineService.cs

View workflow job for this annotation

GitHub Actions / Build

Cannot implicitly convert type 'SIL.Machine.Translation.IWordAlignmentEngine' to 'SIL.Machine.Translation.HybridTranslationEngine'. An explicit conversion exists (are you missing a cast?)
// there is no way to cancel this call
return hybridEngine.GetBestPhraseAlignment(sourceSegment, targetSegment);

Check failure on line 71 in src/Machine/src/Serval.Machine.Shared/Services/StatisticalEngineService.cs

View workflow job for this annotation

GitHub Actions / Build

'HybridTranslationEngine' does not contain a definition for 'GetBestPhraseAlignment' and no accessible extension method 'GetBestPhraseAlignment' accepting a first argument of type 'HybridTranslationEngine' could be found (are you missing a using directive or an assembly reference?)
},
Expand All @@ -92,7 +92,7 @@ await _dataAccessContext.WithTransactionAsync(
);
await _buildJobService.DeleteEngineAsync(engineId, CancellationToken.None);

SmtTransferEngineState state = _stateService.Get(engineId);
WordAlignmentEngineState state = _stateService.Get(engineId);
_stateService.Remove(engineId);
// there is no way to cancel this call
state.DeleteData();
Expand Down Expand Up @@ -122,7 +122,7 @@ public async Task StartBuildAsync(
if (building)
throw new InvalidOperationException("The engine is already building or in the process of canceling.");

SmtTransferEngineState state = _stateService.Get(engineId);
WordAlignmentEngineState state = _stateService.Get(engineId);
state.Touch();
}

Expand All @@ -132,7 +132,7 @@ public async Task CancelBuildAsync(string engineId, CancellationToken cancellati
if (!building)
throw new InvalidOperationException("The engine is not currently building.");

SmtTransferEngineState state = _stateService.Get(engineId);
WordAlignmentEngineState state = _stateService.Get(engineId);
state.Touch();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public class StatisticalPostprocessBuildJob(
IDistributedReaderWriterLockFactory lockFactory,
ISmtModelFactory smtModelFactory,
IOptionsMonitor<BuildJobOptions> buildOptions,
IOptionsMonitor<StatisticalEngineOptions> engineOptions
IOptionsMonitor<WordAlignmentEngineOptions> engineOptions
)
: PostprocessBuildJob<WordAlignmentEngine>(
platformServices.First(ps => ps.EngineGroup == EngineGroup.WordAlignment),
Expand All @@ -23,7 +23,7 @@ IOptionsMonitor<StatisticalEngineOptions> engineOptions
)
{
private readonly ISmtModelFactory _smtModelFactory = smtModelFactory;
private readonly IOptionsMonitor<StatisticalEngineOptions> _engineOptions = engineOptions;
private readonly IOptionsMonitor<WordAlignmentEngineOptions> _engineOptions = engineOptions;
private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory;

protected override async Task DoWorkAsync(
Expand Down
Loading

0 comments on commit 8e0d367

Please sign in to comment.