-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ad946b2
commit 8e0d367
Showing
18 changed files
with
397 additions
and
87 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 2 additions & 2 deletions
4
...Configuration/StatisticalEngineOptions.cs → ...nfiguration/WordAlignmentEngineOptions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 14 additions & 0 deletions
14
src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentModelOptions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
namespace Serval.Machine.Shared.Configuration; | ||
|
||
/// <summary>
/// Settings for the word alignment model.
/// </summary>
public class WordAlignmentModelOptions
{
    /// <summary>
    /// Name identifying these options (presumably the configuration section name; confirm against the
    /// code that registers the options).
    /// </summary>
    public const string Key = "WordAlignmentModel";

    /// <summary>
    /// Initializes <see cref="NewModelFile"/> to <c>thot-new-model.zip</c> inside the application's
    /// install directory.
    /// </summary>
    public WordAlignmentModelOptions()
    {
        // Assembly.GetEntryAssembly() can return null (e.g. when hosted from unmanaged code) and
        // Assembly.Location is an empty string for single-file/in-memory assemblies. In those cases
        // fall back to AppContext.BaseDirectory instead of throwing from the constructor.
        string? entryLocation = Assembly.GetEntryAssembly()?.Location;
        string? installDir = string.IsNullOrEmpty(entryLocation) ? null : Path.GetDirectoryName(entryLocation);
        if (string.IsNullOrEmpty(installDir))
            installDir = System.AppContext.BaseDirectory;

        NewModelFile = Path.Combine(installDir, "thot-new-model.zip");
    }

    /// <summary>
    /// Path of the new-model archive. Defaults to <c>thot-new-model.zip</c> in the install directory
    /// and may be overridden by assigning a different path.
    /// </summary>
    public string NewModelFile { get; set; }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15 changes: 15 additions & 0 deletions
15
src/Machine/src/Serval.Machine.Shared/Services/IModelFactory.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
namespace Serval.Machine.Shared.Services; | ||
|
||
/// <summary>
/// Operations for creating, initializing, cleaning up and transferring the model files that live in an
/// engine directory.
/// </summary>
public interface IModelFactory
{
    /// <summary>
    /// Creates a trainer for the engine located in <paramref name="engineDir"/>.
    /// </summary>
    /// <param name="engineDir">Directory of the engine.</param>
    /// <param name="tokenizer">Tokenizer handed to the trainer.</param>
    /// <param name="corpus">Parallel corpus handed to the trainer.</param>
    /// <returns>The created <see cref="ITrainer"/>.</returns>
    ITrainer CreateTrainer(string engineDir, IRangeTokenizer<string, int, string> tokenizer, IParallelTextCorpus corpus);

    /// <summary>
    /// Initializes a new engine in <paramref name="engineDir"/>.
    /// </summary>
    void InitNew(string engineDir);

    /// <summary>
    /// Cleans up the engine files in <paramref name="engineDir"/>.
    /// </summary>
    void Cleanup(string engineDir);

    /// <summary>
    /// Updates the engine in <paramref name="engineDir"/> from the contents of <paramref name="source"/>.
    /// The expected stream format is defined by the implementation.
    /// </summary>
    Task UpdateEngineFromAsync(string engineDir, Stream source, CancellationToken cancellationToken = default);

    /// <summary>
    /// Saves the engine in <paramref name="engineDir"/> to <paramref name="destination"/>.
    /// The produced stream format is defined by the implementation.
    /// </summary>
    Task SaveEngineToAsync(string engineDir, Stream destination, CancellationToken cancellationToken = default);
}
16 changes: 16 additions & 0 deletions
16
src/Machine/src/Serval.Machine.Shared/Services/IWordAlignmentModelFactory.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
namespace Serval.Machine.Shared.Services; | ||
|
||
/// <summary>
/// Operations for creating word alignment models and their trainers, and for initializing, cleaning up
/// and transferring the files that live in an engine directory.
/// </summary>
public interface IWordAlignmentModelFactory
{
    /// <summary>
    /// Creates a word alignment model of the given <paramref name="modelType"/> for the engine in
    /// <paramref name="engineDir"/>.
    /// </summary>
    IWordAlignmentModel Create(string engineDir, string modelType);

    /// <summary>
    /// Creates a trainer for a model of the given <paramref name="modelType"/> in
    /// <paramref name="engineDir"/>.
    /// </summary>
    /// <param name="engineDir">Directory of the engine.</param>
    /// <param name="modelType">Model type selector; the accepted values are defined by the implementation.</param>
    /// <param name="tokenizer">Tokenizer handed to the trainer.</param>
    /// <param name="corpus">Parallel corpus handed to the trainer.</param>
    /// <returns>The created <see cref="ITrainer"/>.</returns>
    ITrainer CreateTrainer(string engineDir, string modelType, ITokenizer<string, int, string> tokenizer, IParallelTextCorpus corpus);

    /// <summary>
    /// Initializes a new engine in <paramref name="engineDir"/>.
    /// </summary>
    void InitNew(string engineDir);

    /// <summary>
    /// Cleans up the engine files in <paramref name="engineDir"/>.
    /// </summary>
    void Cleanup(string engineDir);

    /// <summary>
    /// Updates the engine in <paramref name="engineDir"/> from the contents of <paramref name="source"/>.
    /// The expected stream format is defined by the implementation.
    /// </summary>
    Task UpdateEngineFromAsync(string engineDir, Stream source, CancellationToken cancellationToken = default);

    /// <summary>
    /// Saves the engine in <paramref name="engineDir"/> to <paramref name="destination"/>.
    /// The produced stream format is defined by the implementation.
    /// </summary>
    Task SaveEngineToAsync(string engineDir, Stream destination, CancellationToken cancellationToken = default);
}
74 changes: 74 additions & 0 deletions
74
src/Machine/src/Serval.Machine.Shared/Services/ModelFactoryBase.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
namespace Serval.Machine.Shared.Services; | ||
|
||
/// <summary>
/// Base implementation of <see cref="IModelFactory"/>. It provides engine-directory cleanup and
/// gzip-compressed tar import/export; trainer creation and initialization must be overridden.
/// </summary>
public abstract class ModelFactoryBase : IModelFactory
{
    /// <summary>
    /// Not implemented in the base class; always throws unless overridden.
    /// </summary>
    /// <exception cref="NotImplementedException">Always, in this base implementation.</exception>
    public virtual ITrainer CreateTrainer(
        string engineDir,
        IRangeTokenizer<string, int, string> tokenizer,
        IParallelTextCorpus corpus
    ) => throw new NotImplementedException();

    /// <summary>
    /// Not implemented in the base class; always throws unless overridden.
    /// </summary>
    /// <exception cref="NotImplementedException">Always, in this base implementation.</exception>
    public virtual void InitNew(string engineDir) => throw new NotImplementedException();

    /// <summary>
    /// Removes the <c>lm</c> and <c>tm</c> sub-directories and the <c>smt.cfg</c> file from
    /// <paramref name="engineDir"/>, then deletes the directory itself if nothing else is left in it.
    /// Does nothing when the directory does not exist.
    /// </summary>
    public void Cleanup(string engineDir)
    {
        if (Directory.Exists(engineDir))
        {
            foreach (string subDirName in new[] { "lm", "tm" })
                DirectoryHelper.DeleteDirectoryRobust(Path.Combine(engineDir, subDirName));

            string configPath = Path.Combine(engineDir, "smt.cfg");
            if (File.Exists(configPath))
                File.Delete(configPath);

            // Only remove the engine directory when the cleanup above left it empty.
            bool hasRemainingEntries = Directory.EnumerateFileSystemEntries(engineDir).Any();
            if (!hasRemainingEntries)
                Directory.Delete(engineDir);
        }
    }

    /// <summary>
    /// Decompresses the gzip stream <paramref name="source"/> and extracts the contained tar archive
    /// into <paramref name="engineDir"/>, overwriting existing files. The directory is created if it
    /// does not exist.
    /// </summary>
    public async Task UpdateEngineFromAsync(
        string engineDir,
        Stream source,
        CancellationToken cancellationToken = default
    )
    {
        // CreateDirectory is a no-op when the directory is already present.
        Directory.CreateDirectory(engineDir);

        // Buffer the decompressed tar data in memory before extracting it.
        await using var tarBuffer = new MemoryStream();
        await using (var decompressor = new GZipStream(source, CompressionMode.Decompress))
        {
            await decompressor.CopyToAsync(tarBuffer, cancellationToken);
        }

        tarBuffer.Position = 0;
        await TarFile.ExtractToDirectoryAsync(
            tarBuffer,
            engineDir,
            overwriteFiles: true,
            cancellationToken: cancellationToken
        );
    }

    /// <summary>
    /// Packs the contents of <paramref name="engineDir"/> (without the base directory) into a tar
    /// archive and writes it gzip-compressed to <paramref name="destination"/>.
    /// </summary>
    public async Task SaveEngineToAsync(
        string engineDir,
        Stream destination,
        CancellationToken cancellationToken = default
    )
    {
        // Build the tar archive in memory first: it cannot be created directly on the shared
        // stream because it all needs to be written at once.
        await using var tarBuffer = new MemoryStream();
        await TarFile.CreateFromDirectoryAsync(
            engineDir,
            tarBuffer,
            includeBaseDirectory: false,
            cancellationToken: cancellationToken
        );

        tarBuffer.Position = 0;
        await using var compressor = new GZipStream(destination, CompressionMode.Compress);
        await tarBuffer.CopyToAsync(compressor, cancellationToken);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.