diff --git a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj index f9eea0c5..4206b29e 100644 --- a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj +++ b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj @@ -36,9 +36,9 @@ - - - + + + diff --git a/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs index 46baa68d..04c6fe7d 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs @@ -93,16 +93,14 @@ CancellationToken cancellationToken JsonObject? buildOptionsObject = null; if (buildOptions is not null) buildOptionsObject = JsonSerializer.Deserialize(buildOptions); - await using StreamWriter sourceTrainWriter = - new(await _sharedFileService.OpenWriteAsync($"builds/{buildId}/train.src.txt", cancellationToken)); - await using StreamWriter targetTrainWriter = - new(await _sharedFileService.OpenWriteAsync($"builds/{buildId}/train.trg.txt", cancellationToken)); - await using Stream pretranslateStream = await _sharedFileService.OpenWriteAsync( - $"builds/{buildId}/pretranslate.src.json", - cancellationToken - ); - await using Utf8JsonWriter pretranslateWriter = new(pretranslateStream, PretranslateWriterOptions); + using MemoryStream sourceStream = new(); + using MemoryStream targetStream = new(); + using MemoryStream pretranslationStream = new(); + + using StreamWriter targetTrainWriter = new(targetStream, Encoding.Default); + using StreamWriter sourceTrainWriter = new(sourceStream, Encoding.Default); + await using Utf8JsonWriter pretranslateWriter = new(pretranslationStream, PretranslateWriterOptions); int trainCount = 0; int pretranslateCount = 0; @@ -113,8 +111,8 @@ CancellationToken cancellationToken { if (row.SourceSegment.Length > 0 || row.TargetSegment.Length > 0) { - sourceTrainWriter.Write($"{row.SourceSegment}\n"); - targetTrainWriter.Write($"{row.TargetSegment}\n"); + sourceTrainWriter.WriteLine(row.SourceSegment); + targetTrainWriter.WriteLine(row.TargetSegment); } if (row.SourceSegment.Length > 0 && row.TargetSegment.Length > 0) trainCount++; @@ -140,6 +138,21 @@ CancellationToken cancellationToken pretranslateWriter.WriteEndArray(); + await sourceTrainWriter.FlushAsync(cancellationToken); + await targetTrainWriter.FlushAsync(cancellationToken); + + async Task WriteStreamAsync(MemoryStream stream, string path) + { + stream.Position = 0; + await using StreamWriter writer = new(await _sharedFileService.OpenWriteAsync(path, cancellationToken)); + await writer.WriteAsync(Encoding.Default.GetString(stream.ToArray())); + await writer.FlushAsync(cancellationToken); + } + + await WriteStreamAsync(sourceStream, $"builds/{buildId}/train.src.txt"); + await WriteStreamAsync(targetStream, $"builds/{buildId}/train.trg.txt"); + await WriteStreamAsync(pretranslationStream, $"builds/{buildId}/pretranslate.src.json"); + return (trainCount, pretranslateCount); } diff --git a/src/Serval/src/Serval.Shared/Serval.Shared.csproj b/src/Serval/src/Serval.Shared/Serval.Shared.csproj index 75ccbd9b..0e504535 100644 --- a/src/Serval/src/Serval.Shared/Serval.Shared.csproj +++ b/src/Serval/src/Serval.Shared/Serval.Shared.csproj @@ -19,7 +19,7 @@ - + diff --git a/src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj b/src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj index f9476b69..a64c5d85 100644 --- a/src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj +++ b/src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj @@ -17,7 +17,7 @@ - +