Skip to content

Commit

Permalink
Fix issue with mapping non-parallel-corpora to parallel corpora
Browse files: browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Oct 17, 2024
1 parent ec88283 commit d3300c7
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 37 deletions.
12 changes: 6 additions & 6 deletions src/Serval/src/Serval.Translation/Services/EngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -681,12 +681,12 @@ pretranslateCorpus is null
);
}
}
return new V1.ParallelCorpus
{
Id = source.Id,
SourceCorpora = { sourceCorpus },
TargetCorpora = { targetCorpus }
};
V1.ParallelCorpus corpus = new() { Id = source.Id };
if (sourceCorpus.Files.Count > 0)
corpus.SourceCorpora.Add(sourceCorpus);
if (targetCorpus.Files.Count > 0)
corpus.TargetCorpora.Add(targetCorpus);
return corpus;
}

private V1.ParallelCorpus Map(
Expand Down
63 changes: 33 additions & 30 deletions src/Serval/test/Serval.E2ETests/ServalClientHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,22 @@ public async Task<string> AddTextCorpusToEngineAsync(
bool pretranslate
)
{
List<DataFile> sourceFiles = await UploadFilesAsync(filesToAdd, FileFormat.Text, sourceLanguage);
List<DataFile> sourceFiles = await UploadFilesAsync(
filesToAdd,
FileFormat.Text,
sourceLanguage,
isTarget: false
);

var targetFileConfig = new List<TranslationCorpusFileConfig>();
if (!pretranslate)
{
List<DataFile> targetFiles = await UploadFilesAsync(filesToAdd, FileFormat.Text, targetLanguage);
List<DataFile> targetFiles = await UploadFilesAsync(
filesToAdd,
FileFormat.Text,
targetLanguage,
isTarget: true
);
foreach (var item in targetFiles.Select((file, i) => new { i, file }))
{
targetFileConfig.Add(
Expand All @@ -193,20 +203,11 @@ bool pretranslate

var sourceFileConfig = new List<TranslationCorpusFileConfig>();

if (sourceLanguage == targetLanguage && !pretranslate)
{
// if it's the same language, and we are not pretranslating, do nothing (echo for suggestions)
// if pretranslating, we need to upload the source separately
// if different languages, we are not echoing.
}
else
for (int i = 0; i < sourceFiles.Count; i++)
{
for (int i = 0; i < sourceFiles.Count; i++)
{
sourceFileConfig.Add(
new TranslationCorpusFileConfig { FileId = sourceFiles[i].Id, TextId = filesToAdd[i] }
);
}
sourceFileConfig.Add(
new TranslationCorpusFileConfig { FileId = sourceFiles[i].Id, TextId = filesToAdd[i] }
);
}

TranslationCorpus response = await TranslationEnginesClient.AddCorpusAsync(
Expand Down Expand Up @@ -239,12 +240,22 @@ public async Task<string> AddParallelTextCorpusToEngineAsync(
bool pretranslate
)
{
List<DataFile> sourceFiles = await UploadFilesAsync(filesToAdd, FileFormat.Text, sourceLanguage);
List<DataFile> sourceFiles = await UploadFilesAsync(
filesToAdd,
FileFormat.Text,
sourceLanguage,
isTarget: false
);

var targetFileConfig = new List<CorpusFileConfig>();
if (!pretranslate)
{
List<DataFile> targetFiles = await UploadFilesAsync(filesToAdd, FileFormat.Text, targetLanguage);
List<DataFile> targetFiles = await UploadFilesAsync(
filesToAdd,
FileFormat.Text,
targetLanguage,
isTarget: true
);
foreach (var item in targetFiles.Select((file, i) => new { i, file }))
{
targetFileConfig.Add(new CorpusFileConfig { FileId = item.file.Id, TextId = filesToAdd[item.i] });
Expand All @@ -263,18 +274,9 @@ bool pretranslate

var sourceFileConfig = new List<CorpusFileConfig>();

if (sourceLanguage == targetLanguage && !pretranslate)
{
// if it's the same language, and we are not pretranslating, do nothing (echo for suggestions)
// if pretranslating, we need to upload the source separately
// if different languages, we are not echoing.
}
else
for (int i = 0; i < sourceFiles.Count; i++)
{
for (int i = 0; i < sourceFiles.Count; i++)
{
sourceFileConfig.Add(new CorpusFileConfig { FileId = sourceFiles[i].Id, TextId = filesToAdd[i] });
}
sourceFileConfig.Add(new CorpusFileConfig { FileId = sourceFiles[i].Id, TextId = filesToAdd[i] });
}

CorpusConfig sourceCorpusConfig =
Expand Down Expand Up @@ -305,7 +307,8 @@ bool pretranslate
public async Task<List<DataFile>> UploadFilesAsync(
IEnumerable<string> filesToAdd,
FileFormat fileFormat,
string language
string language,
bool isTarget
)
{
string languageFolder = Path.GetFullPath(
Expand All @@ -325,7 +328,7 @@ string language

foreach (string fileName in filesToAdd)
{
string fullName = _prefix + language + "_" + fileName;
string fullName = _prefix + language + "_" + fileName + (isTarget ? "_trg" : "_src");

//delete files that have the same name
if (filenameToId.Contains(fullName))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@ row.Ref is not ScriptureRef sr
foreach (
Row row in AlignPretranslateCorpus(
sourcePretranslateCorpora,
targetCorpora.Select(tc => tc.TextCorpus).ToArray()
targetCorpora.Length > 0
? targetCorpora.Select(tc => tc.TextCorpus).ToArray()
: [new DictionaryTextCorpus()]
)
)
{
Expand Down

0 comments on commit d3300c7

Please sign in to comment.