Skip to content

Commit

Permalink
Add Word Alignment tests
Browse files Browse the repository at this point in the history
Fix Translation Engine: specifying both TextIds and ScriptureRange in a parallel corpus filter should cause an exception
  • Loading branch information
johnml1135 committed Nov 6, 2024
1 parent d699845 commit 41d48d1
Show file tree
Hide file tree
Showing 10 changed files with 2,165 additions and 27 deletions.
10 changes: 10 additions & 0 deletions Serval.sln
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{0904BA95-D5B
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "EchoEngine", "src\Echo\src\EchoEngine\EchoEngine.csproj", "{929FF600-8C7E-4498-A2A3-5534F3A3481E}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{41EE40B9-699C-4145-8AA7-0EE89C727A19}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Serval.WordAlignment.Tests", "src\Serval\test\Serval.WordAlignment.Tests\Serval.WordAlignment.Tests.csproj", "{5E3D2BC3-9A98-4106-A2BF-B1F3641DC6F5}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -192,6 +196,10 @@ Global
{929FF600-8C7E-4498-A2A3-5534F3A3481E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{929FF600-8C7E-4498-A2A3-5534F3A3481E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{929FF600-8C7E-4498-A2A3-5534F3A3481E}.Release|Any CPU.Build.0 = Release|Any CPU
{5E3D2BC3-9A98-4106-A2BF-B1F3641DC6F5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5E3D2BC3-9A98-4106-A2BF-B1F3641DC6F5}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5E3D2BC3-9A98-4106-A2BF-B1F3641DC6F5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5E3D2BC3-9A98-4106-A2BF-B1F3641DC6F5}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -231,6 +239,8 @@ Global
{D201886D-9299-4758-80E8-694DBCF8DF93} = {9125C013-4F15-4761-BCD2-070524986737}
{0904BA95-D5BF-4AC2-A919-20A785EF45F5} = {D201886D-9299-4758-80E8-694DBCF8DF93}
{929FF600-8C7E-4498-A2A3-5534F3A3481E} = {0904BA95-D5BF-4AC2-A919-20A785EF45F5}
{41EE40B9-699C-4145-8AA7-0EE89C727A19} = {A78D900F-AE52-436C-88CE-A22EAEDECD91}
{5E3D2BC3-9A98-4106-A2BF-B1F3641DC6F5} = {41EE40B9-699C-4145-8AA7-0EE89C727A19}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {9F18C25E-E140-43C3-B177-D562E1628370}
Expand Down
22 changes: 22 additions & 0 deletions src/Serval/src/Serval.Translation/Services/EngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,17 @@ pretranslateFilter is not null
Files = { inputCorpus.Files.Select(Map) }
};

if (
trainingFilter is not null
&& trainingFilter.TextIds is not null
&& trainingFilter.ScriptureRange is not null
)
{
throw new InvalidOperationException(
"Cannot specify both TextIds and ScriptureRange in the training filter."
);
}

if (
trainOnAll
|| (trainingFilter is not null && trainingFilter.TextIds is null && trainingFilter.ScriptureRange is null)
Expand All @@ -858,6 +869,17 @@ pretranslateFilter is not null
returnCorpus.TrainOnTextIds.Add(trainingFilter.TextIds);
}

if (
pretranslateFilter is not null
&& pretranslateFilter.TextIds is not null
&& pretranslateFilter.ScriptureRange is not null
)
{
throw new InvalidOperationException(
"Cannot specify both TextIds and ScriptureRange in the pretranslation filter."
);
}

if (
pretranslateOnAll
|| (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ public static class IServalBuilderExtensions
{
public static IServalBuilder AddWordAlignment(this IServalBuilder builder)
{
builder.AddApiOptions(builder.Configuration.GetSection(ApiOptions.Key));
builder.AddApiOptions(builder.Configuration!.GetSection(ApiOptions.Key));
builder.AddDataFileOptions(builder.Configuration.GetSection(DataFileOptions.Key));

builder.Services.AddScoped<IBuildService, BuildService>();
Expand Down
137 changes: 111 additions & 26 deletions src/Serval/src/Serval.WordAlignment/Services/EngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -145,17 +145,33 @@ public async Task StartBuildAsync(Build build, CancellationToken cancellationTok
try
{
StartBuildRequest request;
var trainOn = build.TrainOn?.ToDictionary(c => c.ParallelCorpusRef!);
var wordAlignOn = build.WordAlignOn?.ToDictionary(c => c.ParallelCorpusRef!);
Dictionary<string, TrainingCorpus>? trainOn = build.TrainOn?.ToDictionary(c => c.ParallelCorpusRef!);
Dictionary<string, TrainingCorpus>? wordAlignOn = build.WordAlignOn?.ToDictionary(c =>
c.ParallelCorpusRef!
);
IReadOnlyList<Models.ParallelCorpus> parallelCorpora = engine
.ParallelCorpora.Where(pc =>
trainOn == null
|| trainOn.ContainsKey(pc.Id)
|| wordAlignOn == null
|| wordAlignOn.ContainsKey(pc.Id)
)
.ToList();
request = new StartBuildRequest
{
EngineType = engine.Type,
EngineId = engine.Id,
BuildId = build.Id,
Corpora =
{
engine.ParallelCorpora.Select(c =>
Map(c, trainOn?.GetValueOrDefault(c.Id), wordAlignOn?.GetValueOrDefault(c.Id))
parallelCorpora.Select(c =>
Map(
c,
trainOn?.GetValueOrDefault(c.Id),
wordAlignOn?.GetValueOrDefault(c.Id),
trainOn is null,
wordAlignOn is null
)
)
}
};
Expand Down Expand Up @@ -342,13 +358,29 @@ private Shared.Models.AlignedWordPair Map(V1.AlignedWordPair source)
return new Shared.Models.AlignedWordPair { SourceIndex = source.SourceIndex, TargetIndex = source.TargetIndex };
}

private V1.ParallelCorpus Map(Models.ParallelCorpus source, TrainingCorpus? trainOn, TrainingCorpus? wordAlignOn)
private V1.ParallelCorpus Map(
Models.ParallelCorpus source,
TrainingCorpus? trainingCorpus,
TrainingCorpus? wordAlignmentCorpus,
bool trainOnAllCorpora,
bool wordAlignOnAllCorpora
)
{
string? referenceFileLocation =
source.TargetCorpora.Count > 0 && source.TargetCorpora[0].Files.Count > 0
? Map(source.TargetCorpora[0].Files[0]).Location
: null;

bool trainOnAllSources =
trainOnAllCorpora || (trainingCorpus is not null && trainingCorpus.SourceFilters is null);
bool wordAlignAllSources =
wordAlignOnAllCorpora || (wordAlignmentCorpus is not null && wordAlignmentCorpus.SourceFilters is null);

bool trainOnAllTargets =
trainOnAllCorpora || (trainingCorpus is not null && trainingCorpus.TargetFilters is null);
bool wordAlignAllTargets =
wordAlignOnAllCorpora || (wordAlignmentCorpus is not null && wordAlignmentCorpus.TargetFilters is null);

return new V1.ParallelCorpus
{
Id = source.Id,
Expand All @@ -357,9 +389,11 @@ private V1.ParallelCorpus Map(Models.ParallelCorpus source, TrainingCorpus? trai
source.SourceCorpora.Select(sc =>
Map(
sc,
trainOn?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(),
wordAlignOn?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(),
referenceFileLocation
trainingCorpus?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(),
wordAlignmentCorpus?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(),
referenceFileLocation,
trainOnAllSources,
wordAlignAllSources
)
)
},
Expand All @@ -368,20 +402,24 @@ private V1.ParallelCorpus Map(Models.ParallelCorpus source, TrainingCorpus? trai
source.TargetCorpora.Select(tc =>
Map(
tc,
trainOn?.TargetFilters?.Where(sf => sf.CorpusRef == tc.Id).FirstOrDefault(),
trainingCorpus?.TargetFilters?.Where(sf => sf.CorpusRef == tc.Id).FirstOrDefault(),
null,
referenceFileLocation
referenceFileLocation,
trainOnAllTargets,
wordAlignAllTargets
)
)
}
};
}

private V1.MonolingualCorpus Map(
Shared.Models.MonolingualCorpus source,
Shared.Models.MonolingualCorpus inputCorpus,
ParallelCorpusFilter? trainingFilter,
ParallelCorpusFilter? wordAlignmentFilter,
string? referenceFileLocation
string? referenceFileLocation,
bool trainOnAll,
bool wordAlignOnAll
)
{
Dictionary<string, ScriptureChapters>? trainOnChapters = null;
Expand Down Expand Up @@ -410,7 +448,7 @@ wordAlignmentFilter is not null
&& referenceFileLocation is not null
)
{
GetChapters(referenceFileLocation, wordAlignmentFilter.ScriptureRange)
wordAlignmentChapters = GetChapters(referenceFileLocation, wordAlignmentFilter.ScriptureRange)
.Select(
(kvp) =>
{
Expand All @@ -422,23 +460,70 @@ wordAlignmentFilter is not null
.ToDictionary();
}

var corpus = new V1.MonolingualCorpus
var returnCorpus = new V1.MonolingualCorpus
{
Id = source.Id,
Language = source.Language,
Files = { source.Files.Select(Map) }
Id = inputCorpus.Id,
Language = inputCorpus.Language,
Files = { inputCorpus.Files.Select(Map) }
};

if (trainOnChapters is not null)
corpus.TrainOnChapters.Add(trainOnChapters);
if (trainingFilter?.TextIds is not null)
corpus.TrainOnTextIds.Add(trainingFilter.TextIds);
if (wordAlignmentChapters is not null)
corpus.WordAlignOnChapters.Add(wordAlignmentChapters);
if (wordAlignmentFilter?.TextIds is not null)
corpus.WordAlignOnTextIds.Add(wordAlignmentFilter.TextIds);
if (
trainingFilter is not null
&& trainingFilter.TextIds is not null
&& trainingFilter.ScriptureRange is not null
)
{
throw new InvalidOperationException(
"Cannot specify both TextIds and ScriptureRange in the training filter."
);
}

if (
trainOnAll
|| (trainingFilter is not null && trainingFilter.TextIds is null && trainingFilter.ScriptureRange is null)
)
{
returnCorpus.TrainOnAll = true;
}
else
{
if (trainOnChapters is not null)
returnCorpus.TrainOnChapters.Add(trainOnChapters);
if (trainingFilter?.TextIds is not null)
returnCorpus.TrainOnTextIds.Add(trainingFilter.TextIds);
}

if (
wordAlignmentFilter is not null
&& wordAlignmentFilter.TextIds is not null
&& wordAlignmentFilter.ScriptureRange is not null
)
{
throw new InvalidOperationException(
"Cannot specify both TextIds and ScriptureRange in the word alignment filter."
);
}

if (
wordAlignOnAll
|| (
wordAlignmentFilter is not null
&& wordAlignmentFilter.TextIds is null
&& wordAlignmentFilter.ScriptureRange is null
)
)
{
returnCorpus.WordAlignOnAll = true;
}
else
{
if (wordAlignmentChapters is not null)
returnCorpus.WordAlignOnChapters.Add(wordAlignmentChapters);
if (wordAlignmentFilter?.TextIds is not null)
returnCorpus.WordAlignOnTextIds.Add(wordAlignmentFilter.TextIds);
}

return corpus;
return returnCorpus;
}

private V1.CorpusFile Map(Shared.Models.CorpusFile source)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1604,6 +1604,48 @@ await env.Service.StartBuildAsync(
);
}

[Test]
public async Task StartBuildAsync_TextFilesScriptureRangeSpecified_ParallelCorpus()
{
    // Arrange: an engine backed by a Paratext-project parallel corpus.
    var env = new TestEnvironment();
    var engine = await env.CreateParallelCorpusEngineWithParatextProjectAsync();

    // Each filter sets BOTH a scripture range and a (non-null, empty) text id list,
    // which is the combination this test expects the service to reject.
    var sourceFilter = new ParallelCorpusFilter
    {
        CorpusRef = "parallel-corpus1-source1",
        ScriptureRange = "MAT",
        TextIds = []
    };
    var targetFilter = new ParallelCorpusFilter
    {
        CorpusRef = "parallel-corpus1-target1",
        ScriptureRange = "MAT",
        TextIds = []
    };
    var trainingCorpus = new TrainingCorpus
    {
        ParallelCorpusRef = "parallel-corpus1",
        SourceFilters = new List<ParallelCorpusFilter>() { sourceFilter },
        TargetFilters = new List<ParallelCorpusFilter>() { targetFilter }
    };
    var build = new Build
    {
        Id = BUILD1_ID,
        EngineRef = engine.Id,
        TrainOn = [trainingCorpus]
    };

    // Act + Assert: starting the build must fail with InvalidOperationException.
    Assert.ThrowsAsync<InvalidOperationException>(() => env.Service.StartBuildAsync(build));
}

[Test]
public async Task StartBuildAsync_NoFilters_ParallelCorpus()
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Build settings for this project; it references the Serval.WordAlignment project below. -->
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
<RootNamespace>Serval.WordAlignment</RootNamespace>
<EnforceCodeStyleInBuild>true</EnforceCodeStyleInBuild>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<!-- CS1591 / CS1573 are the compiler's XML-documentation warnings (missing doc comment / missing param tag);
     they are suppressed here so TreatWarningsAsErrors does not fail the build on undocumented members. -->
<NoWarn>$(NoWarn);CS1591;CS1573</NoWarn>
</PropertyGroup>

<!-- NuGet dependencies: coverlet collector, .NET test SDK, NSubstitute (+ analyzers), NUnit (+ adapter and analyzers).
     References marked PrivateAssets=all are not exposed to projects that reference this one. -->
<ItemGroup>
<PackageReference Include="coverlet.collector" Version="6.0.0">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
<PackageReference Include="NSubstitute" Version="5.1.0" />
<PackageReference Include="NSubstitute.Analyzers.CSharp" Version="1.0.16">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="NUnit" Version="4.0.1" />
<PackageReference Include="NUnit3TestAdapter" Version="4.5.0" />
<PackageReference Include="NUnit.Analyzers" Version="4.0.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
</ItemGroup>

<!-- Project references: the Serval.WordAlignment project and the Serval.Shared.Tests project. -->
<ItemGroup>
<ProjectReference Include="..\..\src\Serval.WordAlignment\Serval.WordAlignment.csproj" />
<ProjectReference Include="..\Serval.Shared.Tests\Serval.Shared.Tests.csproj" />
</ItemGroup>

</Project>
Loading

0 comments on commit 41d48d1

Please sign in to comment.