Skip to content

Commit

Permalink
Working new logic
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Nov 8, 2024
1 parent 6b1bfdb commit 4ec67d2
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 253 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -111,23 +111,29 @@ CancellationToken cancellationToken
corpora,
row =>
{
sourceTrainWriter.Write($"{row.SourceSegment}\n");
targetTrainWriter.Write($"{row.TargetSegment}\n");
if (row.SourceSegment.Length > 0 || row.TargetSegment.Length > 0)
{
sourceTrainWriter.Write($"{row.SourceSegment}\n");
targetTrainWriter.Write($"{row.TargetSegment}\n");
}
if (row.SourceSegment.Length > 0 && row.TargetSegment.Length > 0)
trainCount++;
},
(row, corpus) =>
{
pretranslateWriter.WriteStartObject();
pretranslateWriter.WriteString("corpusId", corpus.Id);
pretranslateWriter.WriteString("textId", row.TextId);
pretranslateWriter.WriteStartArray("refs");
foreach (object rowRef in row.Refs)
pretranslateWriter.WriteStringValue(rowRef.ToString());
pretranslateWriter.WriteEndArray();
pretranslateWriter.WriteString("translation", row.SourceSegment);
pretranslateWriter.WriteEndObject();
pretranslateCount++;
if (row.SourceSegment.Length > 0 && row.TargetSegment.Length == 0)
{
pretranslateWriter.WriteStartObject();
pretranslateWriter.WriteString("corpusId", corpus.Id);
pretranslateWriter.WriteString("textId", row.TextId);
pretranslateWriter.WriteStartArray("refs");
foreach (object rowRef in row.Refs)
pretranslateWriter.WriteStringValue(rowRef.ToString());
pretranslateWriter.WriteEndArray();
pretranslateWriter.WriteString("translation", row.SourceSegment);
pretranslateWriter.WriteEndObject();
pretranslateCount++;
}
},
(bool?)buildOptionsObject?["use_key_terms"] ?? true
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,11 @@ public async Task RunAsync_TrainAndPretranslateAll()

await env.RunBuildJobAsync(corpus1);

Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(2));
Assert.That(
await env.GetPretranslateCountAsync(),
Is.EqualTo(2),
(await env.GetPretranslationsAsync())?.ToJsonString()
);
}

[Test]
Expand All @@ -76,7 +80,7 @@ public async Task RunAsync_PretranslateAll()

await env.RunBuildJobAsync(corpus1);

Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(2));
Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(4));
}

[Test]
Expand Down Expand Up @@ -205,14 +209,14 @@ public async Task RunAsync_MixedSource_Paratext()
(int src1Count, int src2Count, int trgCount, int termCount) = await env.GetTrainCountAsync();
Assert.Multiple(() =>
{
Assert.That(src1Count, Is.EqualTo(5));
Assert.That(src2Count, Is.EqualTo(12));
Assert.That(src1Count, Is.EqualTo(7));
Assert.That(src2Count, Is.EqualTo(13));
Assert.That(trgCount, Is.EqualTo(1));
Assert.That(termCount, Is.EqualTo(0));
});
Assert.That(
await env.GetPretranslateCountAsync(),
Is.EqualTo(14),
Is.EqualTo(15),
JsonSerializer.Serialize(await env.GetPretranslationsAsync())
);
}
Expand All @@ -228,8 +232,8 @@ public async Task RunAsync_MixedSource_Text()
(int src1Count, int src2Count, int trgCount, int termCount) = await env.GetTrainCountAsync();
Assert.Multiple(() =>
{
Assert.That(src1Count, Is.EqualTo(3));
Assert.That(src2Count, Is.EqualTo(2));
Assert.That(src1Count, Is.EqualTo(1));
Assert.That(src2Count, Is.EqualTo(4));
Assert.That(trgCount, Is.EqualTo(1));
Assert.That(termCount, Is.EqualTo(0));
});
Expand Down Expand Up @@ -475,13 +479,12 @@ public async Task ParallelCorpusLogic()
Is.EqualTo(
@"Source one, chapter fourteen, verse fifty-five. Segment b.
Source one, chapter fourteen, verse fifty-six.
Source one, chapter one, verse one.
Source two, chapter one, verse one.
Source two, chapter one, verse two.
Source two, chapter one, verse three.
Source two, chapter one, verse four.
Source one, chapter one, verse four.
Source two, chapter one, verse five. Source two, chapter one, verse six.
Source two, chapter one, verse seven. Source two, chapter one, verse eight.
Source two, chapter one, verse nine. Source two, chapter one, verse ten.
Source one, chapter one, verse seven, eight, and nine. Source one, chapter one, verse ten.
Source two, chapter one, verse one.
"
),
Expand All @@ -493,22 +496,21 @@ public async Task ParallelCorpusLogic()
Is.EqualTo(
@"Target two, chapter fourteen, verse fifty-five.
Target two, chapter fourteen, verse fifty-six.
Target two, chapter one, verse one.
Target two, chapter one, verse two.
Target one, chapter one, verse one.
Target one, chapter one, verse two.
Target one, chapter one, verse three.
Target two, chapter one, verse five and six.
Target one, chapter one, verse seven and eight.
Target two, chapter one, verse nine and ten.
Target one, chapter one, verse five and six.
Target one, chapter one, verse seven and eight. Target one, chapter one, verse nine and ten.
"
),
trg
);
Assert.That(pretranslations, Is.Not.Null);
Assert.That(pretranslations!.Count, Is.EqualTo(9), pretranslations.ToJsonString());
Assert.That(pretranslations!.Count, Is.EqualTo(7), pretranslations.ToJsonString());
Assert.That(
pretranslations[0]!["translation"]!.ToString(),
pretranslations[2]!["translation"]!.ToString(),
Is.EqualTo("Source one, chapter twelve, verse one."),
pretranslations.ToJsonString()
);
Expand Down
Loading

0 comments on commit 4ec67d2

Please sign in to comment.