Skip to content

Commit

Permalink
Deduplicate kbts (#551)
Browse files (browse the repository at this point in the history)
* Deduplicate kbts

* Switch to tuple
Enkidu93 authored Dec 3, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 28f3c44 commit aa7f31f
Showing 2 changed files with 6 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -121,7 +121,7 @@ public async Task RunAsync_EnableKeyTerms()
Assert.That(src1Count, Is.EqualTo(14));
Assert.That(src2Count, Is.EqualTo(0));
Assert.That(trgCount, Is.EqualTo(1));
-Assert.That(termCount, Is.EqualTo(166));
+Assert.That(termCount, Is.EqualTo(144));
});
}

Original file line number Diff line number Diff line change
@@ -101,7 +101,11 @@ public async Task PreprocessAsync(
IParallelTextCorpus parallelKeyTermsCorpus = sourceTermCorpora
.ChooseRandom(Seed)
.AlignRows(targetTermCorpora.ChooseFirst());
-foreach (ParallelTextRow row in parallelKeyTermsCorpus)
+foreach (
+ParallelTextRow row in parallelKeyTermsCorpus.DistinctBy(row =>
+(row.SourceText, row.TargetText)
+)
+)
{
await train(new Row(row.TextId, row.Refs, row.SourceText, row.TargetText, 1));
}

0 comments on commit aa7f31f

Please sign in to comment.