Skip to content

Commit

Permalink
broken
Browse files Browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Nov 19, 2024
1 parent a6cc484 commit ad946b2
Show file tree
Hide file tree
Showing 6 changed files with 344 additions and 196 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,14 @@ public static IMachineBuilder AddHangfireJobServer(
switch (engineType)
{
case EngineType.SmtTransfer:
builder.Services.AddSingleton<SmtTransferEngineStateService>();
builder.AddThotSmtModel().AddTransferEngine().AddUnigramTruecaser();
builder.AddThot();
queues.Add("smt_transfer");
break;
case EngineType.Nmt:
queues.Add("nmt");
break;
case EngineType.Statistical:
builder.AddThot();
queues.Add("statistical");
break;
default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,71 @@ ILanguageTagService languageTagService
{
private readonly ILanguageTagService _languageTagService = languageTagService;

/// <summary>
/// Writes one JSON inference record for every aligned row that has source text but no usable target text,
/// and reports how many records were written.
/// </summary>
/// <param name="inferenceWriter">JSON writer that receives one object per inference row.</param>
/// <param name="corpus">Parallel corpus whose source and target corpora are read.</param>
/// <returns>The number of inference records actually written to <paramref name="inferenceWriter"/>.</returns>
protected override int WriteInferences(Utf8JsonWriter inferenceWriter, ParallelCorpus corpus)
{
    (MonolingualCorpus Corpus, ITextCorpus TextCorpus)[] sourceCorpora = corpus
        .SourceCorpora.SelectMany(c => CorpusService.CreateTextCorpora(c.Files).Select(tc => (c, tc)))
        .ToArray();
    (MonolingualCorpus Corpus, ITextCorpus TextCorpus)[] targetCorpora = corpus
        .TargetCorpora.SelectMany(c => CorpusService.CreateTextCorpora(c.Files).Select(tc => (c, tc)))
        .ToArray();

    int inferenceCount = 0;

    // Only the first target text corpus is consulted; an empty corpus stands in when there is none.
    ITextCorpus targetCorpus = targetCorpora.Length > 0 ? targetCorpora[0].TextCorpus : new DictionaryTextCorpus();

    // Only the first source text corpus is used for inferencing. It is narrowed to the texts/chapters
    // requested for inference, minus anything that is also selected for training.
    ITextCorpus? sourcePretranslateCorpus = sourceCorpora
        .Select(sc =>
        {
            ITextCorpus textCorpus = sc.TextCorpus;
            if (sc.Corpus.InferenceTextIds is not null)
            {
                textCorpus = textCorpus.FilterTexts(
                    sc.Corpus.InferenceTextIds.Except(sc.Corpus.TrainOnTextIds ?? new())
                );
            }
            // Rows without a scripture reference, or corpora without a chapter filter, pass through unchanged.
            return textCorpus.Where(row =>
                row.Ref is not ScriptureRef sr
                || sc.Corpus.InferenceChapters is null
                || (
                    IsInChapters(sr, sc.Corpus.InferenceChapters)
                    && !IsInChapters(sr, sc.Corpus.TrainOnChapters ?? new())
                )
            );
        })
        .FirstOrDefault();

    if (sourcePretranslateCorpus is not null)
    {
        // Loop-invariant: whether the target corpus has any rows at all. Evaluated once instead of per row.
        bool targetCorpusIsEmpty = !targetCorpus.Any();

        foreach (Row row in AlignInferenceCorpus(sourcePretranslateCorpus, targetCorpus))
        {
            if (row.SourceSegment.Length > 0 && (row.TargetSegment.Length == 0 || targetCorpusIsEmpty))
            {
                WriteRow(inferenceWriter, corpus.Id, row.TextId, row.Refs, row.SourceSegment);
                // Count only rows that were actually written, so the total matches the emitted records.
                inferenceCount++;
            }
        }
    }
    return inferenceCount;
}

/// <summary>
/// Emits a single JSON object with the properties <c>corpusId</c>, <c>textId</c>, <c>refs</c>
/// (an array of each reference's string form) and <c>translation</c>, in that order.
/// </summary>
/// <param name="writer">Destination JSON writer.</param>
/// <param name="corpusId">Value written for the <c>corpusId</c> property.</param>
/// <param name="textId">Value written for the <c>textId</c> property.</param>
/// <param name="refs">References; each is written via its <c>ToString()</c> result.</param>
/// <param name="translation">Value written for the <c>translation</c> property.</param>
private static void WriteRow(
    Utf8JsonWriter writer,
    string corpusId,
    string textId,
    IReadOnlyList<object> refs,
    string translation
)
{
    writer.WriteStartObject();

    writer.WritePropertyName("corpusId");
    writer.WriteStringValue(corpusId);

    writer.WritePropertyName("textId");
    writer.WriteStringValue(textId);

    writer.WritePropertyName("refs");
    writer.WriteStartArray();
    for (int i = 0; i < refs.Count; i++)
    {
        string? refText = refs[i].ToString();
        writer.WriteStringValue(refText);
    }
    writer.WriteEndArray();

    writer.WritePropertyName("translation");
    writer.WriteStringValue(translation);

    writer.WriteEndObject();
}

protected override bool ResolveLanguageCodeForBaseModel(string languageCode, out string resolvedCode)
{
return _languageTagService.ConvertToFlores200Code(languageCode, out resolvedCode);
Expand Down
Loading

0 comments on commit ad946b2

Please sign in to comment.