Skip to content

Commit

Permalink
Add MaxUnapplications and GuessRoot
Browse files Browse the repository at this point in the history
  • Loading branch information
jtmaxwell3 committed Sep 9, 2024
1 parent cb024ef commit 3344aad
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ public IEnumerable<Word> Apply(Word input)
output.Add(mruleOutWord);
if (_morpher.TraceManager.IsTracing)
_morpher.TraceManager.EndUnapplyStratum(_stratum, mruleOutWord);
if (_morpher.MaxUnapplications > 0 && output.Count >= _morpher.MaxUnapplications)
break;
}
return output;
}
Expand Down
82 changes: 81 additions & 1 deletion src/SIL.Machine.Morphology.HermitCrab/Morpher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ public Morpher(ITraceManager traceManager, Language lang)
_analysisRule = lang.CompileAnalysisRule(this);
_synthesisRule = lang.CompileSynthesisRule(this);
MaxStemCount = 2;
MaxUnapplications = 0;
GuessRoot = false;
LexEntrySelector = entry => true;
RuleSelector = rule => true;

Expand All @@ -63,6 +65,18 @@ public ITraceManager TraceManager

public int MaxStemCount { get; set; }

/// <summary>
/// MaxUnapplications limits the number of unapplications, which makes it
/// possible to debug words that would otherwise take a very long time to parse
/// (e.g. 30 minutes) because there are too many unapplications.
/// A value of 0 or less means that no limit is applied; the constructor
/// initializes this property to 0.
/// </summary>
public int MaxUnapplications { get; set; }

/// <summary>
/// When GuessRoot is true and parsing a word produces no results, guess
/// LexEntries for the roots of the analyses and try to synthesize the word
/// from those guesses instead. The constructor initializes this property to false.
/// </summary>
public bool GuessRoot { get; set; }

public Func<LexEntry, bool> LexEntrySelector { get; set; }
public Func<IHCRule, bool> RuleSelector { get; set; }

Expand Down Expand Up @@ -104,8 +118,31 @@ public IEnumerable<Word> ParseWord(string word, out object trace)

File.WriteAllLines("analyses.txt", lines.OrderBy(l => l));
#endif
var origAnalyses = GuessRoot ? analyses.ToList() : null;
var syntheses = Synthesize(word, analyses);
if (GuessRoot && syntheses.Count() == 0)
{
// Guess roots when there are no results.
List<Word> matches = new List<Word>();
foreach (Word analysisWord in origAnalyses)
{
var lexicalGuesses = LexicalGuess(analysisWord).Distinct();
foreach (Word synthesisWord in lexicalGuesses)
{
foreach (Word validWord in _synthesisRule.Apply(synthesisWord).Where(IsWordValid))
{
if (IsMatch(word, validWord))
matches.Add(validWord);
}
}
}

matches.Sort((x, y) => y.Morphs.Count().CompareTo(x.Morphs.Count()));

return matches;
}
return syntheses;

return Synthesize(word, analyses);
}

/// <summary>
Expand Down Expand Up @@ -309,6 +346,49 @@ LexEntry entry in SearchRootAllomorphs(input.Stratum, input.Shape)
}
}

/// <summary>
/// Lazily produces guessed root words for the given analysis word. For every
/// string that the stratum's character definition table can spell for the
/// word's shape, a new lexical entry with a single root allomorph is created
/// and a frozen clone of the input carrying that root allomorph is yielded.
/// </summary>
/// <param name="input">The analysis word whose whole shape is treated as a root.</param>
/// <returns>One frozen clone of <paramref name="input"/> per candidate shape string.</returns>
private IEnumerable<Word> LexicalGuess(Word input)
{
    if (_traceManager.IsTracing)
    {
        _traceManager.LexicalLookup(input.Stratum, input);
    }

    var charDefTable = input.Stratum.CharacterDefinitionTable;

    // Collect every node of the shape, from its first node through its last.
    var wholeShape = Range<ShapeNode>.Create(input.Shape.First, input.Shape.Last);
    var shapeNodes = input.Shape.GetNodes(wholeShape).ToList();

    foreach (string candidate in EnumerateShapeStrings(shapeNodes, 0, "", charDefTable))
    {
        // The guessed entry uses the candidate string as both its id and its gloss.
        var guessedEntry = new LexEntry();
        guessedEntry.Id = candidate;
        guessedEntry.SyntacticFeatureStruct = input.SyntacticFeatureStruct;
        guessedEntry.Gloss = candidate;
        guessedEntry.Stratum = input.Stratum;
        guessedEntry.IsPartial = input.SyntacticFeatureStruct.IsEmpty;

        var guessedRoot = new RootAllomorph(new Segments(charDefTable, candidate));
        guessedEntry.Allomorphs.Add(guessedRoot);

        Word guessedWord = input.Clone();
        guessedWord.RootAllomorph = guessedRoot;

        if (_traceManager.IsTracing)
        {
            _traceManager.SynthesizeWord(_lang, guessedWord);
        }

        guessedWord.Freeze();
        yield return guessedWord;
    }
}

/// <summary>
/// Recursively builds every string that can be spelled for the nodes starting
/// at <paramref name="index"/>, by appending to <paramref name="prefix"/> one of
/// the string representations that <paramref name="table"/> reports for each node.
/// </summary>
/// <param name="nodes">The shape nodes to spell, in order.</param>
/// <param name="index">Index of the next node to spell; equal to the node count when done.</param>
/// <param name="prefix">The string accumulated for the nodes before <paramref name="index"/>.</param>
/// <param name="table">Table that supplies the matching string representations for a node.</param>
/// <returns>
/// All completed strings. The number of results is the product of the number of
/// matching representations of each remaining node, so it is empty as soon as
/// one node has no matching representation.
/// </returns>
private IEnumerable<string> EnumerateShapeStrings(IList<ShapeNode> nodes, int index, string prefix, CharacterDefinitionTable table)
{
    // Every node has been spelled: the prefix is a complete string.
    if (index == nodes.Count)
    {
        return new List<string> { prefix };
    }

    var strings = new List<string>();
    foreach (string strRep in table.GetMatchingStrReps(nodes[index]))
    {
        strings.AddRange(EnumerateShapeStrings(nodes, index + 1, prefix + strRep, table));
    }
    return strings;
}

private bool IsWordValid(Word word)
{
if (
Expand Down
32 changes: 32 additions & 0 deletions tests/SIL.Machine.Morphology.HermitCrab.Tests/MorpherTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,38 @@ public void AnalyzeWord_CannotAnalyze_ReturnsEmptyEnumerable()
Assert.That(morpher.AnalyzeWord("sagt"), Is.Empty);
}

/// <summary>
/// Verifies that words which cannot be analyzed with the lexicon yield no
/// analyses by default, and that enabling <c>GuessRoot</c> yields analyses
/// with a guessed root, both for a bare root and for a root plus suffix.
/// </summary>
[Test]
public void AnalyzeWord_CanGuess_ReturnsCorrectAnalysis()
{
    var any = FeatureStruct.New().Symbol(HCFeatureSystem.Segment).Value;

    // A suffix rule that copies the whole input and appends "+d".
    var edSuffix = new AffixProcessRule
    {
        Id = "PAST",
        Name = "ed_suffix",
        Gloss = "PAST",
        RequiredSyntacticFeatureStruct = FeatureStruct.New(Language.SyntacticFeatureSystem).Symbol("V").Value
    };
    edSuffix.Allomorphs.Add(
        new AffixProcessAllomorph
        {
            Lhs = { Pattern<Word, ShapeNode>.New("1").Annotation(any).OneOrMore.Value },
            Rhs = { new CopyFromInput("1"), new InsertSegments(Table3, "+d") }
        }
    );
    Morphophonemic.MorphologicalRules.Add(edSuffix);

    // Without root guessing, neither word can be analyzed.
    var morpher = new Morpher(TraceManager, Language);
    Assert.That(morpher.AnalyzeWord("gag"), Is.Empty);
    Assert.That(morpher.AnalyzeWord("gagd"), Is.Empty);

    morpher.GuessRoot = true;

    // Is.EqualTo is used rather than Is.EquivalentTo: the latter would compare
    // the strings as unordered collections of characters and accept any permutation.
    var analyses = morpher.AnalyzeWord("gag").ToList();
    Assert.That(analyses, Is.Not.Empty);
    Assert.That(analyses[0].ToString(), Is.EqualTo("[*gag]"));

    var analyses2 = morpher.AnalyzeWord("gagd").ToList();
    Assert.That(analyses2, Is.Not.Empty);
    Assert.That(analyses2[0].ToString(), Is.EqualTo("[*gag ed_suffix]"));
}

[Test]
public void GenerateWords_CanGenerate_ReturnsCorrectWord()
{
Expand Down

0 comments on commit 3344aad

Please sign in to comment.