Skip to content

Commit

Permalink
Fix algorithm implementation (#2)
Browse files Browse the repository at this point in the history
* Added failing test

* Simplified code a little

* Fix getReachableStates() method (#3)

* Defensive programming

* Remove redundant call

* Cleanup tests

---------

Co-authored-by: Martin Auswöger <[email protected]>
  • Loading branch information
Toflar and ausi authored Oct 6, 2023
1 parent 4abf06e commit 27fe48f
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 10 deletions.
20 changes: 11 additions & 9 deletions src/StateSetIndex.php
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ public function findMatchingStates(string $string, int $editDistance): array
// Initial states
$states = $this->getReachableStates(0, $editDistance);

$this->loopOverEveryCharacter($string, function (int $mappedChar, $char) use (&$states, $editDistance) {
$this->loopOverEveryCharacter($string, function (int $mappedChar) use (&$states, $editDistance) {
$statesStar = new CostAnnotatedStateSet(); // This is S∗ in the paper

foreach ($states->all() as $state => $cost) {
Expand All @@ -98,7 +98,7 @@ public function findMatchingStates(string $string, int $editDistance): array
$newState = (int) ($state * $this->config->getAlphabetSize() + $i);

if ($this->stateSet->has($newState)) {
if ($i === $this->getAlphabet()->map($char, $this->config->getAlphabetSize())) {
if ($i === $mappedChar) {
// Match
$statesStarC->add($newState, $cost);
} elseif ($cost + 1 <= $editDistance) {
Expand Down Expand Up @@ -181,12 +181,14 @@ private function getReachableStates(int $startState, int $editDistance, int $cur
// A state is always able to reach itself
$reachable->add($startState, $currentDistance);

for ($i = 0; $i <= $editDistance; $i++) {
for ($c = 0; $c < $this->config->getAlphabetSize(); $c++) {
$state = $startState + $c * $i;
if ($this->stateSet->has($state)) {
$reachable->add($startState, $currentDistance);
}
if ($currentDistance >= $editDistance) {
return $reachable;
}

for ($c = 1; $c <= $this->config->getAlphabetSize(); $c++) {
$state = $startState * $this->config->getAlphabetSize() + $c;
if ($this->stateSet->has($state)) {
$reachable = $reachable->mergeWith($this->getReachableStates($state, $editDistance, $currentDistance + 1));
}
}

Expand All @@ -203,7 +205,7 @@ private function loopOverEveryCharacter(string $string, \Closure $closure): void

foreach (mb_str_split($indexedSubstring) as $char) {
$mappedChar = $this->alphabet->map($char, $this->config->getAlphabetSize());
$closure($mappedChar, $char);
$closure($mappedChar);
}
}
}
15 changes: 14 additions & 1 deletion tests/StateSetIndexTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,17 @@ public function testWithUtf8Alphabet(): void
$this->assertSame([2710 => ['Mueller'], 2843 => ['Muster', 'Mustermann']], $stateSetIndex->findAcceptedStrings('Mustre', 2));
$this->assertSame(['Muster'], $stateSetIndex->find('Mustre', 2));
}
}

/**
 * Additional test case for a use case that occurred while testing 2.0.0:
 * the indexed word "assassin" is looked up via the misspelling "assasin"
 * with an edit distance of 2.
 */
public function testAssassinCanBeFound(): void
{
    $query = 'assasin';
    $maxEditDistance = 2;

    $index = new StateSetIndex(
        new Config(14, 4),
        new Utf8Alphabet(),
        new InMemoryStateSet(),
        new InMemoryDataStore()
    );
    $index->index(['assassin']);

    // Checked in the same order as before: matching states, then accepted strings, then final result.
    $matchingStates = $index->findMatchingStates($query, $maxEditDistance);
    $this->assertSame([844, 3380, 13522, 54091], $matchingStates);

    $acceptedStrings = $index->findAcceptedStrings($query, $maxEditDistance);
    $this->assertSame([54091 => ['assassin']], $acceptedStrings);

    $found = $index->find($query, $maxEditDistance);
    $this->assertSame(['assassin'], $found);
}
}

0 comments on commit 27fe48f

Please sign in to comment.