diff --git a/src/StateSetIndex.php b/src/StateSetIndex.php index ac98191..a56241b 100644 --- a/src/StateSetIndex.php +++ b/src/StateSetIndex.php @@ -82,7 +82,7 @@ public function findMatchingStates(string $string, int $editDistance): array // Initial states $states = $this->getReachableStates(0, $editDistance); - $this->loopOverEveryCharacter($string, function (int $mappedChar, $char) use (&$states, $editDistance) { + $this->loopOverEveryCharacter($string, function (int $mappedChar) use (&$states, $editDistance) { $statesStar = new CostAnnotatedStateSet(); // This is S∗ in the paper foreach ($states->all() as $state => $cost) { @@ -98,7 +98,7 @@ public function findMatchingStates(string $string, int $editDistance): array $newState = (int) ($state * $this->config->getAlphabetSize() + $i); if ($this->stateSet->has($newState)) { - if ($i === $this->getAlphabet()->map($char, $this->config->getAlphabetSize())) { + if ($i === $mappedChar) { // Match $statesStarC->add($newState, $cost); } elseif ($cost + 1 <= $editDistance) { @@ -181,12 +181,14 @@ private function getReachableStates(int $startState, int $editDistance, int $cur // A state is always able to reach itself $reachable->add($startState, $currentDistance); - for ($i = 0; $i <= $editDistance; $i++) { - for ($c = 0; $c < $this->config->getAlphabetSize(); $c++) { - $state = $startState + $c * $i; - if ($this->stateSet->has($state)) { - $reachable->add($startState, $currentDistance); - } + if ($currentDistance >= $editDistance) { + return $reachable; + } + + for ($c = 1; $c <= $this->config->getAlphabetSize(); $c++) { + $state = $startState * $this->config->getAlphabetSize() + $c; + if ($this->stateSet->has($state)) { + $reachable = $reachable->mergeWith($this->getReachableStates($state, $editDistance, $currentDistance + 1)); } } @@ -203,7 +205,7 @@ private function loopOverEveryCharacter(string $string, \Closure $closure): void foreach (mb_str_split($indexedSubstring) as $char) { $mappedChar = $this->alphabet->map($char, $this->config->getAlphabetSize()); - $closure($mappedChar, $char); + $closure($mappedChar); } } } diff --git a/tests/StateSetIndexTest.php b/tests/StateSetIndexTest.php index 76e514d..a5a718f 100644 --- a/tests/StateSetIndexTest.php +++ b/tests/StateSetIndexTest.php @@ -49,4 +49,17 @@ public function testWithUtf8Alphabet(): void $this->assertSame([2710 => ['Mueller'], 2843 => ['Muster', 'Mustermann']], $stateSetIndex->findAcceptedStrings('Mustre', 2)); $this->assertSame(['Muster'], $stateSetIndex->find('Mustre', 2)); } -} \ No newline at end of file + + /** + * This use case occurred while testing 2.0.0, which is why this is added as additional test case. + */ + public function testAssassinCanBeFound(): void + { + $stateSetIndex = new StateSetIndex(new Config(14, 4), new Utf8Alphabet(), new InMemoryStateSet(), new InMemoryDataStore()); + $stateSetIndex->index(['assassin']); + + $this->assertSame([844, 3380, 13522, 54091], $stateSetIndex->findMatchingStates('assasin', 2)); + $this->assertSame([54091 => ['assassin']], $stateSetIndex->findAcceptedStrings('assasin', 2)); + $this->assertSame(['assassin'], $stateSetIndex->find('assasin', 2)); + } +}