Skip to content

Commit 27fe48f

Browse files
Toflar and ausi authored
Fix algorithm implementation (#2)
* Added failing test
* Simplified code a little
* Fix getReachableStates() method (#3)
* Defensive programming
* Remove redundant call
* Cleanup tests

---------

Co-authored-by: Martin Auswöger <[email protected]>
1 parent 4abf06e commit 27fe48f

File tree

2 files changed

+25
-10
lines changed

2 files changed

+25
-10
lines changed

src/StateSetIndex.php

+11-9
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ public function findMatchingStates(string $string, int $editDistance): array
8282
// Initial states
8383
$states = $this->getReachableStates(0, $editDistance);
8484

85-
$this->loopOverEveryCharacter($string, function (int $mappedChar, $char) use (&$states, $editDistance) {
85+
$this->loopOverEveryCharacter($string, function (int $mappedChar) use (&$states, $editDistance) {
8686
$statesStar = new CostAnnotatedStateSet(); // This is S∗ in the paper
8787

8888
foreach ($states->all() as $state => $cost) {
@@ -98,7 +98,7 @@ public function findMatchingStates(string $string, int $editDistance): array
9898
$newState = (int) ($state * $this->config->getAlphabetSize() + $i);
9999

100100
if ($this->stateSet->has($newState)) {
101-
if ($i === $this->getAlphabet()->map($char, $this->config->getAlphabetSize())) {
101+
if ($i === $mappedChar) {
102102
// Match
103103
$statesStarC->add($newState, $cost);
104104
} elseif ($cost + 1 <= $editDistance) {
@@ -181,12 +181,14 @@ private function getReachableStates(int $startState, int $editDistance, int $cur
181181
// A state is always able to reach itself
182182
$reachable->add($startState, $currentDistance);
183183

184-
for ($i = 0; $i <= $editDistance; $i++) {
185-
for ($c = 0; $c < $this->config->getAlphabetSize(); $c++) {
186-
$state = $startState + $c * $i;
187-
if ($this->stateSet->has($state)) {
188-
$reachable->add($startState, $currentDistance);
189-
}
184+
if ($currentDistance >= $editDistance) {
185+
return $reachable;
186+
}
187+
188+
for ($c = 1; $c <= $this->config->getAlphabetSize(); $c++) {
189+
$state = $startState * $this->config->getAlphabetSize() + $c;
190+
if ($this->stateSet->has($state)) {
191+
$reachable = $reachable->mergeWith($this->getReachableStates($state, $editDistance, $currentDistance + 1));
190192
}
191193
}
192194

@@ -203,7 +205,7 @@ private function loopOverEveryCharacter(string $string, \Closure $closure): void
203205

204206
foreach (mb_str_split($indexedSubstring) as $char) {
205207
$mappedChar = $this->alphabet->map($char, $this->config->getAlphabetSize());
206-
$closure($mappedChar, $char);
208+
$closure($mappedChar);
207209
}
208210
}
209211
}

tests/StateSetIndexTest.php

+14-1
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,17 @@ public function testWithUtf8Alphabet(): void
4949
$this->assertSame([2710 => ['Mueller'], 2843 => ['Muster', 'Mustermann']], $stateSetIndex->findAcceptedStrings('Mustre', 2));
5050
$this->assertSame(['Muster'], $stateSetIndex->find('Mustre', 2));
5151
}
52-
}
52+
53+
/**
54+
* This case was discovered while testing 2.0.0, which is why it is added as an additional test case.
55+
*/
56+
public function testAssassinCanBeFound(): void
57+
{
58+
$stateSetIndex = new StateSetIndex(new Config(14, 4), new Utf8Alphabet(), new InMemoryStateSet(), new InMemoryDataStore());
59+
$stateSetIndex->index(['assassin']);
60+
61+
$this->assertSame([844, 3380, 13522, 54091], $stateSetIndex->findMatchingStates('assasin', 2));
62+
$this->assertSame([54091 => ['assassin']], $stateSetIndex->findAcceptedStrings('assasin', 2));
63+
$this->assertSame(['assassin'], $stateSetIndex->find('assasin', 2));
64+
}
65+
}

0 commit comments

Comments
 (0)