Skip to content

Commit 9b85aa8

Browse files
authored
Support removing strings from the index (#7)
1 parent a14fc43 commit 9b85aa8

7 files changed

+122
-0
lines changed

Diff for: src/DataStore/DataStoreInterface.php

+2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ interface DataStoreInterface
66
{
77
public function add(int $state, string $string): void;
88

9+
/**
 * Removes the given string from the strings stored for the given state.
 *
 * Counterpart of add(). Implementations that do not keep any strings may implement this as a no-op.
 */
public function remove(int $state, string $string): void;
10+
911
/**
1012
* Returns the matching strings per state. Key is the state and the value is an array of matching strings
1113
* for that state.

Diff for: src/DataStore/InMemoryDataStore.php

+11
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,17 @@ public function add(int $state, string $string): void
1414
$this->data[$state][] = $string;
1515
}
1616

17+
/**
 * Removes every occurrence of the given string from the list kept for the given state.
 *
 * The surviving strings are stored as a freshly re-indexed list. If no string survives (or the state had no
 * entry to begin with), the state's entry is dropped from the store altogether.
 */
public function remove(int $state, string $string): void
{
    $remaining = [];

    foreach ($this->data[$state] ?? [] as $candidate) {
        // Entries are matched by their string value.
        if ((string) $candidate !== $string) {
            $remaining[] = $candidate;
        }
    }

    if ([] === $remaining) {
        // Nothing left for this state: do not keep an empty list around.
        unset($this->data[$state]);

        return;
    }

    $this->data[$state] = $remaining;
}
27+
1728
public function all(): array
1829
{
1930
return $this->data;

Diff for: src/DataStore/NullDataStore.php

+5
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ public function add(int $state, string $string): void
99
// noop
1010
}
1111

12+
/**
 * Does nothing: this store's add() is a no-op as well, so there is never anything to remove.
 */
public function remove(int $state, string $string): void
13+
{
14+
// noop - no strings are ever stored, hence nothing has to be removed
15+
}
16+
1217
public function getForStates(array $states = []): array
1318
{
1419
return [];

Diff for: src/StateSet/InMemoryStateSet.php

+5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ public function add(int $state): void
1717
$this->states[$state] = true;
1818
}
1919

20+
/**
 * Removes the given state from the set.
 */
public function remove(int $state): void
21+
{
22+
// States are kept as array keys (add() sets $this->states[$state] = true), so dropping the key removes the
// state. unset() on a key that does not exist is silently ignored.
unset($this->states[$state]);
23+
}
24+
2025
public function all(): array
2126
{
2227
return array_keys($this->states);

Diff for: src/StateSet/StateSetInterface.php

+2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ interface StateSetInterface
66
{
77
public function add(int $state): void;
88

9+
/**
 * Removes the given state from the set. Counterpart of add().
 */
public function remove(int $state): void;
10+
911
/**
1012
* @return array<int>
1113
*/

Diff for: src/StateSetIndex.php

+42
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,48 @@ public function index(array $strings): array
170170
return $assigned;
171171
}
172172

173+
/**
 * Removes an array of strings from the index.
 *
 * For every string its cache entry is dropped, its chain of states is rebuilt via loopOverEveryCharacter(), the
 * string is removed from the data store entry of its final state and the chain is then walked backwards (final
 * state first), removing states from the state set until one is reached that is still in use.
 *
 * @param array<string> $strings
 */
public function removeFromIndex(array $strings): void
{
    // Read once: the alphabet size is the same for every character of every string.
    $alphabetSize = $this->config->getAlphabetSize();

    foreach ($strings as $string) {
        unset($this->indexCache[$string]);

        // $path collects every state produced for the string, $finalState ends up as the last one of them
        // (or stays 0 if the callback is never invoked).
        $path = [];
        $finalState = 0;
        $this->loopOverEveryCharacter(
            $string,
            function (int $mappedChar) use (&$finalState, &$path, $alphabetSize): void {
                $path[] = $finalState = (int) ($finalState * $alphabetSize + $mappedChar);
            }
        );

        $this->dataStore->remove($finalState, $string);

        // A dedicated loop variable is used on purpose so the reference-bound $finalState is not overwritten.
        foreach (array_reverse($path) as $candidate) {
            // If a state is shared with another string or a state exists that follows the current one we must
            // stop the removal process as all previous states and the current one must be kept.
            $stillHoldsStrings = isset($this->dataStore->getForStates([$candidate])[$candidate]);

            if ($stillHoldsStrings || $this->hasNextState($candidate)) {
                break;
            }

            $this->stateSet->remove($candidate);
        }
    }
}
200+
201+
/**
 * Returns true if a state exists that follows the given state.
 *
 * The candidates checked are $state * alphabetSize + 1 up to and including $state * alphabetSize + alphabetSize;
 * the search stops at the first one contained in the state set.
 */
private function hasNextState(int $state): bool
{
    // Both values are identical for every candidate, so they are computed once instead of per iteration.
    $alphabetSize = $this->config->getAlphabetSize();
    $base = $state * $alphabetSize;

    for ($offset = 1; $offset <= $alphabetSize; ++$offset) {
        if ($this->stateSet->has($base + $offset)) {
            return true;
        }
    }

    return false;
}
214+
173215
private function getReachableStates(int $startState, int $editDistance, int $currentDistance = 0): CostAnnotatedStateSet
174216
{
175217
$reachable = new CostAnnotatedStateSet();

Diff for: tests/StateSetIndexTest.php

+55
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,59 @@ public function testAssassinCanBeFound(): void
6262
$this->assertSame([54091 => ['assassin']], $stateSetIndex->findAcceptedStrings('assasin', 2));
6363
$this->assertSame(['assassin'], $stateSetIndex->find('assasin', 2));
6464
}
65+
66+
/**
 * Removing the only indexed string must leave an empty state set, and removing all but one of several indexed
 * strings must leave exactly the states that string produces when indexed alone.
 */
public function testRemoveFromIndex(): void
{
    $index = new StateSetIndex(
        new Config(6, 4),
        new Utf8Alphabet(),
        new InMemoryStateSet(),
        new InMemoryDataStore()
    );

    // Baseline: the states "Mueller" occupies when it is the only indexed string.
    $index->index(['Mueller']);
    $statesOfMuellerAlone = $index->getStateSet()->all();

    $index->removeFromIndex(['Mueller']);
    $this->assertSame([], $index->getStateSet()->all());

    // Index "Mueller" together with other strings, then remove only the others again.
    $toBeRemoved = ['Müller', 'Muentner', 'Muster', 'Mustermann'];
    $index->index(array_merge($toBeRemoved, ['Mueller']));
    $index->removeFromIndex($toBeRemoved);

    $this->assertEquals($statesOfMuellerAlone, $index->getStateSet()->all());
    $this->assertSame(['Mueller'], $index->find('Mueler', 1));
}
83+
84+
/**
 * Fills every possible state of a small index, adds "Mueller" on top and verifies that removing all the filler
 * strings leaves exactly the states "Mueller" produces when indexed alone.
 */
public function testRemoveFromFullIndex(): void
{
    $stateSetIndex = new StateSetIndex(
        new Config(5, 4),
        new Utf8Alphabet(),
        new InMemoryStateSet(),
        new InMemoryDataStore()
    );
    $stateSetIndex->index(['Mueller']);

    $onlyMuellerStates = $stateSetIndex->getStateSet()->all();

    $stateSetIndex->removeFromIndex(['Mueller']);

    $this->assertSame([], $stateSetIndex->getStateSet()->all());

    $alphabetSize = $stateSetIndex->getConfig()->getAlphabetSize();
    $indexLength = $stateSetIndex->getConfig()->getIndexLength();

    // One letter per alphabet slot, starting at "a" (code point 97).
    $letters = [];
    for ($i = 0; $i < $alphabetSize; ++$i) {
        $letters[] = \IntlChar::chr(97 + $i);
    }

    // Build every combination of those letters for the lengths 1..$indexLength exactly once, level by level:
    // each level extends the strings of the previous level by one letter.
    $strings = [];
    $prefixes = [''];
    for ($length = 1; $length <= $indexLength; ++$length) {
        $extended = [];
        foreach ($prefixes as $prefix) {
            foreach ($letters as $letter) {
                $extended[] = $prefix . $letter;
            }
        }

        $strings = array_merge($strings, $extended);
        $prefixes = $extended;
    }

    // Fill every possible state for the configured length and size
    $stateSetIndex->index($strings);
    $stateSetIndex->index(['Mueller']);

    $states = $stateSetIndex->getStateSet()->all();
    sort($states);

    $this->assertSame(range(1, (((4 * 4 + 4) * 4 + 4) * 4 + 4) * 4 + 4), $states, 'No state should be missing');

    $stateSetIndex->removeFromIndex($strings);

    $this->assertEquals($onlyMuellerStates, $stateSetIndex->getStateSet()->all());
}
65120
}

0 commit comments

Comments
 (0)