Skip to content

Commit bb7fcd1

Browse files
committed
fixup! use Decoration
1 parent 554e6fc commit bb7fcd1

File tree

3 files changed

+89
-72
lines changed

3 files changed

+89
-72
lines changed

Cargo.lock

Lines changed: 4 additions & 20 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/matrix-sdk/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ async-channel = "2.2.1"
7575
async-stream = { workspace = true }
7676
async-trait = { workspace = true }
7777
axum = { version = "0.7.4", optional = true }
78-
bloomfilter = { version = "1.0.13", default-features = false }
7978
bytes = "1.1.0"
8079
bytesize = "1.1"
8180
chrono = { version = "0.4.23", optional = true }
@@ -85,6 +84,7 @@ eyeball-im = { workspace = true }
8584
eyre = { version = "0.6.8", optional = true }
8685
futures-core = { workspace = true }
8786
futures-util = { workspace = true }
87+
growable-bloom-filter = { workspace = true }
8888
http = { workspace = true }
8989
imbl = { workspace = true, features = ["serde"] }
9090
indexmap = "2.0.2"

crates/matrix-sdk/src/event_cache/deduplicator.rs

Lines changed: 84 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -12,47 +12,55 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
use std::{collections::BTreeSet, ops::Not};
15+
use std::collections::BTreeSet;
1616

17-
use bloomfilter::Bloom;
17+
use growable_bloom_filter::{GrowableBloom, GrowableBloomBuilder};
1818
use matrix_sdk_base::deserialized_responses::SyncTimelineEvent;
19-
use ruma::OwnedEventId;
2019

2120
use super::store::RoomEvents;
2221

2322
pub struct Deduplicator {
24-
bloom_filter: Bloom<OwnedEventId>,
23+
bloom_filter: GrowableBloom,
2524
}
2625

2726
impl Deduplicator {
2827
const APPROXIMATED_MAXIMUM_NUMBER_OF_EVENTS: usize = 800_000;
2928
const DESIRED_FALSE_POSITIVE_RATE: f64 = 0.001;
30-
const SEED_FOR_HASHER: &'static [u8; 32] = b"matrix_sdk_event_cache_deduptor!";
3129

30+
/// Create a new `Self`.
3231
pub fn new() -> Self {
3332
Self {
34-
bloom_filter: Bloom::new_for_fp_rate_with_seed(
35-
Self::APPROXIMATED_MAXIMUM_NUMBER_OF_EVENTS,
36-
Self::DESIRED_FALSE_POSITIVE_RATE,
37-
Self::SEED_FOR_HASHER,
38-
),
33+
bloom_filter: GrowableBloomBuilder::new()
34+
.estimated_insertions(Self::APPROXIMATED_MAXIMUM_NUMBER_OF_EVENTS)
35+
.desired_error_ratio(Self::DESIRED_FALSE_POSITIVE_RATE)
36+
.build(),
3937
}
4038
}
4139

42-
pub fn filter_and_learn<'a, I>(
40+
/// Scan a collection of events and detect duplications.
41+
///
42+
/// This method takes a collection of events `events_to_scan` and returns a
43+
/// new collection of events, where each event is decorated by a
44+
/// [`Decoration`], so that the caller can decide what to do with these
45+
/// events.
46+
///
47+
/// Each scanned event will update `Self`'s internal state.
48+
///
49+
/// `existing_events` represents all events of a room that already exist.
50+
pub fn scan_and_learn<'a, I>(
4351
&'a mut self,
44-
events: I,
45-
room_events: &'a RoomEvents,
46-
) -> impl Iterator<Item = I::Item> + 'a
52+
events_to_scan: I,
53+
existing_events: &'a RoomEvents,
54+
) -> impl Iterator<Item = Decoration<I::Item>> + 'a
4755
where
4856
I: Iterator<Item = SyncTimelineEvent> + 'a,
4957
{
5058
let mut already_seen = BTreeSet::new();
5159

52-
events.filter(move |event| {
60+
events_to_scan.map(move |event| {
5361
let Some(event_id) = event.event_id() else {
54-
// The event has no `event_id`. Safe path: filter it out.
55-
return false;
62+
// The event has no `event_id`.
63+
return Decoration::Invalid(event);
5664
};
5765

5866
if self.bloom_filter.check_and_set(&event_id) {
@@ -64,33 +72,47 @@ impl Deduplicator {
6472
// iterator itself contains duplicated events! We use a `BTreetSet`, otherwise
6573
// using a bloom filter again may generate false positives.
6674
if already_seen.contains(&event_id) {
67-
// The iterator contains a duplicated `event`. Let's filter it out.
68-
return false;
75+
// The iterator contains a duplicated `event`.
76+
return Decoration::Duplicated(event);
6977
}
7078

7179
// Now we can iterate over all events to ensure `event` is not present in
72-
// `room_events`.
73-
let result = room_events
74-
.revents()
75-
.any(|(_position, other_event)| {
76-
other_event.event_id().as_ref() == Some(&event_id)
77-
})
78-
.not();
80+
// `existing_events`.
81+
let duplicated = existing_events.revents().any(|(_position, other_event)| {
82+
other_event.event_id().as_ref() == Some(&event_id)
83+
});
7984

8085
already_seen.insert(event_id);
8186

82-
result
87+
if duplicated {
88+
Decoration::Duplicated(event)
89+
} else {
90+
Decoration::Ok(event)
91+
}
8392
} else {
8493
already_seen.insert(event_id);
8594

8695
// Bloom filter has no false negatives. We are sure the event is NOT present: we
8796
// can keep it in the iterator.
88-
true
97+
Decoration::Ok(event)
8998
}
9099
})
91100
}
92101
}
93102

103+
/// Information about the scanned collection of events.
104+
#[derive(Debug)]
105+
pub enum Decoration<I> {
106+
/// This event is not duplicated.
107+
Ok(I),
108+
109+
/// This event is duplicated.
110+
Duplicated(I),
111+
112+
/// This event is invalid (i.e. not well formed).
113+
Invalid(I),
114+
}
115+
94116
#[cfg(test)]
95117
mod tests {
96118
use assert_matches2::assert_let;
@@ -120,18 +142,18 @@ mod tests {
120142
let event_2 = sync_timeline_event(&event_builder, &event_id_2);
121143

122144
let mut deduplicator = Deduplicator::new();
123-
let room_events = RoomEvents::new();
145+
let existing_events = RoomEvents::new();
124146

125147
let mut events =
126-
deduplicator.filter_and_learn([event_0, event_1, event_2].into_iter(), &room_events);
148+
deduplicator.scan_and_learn([event_0, event_1, event_2].into_iter(), &existing_events);
127149

128-
assert_let!(Some(event) = events.next());
150+
assert_let!(Some(Decoration::Ok(event)) = events.next());
129151
assert_eq!(event.event_id(), Some(event_id_0));
130152

131-
assert_let!(Some(event) = events.next());
153+
assert_let!(Some(Decoration::Ok(event)) = events.next());
132154
assert_eq!(event.event_id(), Some(event_id_1));
133155

134-
assert_let!(Some(event) = events.next());
156+
assert_let!(Some(Decoration::Ok(event)) = events.next());
135157
assert_eq!(event.event_id(), Some(event_id_2));
136158

137159
assert!(events.next().is_none());
@@ -148,22 +170,25 @@ mod tests {
148170
let event_1 = sync_timeline_event(&event_builder, &event_id_1);
149171

150172
let mut deduplicator = Deduplicator::new();
151-
let room_events = RoomEvents::new();
173+
let existing_events = RoomEvents::new();
152174

153-
let mut events = deduplicator.filter_and_learn(
175+
let mut events = deduplicator.scan_and_learn(
154176
[
155177
event_0.clone(), // OK
156178
event_0, // Not OK
157179
event_1, // OK
158180
]
159181
.into_iter(),
160-
&room_events,
182+
&existing_events,
161183
);
162184

163-
assert_let!(Some(event) = events.next());
185+
assert_let!(Some(Decoration::Ok(event)) = events.next());
186+
assert_eq!(event.event_id(), Some(event_id_0.clone()));
187+
188+
assert_let!(Some(Decoration::Duplicated(event)) = events.next());
164189
assert_eq!(event.event_id(), Some(event_id_0));
165190

166-
assert_let!(Some(event) = events.next());
191+
assert_let!(Some(Decoration::Ok(event)) = events.next());
167192
assert_eq!(event.event_id(), Some(event_id_1));
168193

169194
assert!(events.next().is_none());
@@ -182,35 +207,43 @@ mod tests {
182207
let event_2 = sync_timeline_event(&event_builder, &event_id_2);
183208

184209
let mut deduplicator = Deduplicator::new();
185-
let mut room_events = RoomEvents::new();
210+
let mut existing_events = RoomEvents::new();
186211

187-
// Simulate `event_1` is inserted inside `room_events`.
212+
// Simulate `event_1` is inserted inside `existing_events`.
188213
{
189214
let mut events =
190-
deduplicator.filter_and_learn([event_1.clone()].into_iter(), &room_events);
215+
deduplicator.scan_and_learn([event_1.clone()].into_iter(), &existing_events);
191216

192-
assert_let!(Some(event_1) = events.next());
193-
assert_eq!(event_1.event_id(), Some(event_id_1));
217+
assert_let!(Some(Decoration::Ok(event_1)) = events.next());
218+
assert_eq!(event_1.event_id(), Some(event_id_1.clone()));
194219

195220
assert!(events.next().is_none());
196221

197222
drop(events); // make the borrow checker happy.
198223

199-
// Now we can push `event_1` inside `room_events`.
200-
room_events.push_event(event_1);
224+
// Now we can push `event_1` inside `existing_events`.
225+
existing_events.push_events([event_1.clone()]);
201226
}
202227

203228
// `event_1` will be duplicated.
204229
{
205-
let mut events = deduplicator
206-
.filter_and_learn([event_0, event_1, event_2].into_iter(), &room_events);
207-
208-
assert_let!(Some(event) = events.next());
230+
let mut events = deduplicator.scan_and_learn(
231+
[
232+
event_0, // OK
233+
event_1, // Not OK
234+
event_2, // Ok
235+
]
236+
.into_iter(),
237+
&existing_events,
238+
);
239+
240+
assert_let!(Some(Decoration::Ok(event)) = events.next());
209241
assert_eq!(event.event_id(), Some(event_id_0));
210242

211-
// `event_1` is missing.
243+
assert_let!(Some(Decoration::Duplicated(event)) = events.next());
244+
assert_eq!(event.event_id(), Some(event_id_1));
212245

213-
assert_let!(Some(event) = events.next());
246+
assert_let!(Some(Decoration::Ok(event)) = events.next());
214247
assert_eq!(event.event_id(), Some(event_id_2));
215248

216249
assert!(events.next().is_none());

0 commit comments

Comments
 (0)