Skip to content

Commit 554e6fc

Browse files
committed
feat(sdk): Deduplicator detects duplicates in new events.
1 parent 8a3b113 commit 554e6fc

File tree

1 file changed

+59
-9
lines changed

1 file changed

+59
-9
lines changed

crates/matrix-sdk/src/event_cache/deduplicator.rs

Lines changed: 59 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
use std::ops::Not;
15+
use std::{collections::BTreeSet, ops::Not};
1616

1717
use bloomfilter::Bloom;
1818
use matrix_sdk_base::deserialized_responses::SyncTimelineEvent;
@@ -47,24 +47,42 @@ impl Deduplicator {
4747
where
4848
I: Iterator<Item = SyncTimelineEvent> + 'a,
4949
{
50-
events.filter(|event| {
50+
let mut already_seen = BTreeSet::new();
51+
52+
events.filter(move |event| {
5153
let Some(event_id) = event.event_id() else {
5254
// The event has no `event_id`. Safe path: filter it out.
5355
return false;
5456
};
5557

5658
if self.bloom_filter.check_and_set(&event_id) {
57-
// Bloom filter has false positives. We are NOT sure the event
58-
// is NOT present. Even if the false positive rate is low, we
59-
// need to iterate over all events to ensure it isn't present.
60-
61-
room_events
59+
// Bloom filter has false positives. We are NOT sure the event is NOT present.
60+
// Even if the false positive rate is low, we need to iterate over all events to
61+
// ensure it isn't present.
62+
63+
// But first, let's ensure `event` is not a duplicate from `events`, i.e. if the
64+
// iterator itself contains duplicated events! We use a `BTreetSet`, otherwise
65+
// using a bloom filter again may generate false positives.
66+
if already_seen.contains(&event_id) {
67+
// The iterator contains a duplicated `event`. Let's filter it out.
68+
return false;
69+
}
70+
71+
// Now we can iterate over all events to ensure `event` is not present in
72+
// `room_events`.
73+
let result = room_events
6274
.revents()
6375
.any(|(_position, other_event)| {
6476
other_event.event_id().as_ref() == Some(&event_id)
6577
})
66-
.not()
78+
.not();
79+
80+
already_seen.insert(event_id);
81+
82+
result
6783
} else {
84+
already_seen.insert(event_id);
85+
6886
// Bloom filter has no false negatives. We are sure the event is NOT present: we
6987
// can keep it in the iterator.
7088
true
@@ -120,7 +138,39 @@ mod tests {
120138
}
121139

122140
#[test]
123-
fn test_filter_duplicates() {
141+
fn test_filter_duplicates_in_new_events() {
142+
let event_builder = EventBuilder::new();
143+
144+
let event_id_0 = owned_event_id!("$ev0");
145+
let event_id_1 = owned_event_id!("$ev1");
146+
147+
let event_0 = sync_timeline_event(&event_builder, &event_id_0);
148+
let event_1 = sync_timeline_event(&event_builder, &event_id_1);
149+
150+
let mut deduplicator = Deduplicator::new();
151+
let room_events = RoomEvents::new();
152+
153+
let mut events = deduplicator.filter_and_learn(
154+
[
155+
event_0.clone(), // OK
156+
event_0, // Not OK
157+
event_1, // OK
158+
]
159+
.into_iter(),
160+
&room_events,
161+
);
162+
163+
assert_let!(Some(event) = events.next());
164+
assert_eq!(event.event_id(), Some(event_id_0));
165+
166+
assert_let!(Some(event) = events.next());
167+
assert_eq!(event.event_id(), Some(event_id_1));
168+
169+
assert!(events.next().is_none());
170+
}
171+
172+
#[test]
173+
fn test_filter_duplicates_with_existing_events() {
124174
let event_builder = EventBuilder::new();
125175

126176
let event_id_0 = owned_event_id!("$ev0");

0 commit comments

Comments
 (0)