Skip to content

Commit 27de614

Browse files
committed
Move memory to Engine
1 parent 6c18fe3 commit 27de614

File tree

6 files changed

+139
-101
lines changed

6 files changed

+139
-101
lines changed

src/blocker.rs

Lines changed: 20 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,15 @@
33
use memchr::{memchr as find_char, memrchr as find_char_reverse};
44
use once_cell::sync::Lazy;
55
use serde::Serialize;
6-
use std::collections::{HashMap, HashSet};
6+
use std::collections::HashSet;
77
use std::ops::DerefMut;
88

9-
use crate::filters::fb_network::NetworkFilterSharedState;
10-
use crate::filters::flat_builder::FlatBufferBuilder;
11-
use crate::filters::network::{NetworkFilter, NetworkFilterMaskHelper};
12-
use crate::filters::unsafe_tools::VerifiedFlatbufferMemory;
9+
use crate::filters::fb_network::SharedStateRef;
10+
use crate::filters::network::NetworkFilterMaskHelper;
1311
use crate::network_filter_list::NetworkFilterList;
1412
use crate::regex_manager::{RegexManager, RegexManagerDiscardPolicy};
1513
use crate::request::Request;
1614
use crate::resources::ResourceStorage;
17-
use crate::utils::Hash;
1815

1916
/// Options used when constructing a [`Blocker`].
2017
pub struct BlockerOptions {
@@ -79,10 +76,9 @@ pub(crate) enum FilterId {
7976
TaggedFiltersAll = 7,
8077
Size = 8,
8178
}
79+
8280
/// Stores network filters for efficient querying.
8381
pub struct Blocker {
84-
pub(crate) memory: VerifiedFlatbufferMemory,
85-
8682
// Enabled tags are not serialized - when deserializing, tags of the existing
8783
// instance (the one we are recreating lists into) are maintained
8884
pub(crate) tags_enabled: HashSet<String>,
@@ -92,7 +88,7 @@ pub struct Blocker {
9288
#[cfg(not(feature = "unsync-regex-caching"))]
9389
pub(crate) regex_manager: std::sync::Mutex<RegexManager>,
9490

95-
pub(crate) shared_state: NetworkFilterSharedState,
91+
pub(crate) shared_state: SharedStateRef,
9692
}
9793

9894
impl Blocker {
@@ -105,7 +101,7 @@ impl Blocker {
105101
pub(crate) fn get_list(&self, id: FilterId) -> NetworkFilterList {
106102
// TODO: verify lists() size and id is in range
107103
NetworkFilterList {
108-
list: self.memory.root().lists().get(id as usize),
104+
list: self.shared_state.memory.root().lists().get(id as usize),
109105
shared_state: &self.shared_state,
110106
}
111107
}
@@ -439,82 +435,26 @@ impl Blocker {
439435
Some(merged)
440436
}
441437

442-
pub(crate) fn from_verified_memory(memory: VerifiedFlatbufferMemory) -> Self {
443-
// Reconstruct the unique_domains_hashes_map from the flatbuffer data
444-
let root = memory.root();
445-
let mut unique_domains_hashes_map: HashMap<crate::utils::Hash, u32> = HashMap::new();
446-
for (index, hash) in root.unique_domains_hashes().iter().enumerate() {
447-
unique_domains_hashes_map.insert(hash, index as u32);
448-
}
449-
450-
let shared_state = NetworkFilterSharedState {
451-
unique_domains_hashes_map,
452-
};
453-
438+
pub(crate) fn from_shared_state(shared_state: SharedStateRef) -> Self {
454439
Self {
440+
shared_state,
455441
tags_enabled: HashSet::new(),
456442
regex_manager: Default::default(),
457-
memory,
458-
shared_state,
459443
}
460444
}
461445

462-
pub fn new(mut network_filters: Vec<NetworkFilter>, options: &BlockerOptions) -> Self {
463-
// Injections
464-
// TODO: resource handling
465-
466-
let mut builder = FlatBufferBuilder::new(FilterId::Size as usize);
467-
468-
let mut badfilter_ids: HashSet<Hash> = HashSet::new();
469-
for filter in network_filters.iter() {
470-
if filter.is_badfilter() {
471-
badfilter_ids.insert(filter.get_id_without_badfilter());
472-
}
473-
}
474-
for filter in network_filters.drain(..) {
475-
// skip any bad filters
476-
let filter_id = filter.get_id();
477-
if badfilter_ids.contains(&filter_id) || filter.is_badfilter() {
478-
continue;
479-
}
480-
481-
// Redirects are independent of blocking behavior.
482-
if filter.is_redirect() {
483-
builder.add_filter(filter.clone(), FilterId::Redirects as u32);
484-
}
485-
486-
let list_id: FilterId = if filter.is_csp() {
487-
FilterId::Csp
488-
} else if filter.is_removeparam() {
489-
FilterId::RemoveParam
490-
} else if filter.is_generic_hide() {
491-
FilterId::GenericHide
492-
} else if filter.is_exception() {
493-
FilterId::Exceptions
494-
} else if filter.is_important() {
495-
FilterId::Importants
496-
} else if filter.tag.is_some() && !filter.is_redirect() {
497-
// `tag` + `redirect` is unsupported for now.
498-
FilterId::TaggedFiltersAll
499-
} else if (filter.is_redirect() && filter.also_block_redirect())
500-
|| !filter.is_redirect()
501-
{
502-
FilterId::Filters
503-
} else {
504-
continue;
505-
};
506-
507-
builder.add_filter(filter, list_id as u32);
508-
}
509-
510-
let memory = builder.finish(if options.enable_optimizations {
511-
// Don't optimize removeparam, since it can fuse filters without respecting distinct
512-
|id: u32| id != FilterId::RemoveParam as u32
513-
} else {
514-
|_| false
515-
});
516-
517-
Self::from_verified_memory(memory)
446+
#[cfg(test)]
447+
pub(crate) fn new(
448+
network_filters: Vec<crate::filters::network::NetworkFilter>,
449+
options: &BlockerOptions,
450+
) -> Self {
451+
use crate::filters::fb_network::SharedState;
452+
use crate::filters::flat_builder::FlatBufferBuilder;
453+
454+
let memory =
455+
FlatBufferBuilder::make_flatbuffer(network_filters, options.enable_optimizations);
456+
let shared_state = SharedState::new(memory);
457+
Self::from_shared_state(shared_state)
518458
}
519459

520460
pub fn use_tags(&mut self, tags: &[&str]) {

src/engine.rs

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
//! The adblock [`Engine`] is the primary interface for adblocking.
22
3-
use crate::blocker::{Blocker, BlockerOptions, BlockerResult};
3+
use crate::blocker::{Blocker, BlockerResult};
44
use crate::cosmetic_filter_cache::{CosmeticFilterCache, UrlSpecificResources};
5+
use crate::filters::fb_network::{SharedState, SharedStateRef};
6+
use crate::filters::flat_builder::FlatBufferBuilder;
57
use crate::lists::{FilterSet, ParseOptions};
68
use crate::regex_manager::RegexManagerDiscardPolicy;
79
use crate::request::Request;
@@ -46,6 +48,7 @@ pub struct Engine {
4648
blocker: Blocker,
4749
cosmetic_cache: CosmeticFilterCache,
4850
resources: ResourceStorage,
51+
shared_state: SharedStateRef,
4952
}
5053

5154
impl Default for Engine {
@@ -60,15 +63,16 @@ impl Engine {
6063
/// used with deserialization.
6164
/// - `optimize` specifies whether or not to attempt to compress the internal representation by
6265
/// combining similar rules.
63-
pub fn new(optimize: bool) -> Self {
64-
let blocker_options = BlockerOptions {
65-
enable_optimizations: optimize,
66-
};
66+
pub fn new(_optimize: bool) -> Self {
67+
// TODO: remove _optimize?
68+
69+
let shared_state = SharedState::new(Default::default());
6770

6871
Self {
69-
blocker: Blocker::new(vec![], &blocker_options),
72+
blocker: Blocker::from_shared_state(shared_state.clone()),
7073
cosmetic_cache: CosmeticFilterCache::new(),
7174
resources: ResourceStorage::default(),
75+
shared_state,
7276
}
7377
}
7478

@@ -110,14 +114,15 @@ impl Engine {
110114
..
111115
} = set;
112116

113-
let blocker_options = BlockerOptions {
114-
enable_optimizations: optimize,
115-
};
117+
let memory = FlatBufferBuilder::make_flatbuffer(network_filters, optimize);
118+
119+
let shared_state = SharedState::new(memory);
116120

117121
Self {
118-
blocker: Blocker::new(network_filters, &blocker_options),
122+
blocker: Blocker::from_shared_state(shared_state.clone()),
119123
cosmetic_cache: CosmeticFilterCache::from_rules(cosmetic_filters),
120124
resources: ResourceStorage::default(),
125+
shared_state,
121126
}
122127
}
123128

@@ -247,7 +252,7 @@ impl Engine {
247252

248253
/// Serializes the `Engine` into a binary format so that it can be quickly reloaded later.
249254
pub fn serialize(&self) -> Result<Vec<u8>, crate::data_format::SerializationError> {
250-
crate::data_format::serialize_engine(&self.blocker.memory, &self.cosmetic_cache)
255+
crate::data_format::serialize_engine(&self.shared_state.memory, &self.cosmetic_cache)
251256
}
252257

253258
/// Deserialize the `Engine` from the binary format generated by `Engine::serialize`.
@@ -261,7 +266,8 @@ impl Engine {
261266
) -> Result<(), crate::data_format::DeserializationError> {
262267
let current_tags = self.blocker.tags_enabled();
263268
let (memory, cosmetic_cache) = crate::data_format::deserialize_engine(serialized)?;
264-
self.blocker = Blocker::from_verified_memory(memory);
269+
self.shared_state = SharedState::new(memory);
270+
self.blocker = Blocker::from_shared_state(self.shared_state.clone());
265271
self.blocker
266272
.use_tags(&current_tags.iter().map(|s| &**s).collect::<Vec<_>>());
267273
self.cosmetic_cache = cosmetic_cache;

src/filters/fb_network.rs

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use std::collections::HashMap;
44

55
use crate::filters::network::{NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable};
6-
use crate::filters::unsafe_tools::fb_vector_to_slice;
6+
use crate::filters::unsafe_tools::{fb_vector_to_slice, VerifiedFlatbufferMemory};
77

88
use crate::regex_manager::RegexManager;
99
use crate::request::Request;
@@ -67,15 +67,37 @@ impl ExactSizeIterator for FlatPatternsIterator<'_> {
6767
}
6868
}
6969

70-
#[derive(Debug, Default)]
71-
pub(crate) struct NetworkFilterSharedState {
70+
// TODO: do we need another feature for this?
71+
#[cfg(feature = "unsync-regex-caching")]
72+
pub(crate) type SharedStateRef = std::rc::Rc<SharedState>;
73+
#[cfg(not(feature = "unsync-regex-caching"))]
74+
pub(crate) type SharedStateRef = std::rc::Arc<SharedState>;
75+
76+
#[derive(Default)]
77+
pub(crate) struct SharedState {
78+
pub(crate) memory: VerifiedFlatbufferMemory,
7279
pub(crate) unique_domains_hashes_map: HashMap<Hash, u32>,
7380
}
7481

82+
impl SharedState {
83+
pub(crate) fn new(memory: VerifiedFlatbufferMemory) -> SharedStateRef {
84+
// Reconstruct the unique_domains_hashes_map from the flatbuffer data
85+
let root = memory.root();
86+
let mut unique_domains_hashes_map: HashMap<crate::utils::Hash, u32> = HashMap::new();
87+
for (index, hash) in root.unique_domains_hashes().iter().enumerate() {
88+
unique_domains_hashes_map.insert(hash, index as u32);
89+
}
90+
SharedStateRef::new(Self {
91+
memory,
92+
unique_domains_hashes_map,
93+
})
94+
}
95+
}
96+
7597
/// Internal implementation of [NetworkFilter] that is compatible with flatbuffers.
7698
pub(crate) struct FlatNetworkFilter<'a> {
7799
key: u64,
78-
shared_state: &'a NetworkFilterSharedState,
100+
shared_state: &'a SharedState,
79101
fb_filter: &'a fb::NetworkFilter<'a>,
80102

81103
pub(crate) mask: NetworkFilterMask,
@@ -86,7 +108,7 @@ impl<'a> FlatNetworkFilter<'a> {
86108
pub fn new(
87109
filter: &'a fb::NetworkFilter<'a>,
88110
index: usize,
89-
shared_state: &'a NetworkFilterSharedState,
111+
shared_state: &'a SharedState,
90112
) -> Self {
91113
Self {
92114
fb_filter: filter,

src/filters/flat_builder.rs

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
//! Builder for creating flatbuffer-compatible Engine.
22
3-
use std::collections::HashMap;
3+
use std::collections::{HashMap, HashSet};
44
use std::vec;
55

66
use flatbuffers::WIPOffset;
77

8-
use crate::filters::network::NetworkFilter;
8+
use crate::blocker::FilterId;
9+
use crate::filters::network::{NetworkFilter, NetworkFilterMaskHelper};
910
use crate::filters::unsafe_tools::VerifiedFlatbufferMemory;
1011
use crate::network_filter_list::token_histogram;
1112
use crate::optimizer;
@@ -261,4 +262,63 @@ impl FlatBufferBuilder {
261262
},
262263
)
263264
}
265+
266+
pub fn make_flatbuffer(
267+
mut network_filters: Vec<NetworkFilter>,
268+
optimize: bool,
269+
) -> VerifiedFlatbufferMemory {
270+
// Injections
271+
// TODO: resource handling
272+
273+
let mut builder = FlatBufferBuilder::new(FilterId::Size as usize);
274+
275+
let mut badfilter_ids: HashSet<Hash> = HashSet::new();
276+
for filter in network_filters.iter() {
277+
if filter.is_badfilter() {
278+
badfilter_ids.insert(filter.get_id_without_badfilter());
279+
}
280+
}
281+
for filter in network_filters.drain(..) {
282+
// skip any bad filters
283+
let filter_id = filter.get_id();
284+
if badfilter_ids.contains(&filter_id) || filter.is_badfilter() {
285+
continue;
286+
}
287+
288+
// Redirects are independent of blocking behavior.
289+
if filter.is_redirect() {
290+
builder.add_filter(filter.clone(), FilterId::Redirects as u32);
291+
}
292+
293+
let list_id: FilterId = if filter.is_csp() {
294+
FilterId::Csp
295+
} else if filter.is_removeparam() {
296+
FilterId::RemoveParam
297+
} else if filter.is_generic_hide() {
298+
FilterId::GenericHide
299+
} else if filter.is_exception() {
300+
FilterId::Exceptions
301+
} else if filter.is_important() {
302+
FilterId::Importants
303+
} else if filter.tag.is_some() && !filter.is_redirect() {
304+
// `tag` + `redirect` is unsupported for now.
305+
FilterId::TaggedFiltersAll
306+
} else if (filter.is_redirect() && filter.also_block_redirect())
307+
|| !filter.is_redirect()
308+
{
309+
FilterId::Filters
310+
} else {
311+
continue;
312+
};
313+
314+
builder.add_filter(filter, list_id as u32);
315+
}
316+
317+
builder.finish(if optimize {
318+
// Don't optimize removeparam, since it can fuse filters without respecting distinct
319+
|id: u32| id != FilterId::RemoveParam as u32
320+
} else {
321+
|_| false
322+
})
323+
}
264324
}

src/filters/unsafe_tools.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@ pub(crate) struct VerifiedFlatbufferMemory {
4848
start: usize,
4949
}
5050

51+
impl Default for VerifiedFlatbufferMemory {
52+
fn default() -> Self {
53+
// TODO: create an empty engine and get the memory from it
54+
Self {
55+
raw_data: vec![],
56+
start: 0,
57+
}
58+
}
59+
}
60+
5161
impl VerifiedFlatbufferMemory {
5262
pub(crate) fn from_raw(data: Vec<u8>) -> Result<Self, flatbuffers::InvalidFlatbuffer> {
5363
let memory = Self::from_vec(data);

0 commit comments

Comments
 (0)