Skip to content

Commit dd800d8

Browse files
committed
Optimization for FlatNetworkFilterList.
1 parent ceeefb4 commit dd800d8

File tree

3 files changed

+89
-28
lines changed

3 files changed

+89
-28
lines changed

src/filters/fb_network.rs

+16-18
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ pub struct FlatNetworkFiltersListBuilder<'a> {
2121
filters: Vec<WIPOffset<fb::NetworkFilter<'a>>>,
2222

2323
unique_domains: Vec<Hash>,
24+
unique_domains_map: HashMap<Hash, u16>,
2425
}
2526

2627
impl<'a> FlatNetworkFiltersListBuilder<'a> {
@@ -29,34 +30,31 @@ impl<'a> FlatNetworkFiltersListBuilder<'a> {
2930
builder: flatbuffers::FlatBufferBuilder::new(),
3031
filters: vec![],
3132
unique_domains: vec![],
33+
unique_domains_map: HashMap::new(),
3234
}
3335
}
3436

35-
fn get_or_insert(arr: &mut Vec<Hash>, h: Hash) -> u16 {
36-
if let Some(index) = arr.iter().position(|&x| x == h) {
37-
u16::try_from(index).expect("< u16 max")
38-
} else {
39-
arr.push(h);
40-
u16::try_from(arr.len() - 1).expect("< u16 max")
37+
fn get_or_insert(&mut self, h: &Hash) -> u16 {
38+
if let Some(&index) = self.unique_domains_map.get(h) {
39+
return index;
4140
}
41+
let index = self.unique_domains.len() as u16;
42+
self.unique_domains.push(*h);
43+
self.unique_domains_map.insert(*h, index);
44+
return index;
4245
}
4346

4447
pub fn add(&mut self, network_filter: &NetworkFilter) -> u32 {
4548
let opt_domains = network_filter.opt_domains.as_ref().map(|v| {
46-
let mut o: Vec<u16> = v
47-
.into_iter()
48-
.map(|x| Self::get_or_insert(&mut self.unique_domains, *x))
49-
.collect();
49+
let mut o: Vec<u16> = v.iter().map(|x| self.get_or_insert(x)).collect();
50+
5051
o.sort_unstable();
5152
o.dedup();
5253
self.builder.create_vector(&o)
5354
});
5455

5556
let opt_not_domains = network_filter.opt_not_domains.as_ref().map(|v| {
56-
let mut o: Vec<u16> = v
57-
.into_iter()
58-
.map(|x| Self::get_or_insert(&mut self.unique_domains, *x))
59-
.collect();
57+
let mut o: Vec<u16> = v.iter().map(|x| self.get_or_insert(x)).collect();
6058
o.sort_unstable();
6159
o.dedup();
6260
self.builder.create_vector(&o)
@@ -65,23 +63,23 @@ impl<'a> FlatNetworkFiltersListBuilder<'a> {
6563
let modifier_option = network_filter
6664
.modifier_option
6765
.as_ref()
68-
.map(|s| self.builder.create_shared_string(&s));
66+
.map(|s| self.builder.create_string(&s));
6967

7068
let hostname = network_filter
7169
.hostname
7270
.as_ref()
73-
.map(|s| self.builder.create_shared_string(&s));
71+
.map(|s| self.builder.create_string(&s));
7472

7573
let tag = network_filter
7674
.tag
7775
.as_ref()
78-
.map(|s| self.builder.create_shared_string(&s));
76+
.map(|s| self.builder.create_string(&s));
7977

8078
let patterns = if network_filter.filter.iter().len() > 0 {
8179
let offsets: Vec<WIPOffset<&str>> = network_filter
8280
.filter
8381
.iter()
84-
.map(|s| self.builder.create_shared_string(s))
82+
.map(|s| self.builder.create_string(s))
8583
.collect();
8684
Some(self.builder.create_vector(&offsets))
8785
} else {

src/network_filter_list.rs

+49-5
Original file line number | Diff line number | Diff line change
@@ -267,15 +267,59 @@ pub struct FlatNetworkFilterList {
267267

268268
impl NetworkFilterListTrait for FlatNetworkFilterList {
269269
fn new(filters: Vec<NetworkFilter>, optimize: bool) -> Self {
270-
let mut temp_list = NetworkFilterList::new(filters, optimize);
270+
// Compute tokens for all filters
271+
let filter_tokens: Vec<_> = filters
272+
.into_iter()
273+
.map(|filter| {
274+
let tokens = filter.get_tokens();
275+
(filter, tokens)
276+
})
277+
.collect();
278+
// compute the tokens' frequency histogram
279+
let (total_number_of_tokens, tokens_histogram) = token_histogram(&filter_tokens);
271280

272281
let mut flat_builder = FlatNetworkFiltersListBuilder::new();
273282
let mut filter_map = HashMap::<Hash, Vec<u32>>::new();
283+
let mut optimizable = HashMap::<Hash, Vec<NetworkFilter>>::new();
284+
{
285+
for (network_filter, multi_tokens) in filter_tokens {
286+
let index = if !optimizer::is_filter_optimizable_by_patterns(&network_filter) {
287+
Some(flat_builder.add(&network_filter))
288+
} else {
289+
None
290+
};
291+
292+
for tokens in multi_tokens {
293+
let mut best_token: Hash = 0;
294+
let mut min_count = total_number_of_tokens + 1;
295+
for token in tokens {
296+
match tokens_histogram.get(&token) {
297+
None => {
298+
min_count = 0;
299+
best_token = token
300+
}
301+
Some(&count) if count < min_count => {
302+
min_count = count;
303+
best_token = token
304+
}
305+
_ => {}
306+
}
307+
}
308+
if let Some(index) = index {
309+
insert_dup(&mut filter_map, best_token, index);
310+
} else {
311+
insert_dup(&mut optimizable, best_token, network_filter.clone());
312+
}
313+
} // tokens
314+
}
315+
}
316+
317+
for (token, v) in optimizable {
318+
let optimized = optimizer::optimize_by_groupping_patterns(v);
274319

275-
for (key, vec) in temp_list.filter_map.drain() {
276-
for filter in vec.into_iter() {
277-
let index = flat_builder.add(&(*filter));
278-
insert_dup(&mut filter_map, key, index);
320+
for filter in optimized {
321+
let index = flat_builder.add(&filter);
322+
insert_dup(&mut filter_map, token, index);
279323
}
280324
}
281325

src/optimizer.rs

+24-5
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,29 @@ trait Optimization {
1010
fn select(&self, filter: &NetworkFilter) -> bool;
1111
}
1212

13+
pub fn is_filter_optimizable_by_patterns(filter: &NetworkFilter) -> bool {
14+
filter.opt_domains.is_none()
15+
&& filter.opt_not_domains.is_none()
16+
&& !filter.is_hostname_anchor()
17+
&& !filter.is_redirect()
18+
&& !filter.is_csp()
19+
}
20+
21+
pub fn optimize_by_groupping_patterns(filters: Vec<NetworkFilter>) -> Vec<NetworkFilter> {
22+
let mut optimized: Vec<NetworkFilter> = Vec::new();
23+
24+
let simple_pattern_group = SimplePatternGroup {};
25+
let (mut fused, mut unfused) = apply_optimisation(&simple_pattern_group, filters);
26+
optimized.append(&mut fused);
27+
28+
// Append whatever is still left unfused
29+
optimized.append(&mut unfused);
30+
31+
// Re-sort the list, now that the order has been perturbed
32+
optimized.sort_by_key(|f| f.id);
33+
optimized
34+
}
35+
1336
/// Fuse `NetworkFilter`s together by applying optimizations sequentially.
1437
pub fn optimize(filters: Vec<NetworkFilter>) -> Vec<NetworkFilter> {
1538
let mut optimized: Vec<NetworkFilter> = Vec::new();
@@ -129,11 +152,7 @@ impl Optimization for SimplePatternGroup {
129152
format!("{:b}:{:?}", filter.mask, filter.is_complete_regex())
130153
}
131154
fn select(&self, filter: &NetworkFilter) -> bool {
132-
filter.opt_domains.is_none()
133-
&& filter.opt_not_domains.is_none()
134-
&& !filter.is_hostname_anchor()
135-
&& !filter.is_redirect()
136-
&& !filter.is_csp()
155+
is_filter_optimizable_by_patterns(filter)
137156
}
138157
}
139158

0 commit comments

Comments
 (0)