Skip to content

Use the one flatbuffer to store all lists #489

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ harness = false
[features]
# If disabling default features, consider explicitly re-enabling the
# "embedded-domain-resolver" feature.
default = ["embedded-domain-resolver", "full-regex-handling", "unsync-regex-caching"]
default = ["embedded-domain-resolver", "full-regex-handling", "single-thread"]
full-regex-handling = []
unsync-regex-caching = [] # disables `Send` and `Sync` on `Engine`.
single-thread = [] # disables `Send` and `Sync` on `Engine`.
regex-debug-info = []
css-validation = ["cssparser", "selectors"]
content-blocking = []
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ By default, `adblock-rust` ships with a built-in domain resolution implementatio
`adblock-rust` uses uBlock Origin-compatible resources for scriptlet injection and redirect rules.
The `resource-assembler` feature allows `adblock-rust` to parse these resources directly from the file formats used by the uBlock Origin repository.

#### Thread safety (`unsync-regex-caching`)
#### Thread safety (`single-thread`)

The `unsync-regex-caching` feature enables optimizations for rule matching speed and the amount of memory used by the engine.
The `single-thread` feature enables optimizations for rule matching speed and the amount of memory used by the engine.
This feature can be disabled to make the engine `Send + Sync`, although it is recommended to only access the engine on a single thread to maintain optimal performance.
38 changes: 14 additions & 24 deletions benches/bench_matching.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@ use criterion::*;

use serde::{Deserialize, Serialize};

use adblock::blocker::{Blocker, BlockerOptions};
use adblock::request::Request;
use adblock::resources::ResourceStorage;
use adblock::url_parser::parse_url;
use adblock::Engine;
use adblock::{Engine, FilterSet};

#[path = "../tests/test_utils.rs"]
mod test_utils;
Expand Down Expand Up @@ -36,14 +34,13 @@ fn load_requests() -> Vec<TestRequest> {
reqs
}

fn get_blocker(rules: impl IntoIterator<Item = impl AsRef<str>>) -> Blocker {
fn get_engine(rules: impl IntoIterator<Item = impl AsRef<str>>) -> Engine {
let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default());

let blocker_options = BlockerOptions {
enable_optimizations: true,
};

Blocker::new(network_filters, &blocker_options)
Engine::from_filter_set(
FilterSet::new_with_rules(network_filters, vec![], false),
true,
)
}

fn bench_rule_matching(engine: &Engine, requests: &[TestRequest]) -> (u32, u32) {
Expand All @@ -61,15 +58,11 @@ fn bench_rule_matching(engine: &Engine, requests: &[TestRequest]) -> (u32, u32)
(matches, passes)
}

fn bench_matching_only(
blocker: &Blocker,
resources: &ResourceStorage,
requests: &[Request],
) -> (u32, u32) {
fn bench_matching_only(engine: &Engine, requests: &[Request]) -> (u32, u32) {
let mut matches = 0;
let mut passes = 0;
requests.iter().for_each(|parsed| {
let check = blocker.check(parsed, resources);
let check = engine.check_network_request(parsed);
if check.matched {
matches += 1;
} else {
Expand Down Expand Up @@ -150,14 +143,13 @@ fn rule_match_parsed_el(c: &mut Criterion) {
.filter_map(Result::ok)
.collect();
let requests_len = requests_parsed.len() as u64;
let blocker = get_blocker(rules);
let resources = ResourceStorage::default();
let engine = get_engine(rules);

group.throughput(Throughput::Elements(requests_len));
group.sample_size(10);

group.bench_function("easylist", move |b| {
b.iter(|| bench_matching_only(&blocker, &resources, &requests_parsed))
b.iter(|| bench_matching_only(&engine, &requests_parsed))
});

group.finish();
Expand All @@ -170,8 +162,7 @@ fn rule_match_parsed_elep_slimlist(c: &mut Criterion) {
"data/easylist.to/easylist/easylist.txt",
"data/easylist.to/easylist/easyprivacy.txt",
]);
let blocker = get_blocker(full_rules);
let resources = ResourceStorage::default();
let engine = get_engine(full_rules);

let requests = load_requests();
let requests_parsed: Vec<_> = requests
Expand All @@ -182,7 +173,7 @@ fn rule_match_parsed_elep_slimlist(c: &mut Criterion) {
let requests_len = requests_parsed.len() as u64;

let slim_rules = rules_from_lists(&["data/slim-list.txt"]);
let slim_blocker = get_blocker(slim_rules);
let slim_engine = get_engine(slim_rules);

let requests_copy = load_requests();
let requests_parsed_copy: Vec<_> = requests_copy
Expand All @@ -195,11 +186,10 @@ fn rule_match_parsed_elep_slimlist(c: &mut Criterion) {
group.sample_size(10);

group.bench_function("el+ep", move |b| {
b.iter(|| bench_matching_only(&blocker, &resources, &requests_parsed))
b.iter(|| bench_matching_only(&engine, &requests_parsed))
});
let resources = ResourceStorage::default();
group.bench_function("slimlist", move |b| {
b.iter(|| bench_matching_only(&slim_blocker, &resources, &requests_parsed_copy))
b.iter(|| bench_matching_only(&slim_engine, &requests_parsed_copy))
});

group.finish();
Expand Down
42 changes: 16 additions & 26 deletions benches/bench_redirect_performance.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use adblock::{Engine, FilterSet};
use criterion::*;
use tokio::runtime::Runtime;

use adblock::blocker::{Blocker, BlockerOptions};
use adblock::filters::network::{NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper};
use adblock::request::Request;
use adblock::resources::ResourceStorage;
use adblock::resources::Resource;

const DEFAULT_LISTS_URL: &str =
"https://raw.githubusercontent.com/brave/adblock-resources/master/filter_lists/list_catalog.json";
Expand Down Expand Up @@ -84,18 +84,13 @@ fn get_redirect_rules() -> Vec<NetworkFilter> {
.collect()
}

/// Loads the supplied rules, and the test set of resources, into a Blocker
fn get_preloaded_blocker(rules: Vec<NetworkFilter>) -> Blocker {
let blocker_options = BlockerOptions {
enable_optimizations: true,
};

Blocker::new(rules, &blocker_options)
/// Loads the supplied rules, and the test set of resources, into an Engine
fn get_preloaded_engine(rules: Vec<NetworkFilter>) -> Engine {
let filter_set = FilterSet::new_with_rules(rules, vec![], false);
Engine::from_filter_set(filter_set, true /* optimize */)
}

fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> ResourceStorage {
let mut resources = ResourceStorage::default();

fn get_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> Vec<Resource> {
#[cfg(feature = "resource-assembler")]
{
use adblock::resources::resource_assembler::assemble_web_accessible_resources;
Expand All @@ -111,10 +106,7 @@ fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> Re
"data/test/fake-uBO-files/scriptlets.js",
)),
);

resource_data.into_iter().for_each(|resource| {
let _res = resources.add_resource(resource);
});
resource_data
}

#[cfg(not(feature = "resource-assembler"))]
Expand All @@ -141,12 +133,8 @@ fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> Re
permission: Default::default(),
}
})
.for_each(|resource| {
let _res = resources.add_resource(resource);
});
.collect()
}

resources
}

/// Maps network filter rules into `Request`s that would trigger those rules
Expand Down Expand Up @@ -211,9 +199,9 @@ pub fn build_custom_requests(rules: Vec<NetworkFilter>) -> Vec<Request> {
.collect::<Vec<_>>()
}

fn bench_fn(blocker: &Blocker, resources: &ResourceStorage, requests: &[Request]) {
fn bench_fn(engine: &Engine, requests: &[Request]) {
requests.iter().for_each(|request| {
let block_result = blocker.check(request, resources);
let block_result = engine.check_network_request(request);
assert!(
block_result.redirect.is_some(),
"{:?}, {:?}",
Expand All @@ -228,16 +216,18 @@ fn redirect_performance(c: &mut Criterion) {

let rules = get_redirect_rules();

let blocker = get_preloaded_blocker(rules.clone());
let resources = build_resources_for_filters(&rules);
let mut engine = get_preloaded_engine(rules.clone());
let resources = get_resources_for_filters(&rules);
engine.use_resources(resources);

let requests = build_custom_requests(rules.clone());
let requests_len = requests.len() as u64;

group.throughput(Throughput::Elements(requests_len));
group.sample_size(10);

group.bench_function("without_alias_lookup", move |b| {
b.iter(|| bench_fn(&blocker, &resources, &requests))
b.iter(|| bench_fn(&engine, &requests))
});

group.finish();
Expand Down
18 changes: 8 additions & 10 deletions benches/bench_rules.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use criterion::*;
use once_cell::sync::Lazy;

use adblock::blocker::{Blocker, BlockerOptions};
use adblock::Engine;
use adblock::{Engine, FilterSet};

#[path = "../tests/test_utils.rs"]
mod test_utils;
Expand Down Expand Up @@ -79,14 +78,13 @@ fn list_parse(c: &mut Criterion) {
group.finish();
}

fn get_blocker(rules: impl IntoIterator<Item = impl AsRef<str>>) -> Blocker {
fn get_engine(rules: impl IntoIterator<Item = impl AsRef<str>>) -> Engine {
let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default());

let blocker_options = BlockerOptions {
enable_optimizations: true,
};

Blocker::new(network_filters, &blocker_options)
Engine::from_filter_set(
FilterSet::new_with_rules(network_filters, vec![], false),
true,
)
}

fn blocker_new(c: &mut Criterion) {
Expand All @@ -104,9 +102,9 @@ fn blocker_new(c: &mut Criterion) {
let engine = Engine::from_rules(&brave_list_rules, Default::default());
let engine_serialized = engine.serialize().unwrap();

group.bench_function("el+ep", move |b| b.iter(|| get_blocker(&easylist_rules)));
group.bench_function("el+ep", move |b| b.iter(|| get_engine(&easylist_rules)));
group.bench_function("brave-list", move |b| {
b.iter(|| get_blocker(&brave_list_rules))
b.iter(|| get_engine(&brave_list_rules))
});
group.bench_function("brave-list-deserialize", move |b| {
b.iter(|| {
Expand Down
Loading