Skip to content

Setup basic perf CI #417

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Jan 28, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions .github/workflows/perf-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# CI for performance benchmarking
# Domains of interest:
# * startup speed (== to parse and load all rules)
# * network filter matching (== the avg time to check a request)
# * first request matching delay (== time to check the first request)
# * memory usage after loading rules and after a few requests
name: Performance CI

on:
  push:
    branches: [ master ]
  pull_request:

permissions:
  contents: write
  pages: write
  pull-requests: write

defaults:
  run:
    # Requesting bash explicitly makes the runner start it with `-o pipefail`.
    # Without it a failing `cargo bench` would be hidden by the exit status of
    # the trailing `| tee`, and the job would publish incomplete results.
    shell: bash

jobs:
  benchmark:
    name: Performance benchmarking
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4

      # Every bench step appends to the same output.txt, which the final step
      # hands to the benchmark action as a single report.
      - name: Bench network filter matching
        run: cargo bench --bench bench_matching rule-match-browserlike/brave-list -- --output-format bencher | tee -a output.txt

      - name: Bench first request matching delay
        run: cargo bench --bench bench_matching rule-match-first-request -- --output-format bencher | tee -a output.txt

      - name: Bench startup speed
        run: cargo bench --bench bench_rules blocker_new/brave-list -- --output-format bencher | tee -a output.txt

      - name: Bench memory usage
        run: cargo bench --bench bench_memory -- --output-format bencher | tee -a output.txt

      - name: Store benchmark result
        uses: benchmark-action/github-action-benchmark@d48d326b4ca9ba73ca0cd0d59f108f9e02a381c7 # v1.20.4
        with:
          name: Rust Benchmark
          tool: 'cargo'
          output-file-path: output.txt
          github-token: ${{ secrets.GITHUB_TOKEN }}
          alert-threshold: '130%' # fails on +30% regression
          comment-on-alert: true
          fail-on-alert: true
          comment-always: true
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -75,6 +75,11 @@ harness = false
name = "bench_redirect_performance"
harness = false


[[bench]]
name = "bench_memory"
harness = false

# Currently disabled, as cosmetic filter internals
# are no longer part of the crate's public API
#[[bench]]
60 changes: 50 additions & 10 deletions benches/bench_matching.rs
Original file line number Diff line number Diff line change
@@ -76,9 +76,11 @@ fn bench_matching_only(blocker: &Blocker, resources: &ResourceStorage, requests:
(matches, passes)
}

// Request tuple consumed by `bench_rule_matching_browserlike`.
// NOTE(review): the fields are presumably (url, hostname, source hostname,
// request type, third-party flag) — confirm against `requests_parsed`.
type ParsedRequest = (String, String, String, String, bool);

fn bench_rule_matching_browserlike(
blocker: &Engine,
requests: &Vec<(String, String, String, String, bool)>,
requests: &Vec<ParsedRequest>,
) -> (u32, u32) {
let mut matches = 0;
let mut passes = 0;
@@ -331,27 +333,64 @@ fn rule_match_browserlike_comparable(c: &mut Criterion) {
.collect::<Vec<_>>()
}

let elep_req = requests_parsed(&requests);
let el_req = elep_req.clone();
let slim = elep_req.clone();
let requests = requests_parsed(&requests);

group.bench_function("el+ep", move |b| {
group.bench_function("el+ep", |b| {
let rules = rules_from_lists(&[
"data/easylist.to/easylist/easylist.txt",
"data/easylist.to/easylist/easyprivacy.txt",
]);
let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true);
b.iter(|| bench_rule_matching_browserlike(&engine, &elep_req))
b.iter(|| bench_rule_matching_browserlike(&engine, &requests))
});
group.bench_function("el", move |b| {
group.bench_function("el", |b| {
let rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]);
let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true);
b.iter(|| bench_rule_matching_browserlike(&engine, &el_req))
b.iter(|| bench_rule_matching_browserlike(&engine, &requests))
});
group.bench_function("slimlist", move |b| {
group.bench_function("slimlist", |b| {
let rules = rules_from_lists(&["data/slim-list.txt"]);
let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true);
b.iter(|| bench_rule_matching_browserlike(&engine, &slim))
b.iter(|| bench_rule_matching_browserlike(&engine, &requests))
});
group.bench_function("brave-list", |b| {
let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]);
let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true);
b.iter(|| bench_rule_matching_browserlike(&engine, &requests))
});

group.finish();
}

fn rule_match_first_request(c: &mut Criterion) {
let mut group = c.benchmark_group("rule-match-first-request");

group.sample_size(10);

let requests: Vec<ParsedRequest> = vec![(
"https://example.com".to_string(),
"example.com".to_string(),
"example.com".to_string(),
"document".to_string(),
false,
)];

group.bench_function("brave-list", |b| {
b.iter_custom(
|iters| {
let mut total_time = std::time::Duration::ZERO;
for _ in 0..iters {
let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]);
let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true);

// Measure only the matching time, skip setup and destruction
let start_time = std::time::Instant::now();
bench_rule_matching_browserlike(&engine, &requests);
total_time += start_time.elapsed();
}
total_time
}
)
});

group.finish();
@@ -363,6 +402,7 @@ criterion_group!(
rule_match_parsed_el,
rule_match_parsed_elep_slimlist,
rule_match_browserlike_comparable,
rule_match_first_request,
serialization,
deserialization
);
157 changes: 157 additions & 0 deletions benches/bench_memory.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/* Copyright (c) 2025 The Brave Authors. All rights reserved.
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at https://mozilla.org/MPL/2.0/. */

use criterion::*;
use std::alloc::{GlobalAlloc, Layout, System};
use std::sync::atomic::{AtomicUsize, Ordering};
use serde::{Deserialize, Serialize};

use adblock::Engine;
use adblock::request::Request;

#[path = "../tests/test_utils.rs"]
mod test_utils;
use test_utils::rules_from_lists;

// Process-wide allocator: every heap request goes through the byte counter
// below so the benchmarks can read how much memory is currently attributed
// to live allocations.
#[global_allocator]
static ALLOCATOR: MemoryTracker = MemoryTracker::new();

/// Byte-counting wrapper around the [`System`] allocator.
struct MemoryTracker {
    /// Bytes added by successful allocations minus bytes released, counted
    /// since construction or the last `reset`.
    allocated: AtomicUsize,
    /// Allocator that actually services every request.
    internal: System,
}

impl MemoryTracker {
    /// Creates a tracker whose counter starts at zero.
    const fn new() -> Self {
        MemoryTracker {
            allocated: AtomicUsize::new(0),
            internal: System,
        }
    }

    /// Returns the current value of the byte counter.
    fn current_usage(&self) -> usize {
        self.allocated.load(Ordering::SeqCst)
    }

    /// Sets the byte counter back to zero without touching live allocations.
    ///
    /// Blocks allocated before the reset still subtract their size when they
    /// are freed, so the counter wraps around in that case.
    fn reset(&self) {
        self.allocated.store(0, Ordering::SeqCst);
    }

    /// Adds `bytes` to the counter.
    fn record_alloc(&self, bytes: usize) {
        self.allocated.fetch_add(bytes, Ordering::SeqCst);
    }

    /// Subtracts `bytes` from the counter (wrapping on underflow).
    fn record_free(&self, bytes: usize) {
        self.allocated.fetch_sub(bytes, Ordering::SeqCst);
    }
}

// Each method forwards to `System` and only adjusts the counter when the
// underlying call reports success (a non-null pointer).
unsafe impl GlobalAlloc for MemoryTracker {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        let block = self.internal.alloc(layout);
        if block.is_null() {
            return block;
        }
        self.record_alloc(layout.size());
        block
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        self.internal.dealloc(ptr, layout);
        self.record_free(layout.size());
    }

    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
        let block = self.internal.realloc(ptr, layout, new_size);
        if block.is_null() {
            // Failed realloc: the old block is still live, keep the counter.
            return block;
        }
        // Swap the old size for the new one.
        self.record_free(layout.size());
        self.record_alloc(new_size);
        block
    }

    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
        let block = self.internal.alloc_zeroed(layout);
        if block.is_null() {
            return block;
        }
        self.record_alloc(layout.size());
        block
    }
}

// One request record deserialized from a line of `data/requests.json`
// (see `load_requests`).
// NOTE(review): the camelCase field name presumably mirrors the JSON key,
// which is why the naming lint is silenced — confirm against the data file.
#[allow(non_snake_case)]
#[derive(Serialize, Deserialize, Clone)]
struct TestRequest {
// Passed as the second argument to `Request::new`.
frameUrl: String,
// Passed as the first argument to `Request::new`.
url: String,
// Passed as the third argument to `Request::new`.
cpt: String,
}

impl From<&TestRequest> for Request {
fn from(v: &TestRequest) -> Self {
Request::new(&v.url, &v.frameUrl, &v.cpt).unwrap()
}
}

/// Reads `data/requests.json` through `rules_from_lists` and parses each
/// returned string as a JSON-encoded `TestRequest`.
///
/// Entries that fail to parse are dropped silently.
fn load_requests() -> Vec<TestRequest> {
    rules_from_lists(&["data/requests.json"])
        .into_iter()
        .filter_map(|line| serde_json::from_str::<TestRequest>(&line).ok())
        .collect()
}

fn bench_memory_usage(c: &mut Criterion) {
let mut group = c.benchmark_group("memory-usage");
group.sample_size(10);
group.measurement_time(std::time::Duration::from_secs(1));

let mut noise = 0;
let all_requests = load_requests();
let first_1000_requests: Vec<_> = all_requests.iter().take(1000).collect();

group.bench_function("brave-list-initial", |b| {
let mut result = 0;
b.iter_custom(|iters| {
for _ in 0..iters {
ALLOCATOR.reset();
let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]);
let engine = Engine::from_rules(rules, Default::default());

noise += 1; // add some noise to make criterion happy
result += ALLOCATOR.current_usage() + noise;

// Prevent engine from being optimized
criterion::black_box(&engine);
}

// Return the memory usage as a Duration
std::time::Duration::from_nanos(result as u64)
});
});

group.bench_function("brave-list-after-1000-requests", |b| {
b.iter_custom(|iters| {
let mut result = 0;
for _ in 0..iters {
ALLOCATOR.reset();
let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]);
let engine = Engine::from_rules(rules, Default::default());

for request in first_1000_requests.clone() {
criterion::black_box(engine.check_network_request(&request.into()));
}

noise += 1; // add some noise to make criterion happy
result += ALLOCATOR.current_usage() + noise;

// Prevent engine from being optimized
criterion::black_box(&engine);
}

// Return the memory usage as a Duration
std::time::Duration::from_nanos(result as u64)
})
});

group.finish();
}

// Register `bench_memory_usage` under the `benches` group and hand that group
// to criterion's entry-point macro (this bench target is declared with
// `harness = false` in Cargo.toml).
criterion_group!(benches, bench_memory_usage);
criterion_main!(benches);
10 changes: 6 additions & 4 deletions benches/bench_rules.rs
Original file line number Diff line number Diff line change
@@ -84,8 +84,6 @@ fn list_parse(c: &mut Criterion) {
fn get_blocker(rules: impl IntoIterator<Item=impl AsRef<str>>) -> Blocker {
let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default());

println!("Got {} network filters", network_filters.len());

let blocker_options = BlockerOptions {
enable_optimizations: true,
};
@@ -99,12 +97,16 @@ fn blocker_new(c: &mut Criterion) {
group.throughput(Throughput::Elements(1));
group.sample_size(10);

let rules: Vec<_> = rules_from_lists(&[
let easylist_rules: Vec<_> = rules_from_lists(&[
"data/easylist.to/easylist/easylist.txt",
"data/easylist.to/easylist/easyprivacy.txt",
]).collect();
let brave_list_rules: Vec<_> = rules_from_lists(&[
"data/brave/brave-main-list.txt",
]).collect();

group.bench_function("el+ep", move |b| b.iter(|| get_blocker(&rules)));
group.bench_function("el+ep", move |b| b.iter(|| get_blocker(&easylist_rules)));
group.bench_function("brave-list", move |b| b.iter(|| get_blocker(&brave_list_rules)));

group.finish();
}
179,208 changes: 179,208 additions & 0 deletions data/brave/brave-main-list.txt

Large diffs are not rendered by default.

114,057 changes: 51,789 additions & 62,268 deletions data/easylist.to/easylist/easylist.txt

Large diffs are not rendered by default.

58,870 changes: 47,863 additions & 11,007 deletions data/easylist.to/easylist/easyprivacy.txt

Large diffs are not rendered by default.

11,686 changes: 4,521 additions & 7,165 deletions data/easylist.to/easylistgermany/easylistgermany.txt

Large diffs are not rendered by default.

62,694 changes: 0 additions & 62,694 deletions data/test/easylist.txt

This file was deleted.

13,088 changes: 0 additions & 13,088 deletions data/test/easyprivacy.txt

This file was deleted.

47 changes: 47 additions & 0 deletions data/update-lists.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
const { execFileSync } = require("child_process");
const fs = require("fs");
const path = require("path");

// Refreshes the filter lists stored under `data/`:
//   * EasyList, EasyPrivacy and EasyList Germany are downloaded from easylist.to;
//   * the Brave main list is taken from `list.txt` inside the extension
//     package (.crx) published for the requested version.
// All paths are relative to the current working directory, so the script is
// expected to be started from the repository root (as `npm run update-lists` does).
const args = process.argv.slice(2);

if (args.length < 2) {
  console.error(
    "Usage: node update-lists.js <Brave Services Key> <target version for brave list (i.e. 1.0.10268)>"
  );
  process.exit(1);
}

const apiKey = args[0];
const version = args[1];

// The download URL spells the version with underscores instead of dots.
const versionNumber = version.replace(/\./g, "_");
const extensionId = "iodkpdagapdfkphljnddpjlldadblomo";

// Downloads `url` to `outputPath` with curl.
// Arguments are passed as an array (no shell), so the key and version given
// on the command line cannot be interpreted as shell syntax. `--fail` makes
// curl exit non-zero on an HTTP error instead of saving the error page over
// the list file; execFileSync then throws and aborts the script.
function download(url, outputPath, headers = []) {
  const headerArgs = headers.flatMap((header) => ["-H", header]);
  execFileSync("curl", ["--fail", "-o", outputPath, ...headerArgs, url]);
}

// Unpacks `zipPath` into `targetDir` and reports whether `expectedFile` exists
// afterwards.
function unzipExtension(zipPath, targetDir, expectedFile) {
  try {
    execFileSync("unzip", [zipPath, "-d", targetDir]);
  } catch (e) {
    // NOTE(review): unzip presumably exits non-zero on .crx files because of
    // the header that precedes the zip data — confirm. The extraction is
    // judged by the presence of the expected file instead of the exit status.
  }
  return fs.existsSync(expectedFile);
}

download(
  "https://easylist.to/easylist/easylist.txt",
  "data/easylist.to/easylist/easylist.txt"
);
download(
  "https://easylist.to/easylist/easyprivacy.txt",
  "data/easylist.to/easylist/easyprivacy.txt"
);
download(
  "https://easylist.to/easylistgermany/easylistgermany.txt",
  "data/easylist.to/easylistgermany/easylistgermany.txt"
);

const zipPath = "extension.zip";
const tempDir = fs.mkdtempSync("temp-brave-list");
const listPath = path.join(tempDir, "list.txt");
let extracted = false;
try {
  download(
    `https://brave-core-ext.s3.brave.com/release/${extensionId}/extension_${versionNumber}.crx`,
    zipPath,
    [`BraveServiceKey: ${apiKey}`]
  );
  extracted = unzipExtension(zipPath, tempDir, listPath);
  if (extracted) {
    // Copy rather than move: the temporary directory is removed below anyway.
    fs.copyFileSync(listPath, "data/brave/brave-main-list.txt");
  }
} finally {
  // Always remove the scratch files, even when a download or the copy failed.
  fs.rmSync(tempDir, { recursive: true, force: true });
  fs.rmSync(zipPath, { force: true });
}

if (!extracted) {
  console.error("Failed to find list.txt in extension.zip");
  process.exit(1);
}
3 changes: 3 additions & 0 deletions js/Cargo.toml
Original file line number Diff line number Diff line change
@@ -14,3 +14,6 @@ serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = "1.0"
adblock = { path = "../", features = ["css-validation", "content-blocking", "resource-assembler"] }
neon = { version = "^0.10.1", default-features = false, features = ["napi-1"] }

[features]
default-panic-hook = []
Comment on lines +17 to +19
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this does anything?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One of the changes in Rust 1.84 is rust-lang/rust#132577, so this is now a lint warning (and thus an error since we've configured lint warnings as errors). I think it's resolved in newer versions of Neon, but we can leave it here as-is for now.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@
"build": "cd js && cargo-cp-artifact -nc index.node -- cargo build --message-format=json-render-diagnostics",
"build-debug": "npm run build --",
"build-release": "npm run build -- --release",
"update-lists": "node data/update-lists.js",
"install": "npm run build-release",
"test": "cargo test"
}
22 changes: 16 additions & 6 deletions src/blocker.rs
Original file line number Diff line number Diff line change
@@ -2187,10 +2187,20 @@ mod legacy_rule_parsing_tests {
// difference from original counts caused by not handling document/subdocument options and possibly miscounting on the blocker side.
// Printing all non-cosmetic, non-html, non-comment/-empty rules and ones with no unsupported options yields 29142 items
// This engine also handles 3 rules that old one does not
const EASY_LIST: ListCounts = ListCounts { filters: 24064, cosmetic_filters: 31163, exceptions: 5796, duplicates: 0 };
const EASY_LIST: ListCounts = ListCounts {
filters: 35597, // 36259 - 662 exceptions
cosmetic_filters: if cfg!(feature = "css-validation") { 23072 } else { 23080 },
exceptions: 662,
duplicates: 0
};
// easyPrivacy = { 11817, 0, 0, 1020 };
// differences in counts explained by hashset size underreporting as detailed in the next two cases
const EASY_PRIVACY: ListCounts = ListCounts { filters: 11889, cosmetic_filters: 0, exceptions: 1021, duplicates: 2 };
const EASY_PRIVACY: ListCounts = ListCounts {
filters: 52278, // 52998 - 720 exceptions
cosmetic_filters: 21,
exceptions: 720,
duplicates: 2
};
// ublockUnbreak = { 4, 8, 0, 94 };
// differences in counts explained by client.hostAnchoredExceptionHashSet->GetSize() underreporting when compared to client.numHostAnchoredExceptionFilters
const UBLOCK_UNBREAK: ListCounts = ListCounts { filters: 4, cosmetic_filters: 8, exceptions: 98, duplicates: 0 };
@@ -2238,12 +2248,12 @@ mod legacy_rule_parsing_tests {

#[test]
fn parse_easylist() {
check_list_counts(["./data/test/easylist.txt"], FilterFormat::Standard, EASY_LIST);
check_list_counts(["./data/easylist.to/easylist/easylist.txt"], FilterFormat::Standard, EASY_LIST);
}

#[test]
fn parse_easyprivacy() {
check_list_counts(["./data/test/easyprivacy.txt"], FilterFormat::Standard, EASY_PRIVACY);
check_list_counts(["./data/easylist.to/easylist/easyprivacy.txt"], FilterFormat::Standard, EASY_PRIVACY);
}

#[test]
@@ -2286,8 +2296,8 @@ mod legacy_rule_parsing_tests {
let expectation = EASY_LIST + EASY_PRIVACY + UBLOCK_UNBREAK + BRAVE_UNBREAK;
check_list_counts(
[
"./data/test/easylist.txt",
"./data/test/easyprivacy.txt",
"./data/easylist.to/easylist/easylist.txt",
"./data/easylist.to/easylist/easyprivacy.txt",
"./data/test/ublock-unbreak.txt",
"./data/test/brave-unbreak.txt",
],
2 changes: 1 addition & 1 deletion src/filters/network.rs
Original file line number Diff line number Diff line change
@@ -3418,7 +3418,7 @@ mod hash_collision_tests {
let rules = test_utils::rules_from_lists([
"data/easylist.to/easylist/easylist.txt",
"data/easylist.to/easylist/easyprivacy.txt",
]);
]).filter(|f| f != "||www.bred4tula.com^"); // remove known collision
let (network_filters, _) = parse_filters(rules, true, Default::default());

let mut filter_ids: HashMap<Hash, String> = HashMap::new();