brave · atuchin-m · Jan 28, 2025 · Jan 24, 2025 · Jan 24, 2025 · Jan 24, 2025
@@ -0,0 +1,50 @@
+# CI for performance benchmarking
+# Domains of interest:
+# * startup speed (== the time to parse and load all rules)
+# * network filter matching (== the avg time to check a request)
+# * first request matching delay (== time to check the first request)
+# * memory usage after loading rules and after a few requests
+name: Performance CI
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+
+permissions:
+  contents: write
+  pages: write
+  pull-requests: write
+
+defaults:
+  run:
+    # Naming the shell explicitly makes GitHub Actions run each step with
+    # `bash -eo pipefail`; the implicit default is only `bash -e`, which would
+    # let a failing `cargo bench` be masked by the `| tee` that follows it.
+    shell: bash
+
+jobs:
+  benchmark:
+    name: Performance benchmarking
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
+
+      # Every bench step appends its bencher-format results to output.txt,
+      # which the final step reads as a single report.
+      - name: Bench network filter matching
+        run: cargo bench --bench bench_matching rule-match-browserlike/brave-list -- --output-format bencher | tee -a output.txt
+
+      - name: Bench first request matching delay
+        run: cargo bench --bench bench_matching rule-match-first-request -- --output-format bencher | tee -a output.txt
+
+      - name: Bench startup speed
+        run: cargo bench --bench bench_rules blocker_new/brave-list -- --output-format bencher | tee -a output.txt
+
+      - name: Bench memory usage
+        run: cargo bench --bench bench_memory -- --output-format bencher | tee -a output.txt
+
+      - name: Store benchmark result
+        uses: benchmark-action/github-action-benchmark@d48d326b4ca9ba73ca0cd0d59f108f9e02a381c7 # v1.20.4
+        with:
+          name: Rust Benchmark
+          tool: 'cargo'
+          output-file-path: output.txt
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          alert-threshold: '130%' # fails on +30% regression
+          comment-on-alert: true
+          fail-on-alert: true
+          comment-always: true
@@ -75,6 +75,11 @@ harness = false
 name = "bench_redirect_performance"
 harness = false
 
+
+[[bench]]
+name = "bench_memory"
+harness = false
+
 # Currently disabled, as cosmetic filter internals
 # are no longer part of the crate's public API
 #[[bench]]

@@ -76,9 +76,11 @@ fn bench_matching_only(blocker: &Blocker, resources: &ResourceStorage, requests:
     (matches, passes)
 }
 
+// One pre-parsed request in the tuple form taken by
+// `bench_rule_matching_browserlike`, e.g.
+// ("https://example.com", "example.com", "example.com", "document", false).
+// NOTE(review): presumably (url, hostname, source hostname, request type,
+// third-party flag) -- confirm against the code that unpacks the tuple.
+type ParsedRequest = (String, String, String, String, bool);
+
 fn bench_rule_matching_browserlike(
     blocker: &Engine,
-    requests: &Vec<(String, String, String, String, bool)>,
+    requests: &Vec<ParsedRequest>,
 ) -> (u32, u32) {
     let mut matches = 0;
     let mut passes = 0;
@@ -331,27 +333,64 @@ fn rule_match_browserlike_comparable(c: &mut Criterion) {
             .collect::<Vec<_>>()
     }
 
-    let elep_req = requests_parsed(&requests);
-    let el_req = elep_req.clone();
-    let slim = elep_req.clone();
+    let requests = requests_parsed(&requests);
 
-    group.bench_function("el+ep", move |b| {
+    group.bench_function("el+ep", |b| {
         let rules = rules_from_lists(&[
             "data/easylist.to/easylist/easylist.txt",
             "data/easylist.to/easylist/easyprivacy.txt",
         ]);
         let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true);
-        b.iter(|| bench_rule_matching_browserlike(&engine, &elep_req))
+        b.iter(|| bench_rule_matching_browserlike(&engine, &requests))
     });
-    group.bench_function("el", move |b| {
+    group.bench_function("el", |b| {
         let rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]);
         let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true);
-        b.iter(|| bench_rule_matching_browserlike(&engine, &el_req))
+        b.iter(|| bench_rule_matching_browserlike(&engine, &requests))
     });
-    group.bench_function("slimlist", move |b| {
+    group.bench_function("slimlist", |b| {
         let rules = rules_from_lists(&["data/slim-list.txt"]);
         let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true);
-        b.iter(|| bench_rule_matching_browserlike(&engine, &slim))
+        b.iter(|| bench_rule_matching_browserlike(&engine, &requests))
+    });
+    group.bench_function("brave-list", |b| {
+      let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]);
+      let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true);
+      b.iter(|| bench_rule_matching_browserlike(&engine, &requests))
+  });
+
+    group.finish();
+}
+
+fn rule_match_first_request(c: &mut Criterion) {
+    let mut group = c.benchmark_group("rule-match-first-request");
+
+    group.sample_size(10);
+
+    let requests: Vec<ParsedRequest> = vec![(
+        "https://example.com".to_string(),
+        "example.com".to_string(),
+        "example.com".to_string(),
+        "document".to_string(),
+        false,
+    )];
+
+    group.bench_function("brave-list", |b| {
+        b.iter_custom(
+            |iters| {
+                let mut total_time = std::time::Duration::ZERO;
+                for _ in 0..iters {
+                  let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]);
+                  let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true);
+
+                  // Measure only the matching time, skip setup and destruction
+                  let start_time = std::time::Instant::now();
+                  bench_rule_matching_browserlike(&engine, &requests);
+                  total_time += start_time.elapsed();
+                }
+                total_time
+            }
+        )
     });
 
     group.finish();
@@ -363,6 +402,7 @@ criterion_group!(
     rule_match_parsed_el,
     rule_match_parsed_elep_slimlist,
     rule_match_browserlike_comparable,
+    rule_match_first_request,
     serialization,
     deserialization
 );

@@ -0,0 +1,157 @@
+/* Copyright (c) 2025 The Brave Authors. All rights reserved.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+use criterion::*;
+use std::alloc::{GlobalAlloc, Layout, System};
+use std::sync::atomic::{AtomicUsize, Ordering};
+use serde::{Deserialize, Serialize};
+
+use adblock::Engine;
+use adblock::request::Request;
+
+#[path = "../tests/test_utils.rs"]
+mod test_utils;
+use test_utils::rules_from_lists;
+
+// Custom allocator to track memory usage
+#[global_allocator]
+static ALLOCATOR: MemoryTracker = MemoryTracker::new();
+
+/// Allocator wrapper that forwards every request to [`System`] while keeping
+/// a running total of the bytes currently allocated through it.
+struct MemoryTracker {
+    /// Bytes allocated minus bytes freed since start-up or the last `reset`.
+    allocated: AtomicUsize,
+    /// The allocator that actually services the requests.
+    internal: System,
+}
+
+impl MemoryTracker {
+    /// Creates a tracker whose counter starts at zero. `const` so that it can
+    /// initialise the `#[global_allocator]` static.
+    const fn new() -> Self {
+        MemoryTracker {
+            internal: System,
+            allocated: AtomicUsize::new(0),
+        }
+    }
+
+    /// Returns the current value of the byte counter.
+    fn current_usage(&self) -> usize {
+        self.allocated.load(Ordering::SeqCst)
+    }
+
+    /// Sets the byte counter back to zero without freeing anything.
+    ///
+    /// Memory allocated before the reset and freed after it is still
+    /// subtracted, and `fetch_sub` wraps around on underflow.
+    fn reset(&self) {
+        self.allocated.store(0, Ordering::SeqCst);
+    }
+
+    /// Adds `size` to the counter if `ptr` is non-null, then hands `ptr` back.
+    fn record_alloc(&self, ptr: *mut u8, size: usize) -> *mut u8 {
+        if !ptr.is_null() {
+            self.allocated.fetch_add(size, Ordering::SeqCst);
+        }
+        ptr
+    }
+}
+
+// SAFETY: every method passes its arguments unchanged to `System`, so the
+// caller's `GlobalAlloc` obligations are forwarded as-is; the only extra work
+// is atomic counter arithmetic.
+unsafe impl GlobalAlloc for MemoryTracker {
+    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
+        self.record_alloc(self.internal.alloc(layout), layout.size())
+    }
+
+    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
+        self.internal.dealloc(ptr, layout);
+        self.allocated.fetch_sub(layout.size(), Ordering::SeqCst);
+    }
+
+    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
+        let resized = self.internal.realloc(ptr, layout, new_size);
+        if resized.is_null() {
+            // A failed realloc leaves the old block in place, so the count is unchanged.
+            return resized;
+        }
+        // Swap the old size for the new one in the running total.
+        self.allocated.fetch_sub(layout.size(), Ordering::SeqCst);
+        self.allocated.fetch_add(new_size, Ordering::SeqCst);
+        resized
+    }
+
+    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
+        self.record_alloc(self.internal.alloc_zeroed(layout), layout.size())
+    }
+}
+
+/// Shape of the JSON objects that `load_requests` deserializes.
+// The serde derives use the field names as JSON keys, hence the camelCase
+// `frameUrl` and the lint exemption.
+#[allow(non_snake_case)]
+#[derive(Serialize, Deserialize, Clone)]
+struct TestRequest {
+    frameUrl: String,
+    url: String,
+    cpt: String,
+}
+
+impl From<&TestRequest> for Request {
+    /// Builds a `Request` via `Request::new(url, frameUrl, cpt)`.
+    ///
+    /// Panics if `Request::new` rejects the values, because the result is
+    /// unwrapped.
+    fn from(source: &TestRequest) -> Self {
+        let TestRequest {
+            frameUrl: frame_url,
+            url,
+            cpt,
+        } = source;
+        Self::new(url, frame_url, cpt).unwrap()
+    }
+}
+
+/// Reads `data/requests.json` through `rules_from_lists` and deserializes each
+/// returned entry as a `TestRequest`.
+///
+/// Entries that fail to deserialize are silently skipped.
+fn load_requests() -> Vec<TestRequest> {
+    rules_from_lists(&["data/requests.json"])
+        .into_iter()
+        .filter_map(|entry| serde_json::from_str::<TestRequest>(&entry).ok())
+        .collect()
+}
+
+/// Builds an engine from the Brave main list, replays `requests` against it
+/// and returns the allocator's byte count while the engine is still alive.
+fn measure_engine_memory(requests: &[&TestRequest]) -> usize {
+    // Restart the counter so the reading covers only allocations made from
+    // this point on.
+    ALLOCATOR.reset();
+    let rules = rules_from_lists(&["data/brave/brave-main-list.txt"]);
+    let engine = Engine::from_rules(rules, Default::default());
+
+    // Iterate by reference: no per-measurement copy of the request list.
+    for &request in requests {
+        criterion::black_box(engine.check_network_request(&Request::from(request)));
+    }
+
+    let usage = ALLOCATOR.current_usage();
+
+    // Prevent engine from being optimized
+    criterion::black_box(&engine);
+    usage
+}
+
+/// Reports memory usage through criterion by returning byte counts disguised
+/// as nanosecond `Duration`s from `iter_custom`.
+///
+/// Two benchmarks are registered in the `memory-usage` group: the usage right
+/// after building the engine, and the usage after checking up to 1000 of the
+/// requests returned by `load_requests`.
+fn bench_memory_usage(c: &mut Criterion) {
+    let mut group = c.benchmark_group("memory-usage");
+    group.sample_size(10);
+    group.measurement_time(std::time::Duration::from_secs(1));
+
+    // Shared by both benchmarks and bumped on every iteration, so each sample
+    // differs slightly from the previous one.
+    // NOTE(review): presumably criterion's statistics dislike identical
+    // samples -- confirm.
+    let mut noise = 0;
+    let all_requests = load_requests();
+    let first_1000_requests: Vec<_> = all_requests.iter().take(1000).collect();
+
+    group.bench_function("brave-list-initial", |b| {
+        b.iter_custom(|iters| {
+            let mut result = 0;
+            for _ in 0..iters {
+                noise += 1; // add some noise to make criterion happy
+                result += measure_engine_memory(&[]) + noise;
+            }
+
+            // Return the memory usage as a Duration
+            std::time::Duration::from_nanos(result as u64)
+        });
+    });
+
+    group.bench_function("brave-list-after-1000-requests", |b| {
+        b.iter_custom(|iters| {
+            let mut result = 0;
+            for _ in 0..iters {
+                noise += 1; // add some noise to make criterion happy
+                result += measure_engine_memory(&first_1000_requests) + noise;
+            }
+
+            // Return the memory usage as a Duration
+            std::time::Duration::from_nanos(result as u64)
+        })
+    });
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_memory_usage);
+criterion_main!(benches);
@@ -84,8 +84,6 @@ fn list_parse(c: &mut Criterion) {
 fn get_blocker(rules: impl IntoIterator<Item=impl AsRef<str>>) -> Blocker {
     let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default());
 
-    println!("Got {} network filters", network_filters.len());
-
     let blocker_options = BlockerOptions {
         enable_optimizations: true,
     };
@@ -99,12 +97,16 @@ fn blocker_new(c: &mut Criterion) {
     group.throughput(Throughput::Elements(1));
     group.sample_size(10);
 
-    let rules: Vec<_> = rules_from_lists(&[
+    let easylist_rules: Vec<_> = rules_from_lists(&[
         "data/easylist.to/easylist/easylist.txt",
         "data/easylist.to/easylist/easyprivacy.txt",
     ]).collect();
+    let brave_list_rules: Vec<_> = rules_from_lists(&[
+        "data/brave/brave-main-list.txt",
+    ]).collect();
 
-    group.bench_function("el+ep", move |b| b.iter(|| get_blocker(&rules)));
+    group.bench_function("el+ep", move |b| b.iter(|| get_blocker(&easylist_rules)));
+    group.bench_function("brave-list", move |b| b.iter(|| get_blocker(&brave_list_rules)));
 
     group.finish();
 }

@@ -0,0 +1,47 @@
+const { execSync } = require("child_process");
+const fs = require("fs");
+const path = require("path");
+
+// Remove readline and use command line arguments
+const args = process.argv.slice(2);
+
+if (args.length < 2) {
+  console.error(
+    "Usage: node update-lists.js <Brave Services Key> <target version for brave list (i.e. 1.0.10268)>"
+  );
+  process.exit(1);
+}
+
+const apiKey = args[0];
+const version = args[1];
+
+const versionNumber = version.replace(/\./g, "_");
+const extensionId = "iodkpdagapdfkphljnddpjlldadblomo";
+
+execSync(
+  "curl -o data/easylist.to/easylist/easylist.txt https://easylist.to/easylist/easylist.txt"
+);
+execSync(
+  "curl -o data/easylist.to/easylist/easyprivacy.txt https://easylist.to/easylist/easyprivacy.txt"
+);
+execSync(
+  "curl -o data/easylist.to/easylistgermany/easylistgermany.txt https://easylist.to/easylistgermany/easylistgermany.txt"
+);
+
+execSync(
+  `curl -o extension.zip -H "BraveServiceKey: ${apiKey}" ` +
+    `https://brave-core-ext.s3.brave.com/release/${extensionId}/extension_${versionNumber}.crx`
+);
+
+const tempDir = fs.mkdtempSync("temp-brave-list");
+const listPath = path.join(tempDir, "list.txt");
+try {
+  execSync("unzip extension.zip -d " + tempDir);
+} catch (e) {
+  if (!fs.existsSync(listPath)) {
+    console.error("Failed to find list.txt in extension.zip");
+    process.exit(1);
+  }
+}
+
+execSync(`mv -f ${listPath} data/brave/brave-main-list.txt`);
@@ -14,3 +14,6 @@ serde =  { version = "1.0", features = ["derive", "rc"] }
 serde_json = "1.0"
 adblock = { path = "../", features = ["css-validation", "content-blocking", "resource-assembler"] }
 neon = { version = "^0.10.1", default-features = false, features = ["napi-1"] }
+
+[features]
+default-panic-hook = []
@@ -34,6 +34,7 @@
     "build": "cd js && cargo-cp-artifact -nc index.node -- cargo build --message-format=json-render-diagnostics",
     "build-debug": "npm run build --",
     "build-release": "npm run build -- --release",
+    "update-lists": "node data/update-lists.js",
     "install": "npm run build-release",
     "test": "cargo test"
   }

@@ -2187,10 +2187,20 @@ mod legacy_rule_parsing_tests {
     // difference from original counts caused by not handling document/subdocument options and possibly miscounting on the blocker side.
     // Printing all non-cosmetic, non-html, non-comment/-empty rules and ones with no unsupported options yields 29142 items
     // This engine also handles 3 rules that old one does not
-    const EASY_LIST: ListCounts = ListCounts { filters: 24064, cosmetic_filters: 31163, exceptions: 5796, duplicates: 0 };
+    const EASY_LIST: ListCounts = ListCounts {
+        filters: 35597, // 36259 - 662 exceptions
+        cosmetic_filters: if cfg!(feature = "css-validation") { 23072 } else { 23080 },
+        exceptions: 662,
+        duplicates: 0
+    };
     // easyPrivacy = { 11817, 0, 0, 1020 };
     // differences in counts explained by hashset size underreporting as detailed in the next two cases
-    const EASY_PRIVACY: ListCounts = ListCounts { filters: 11889, cosmetic_filters: 0, exceptions: 1021, duplicates: 2 };
+    const EASY_PRIVACY: ListCounts = ListCounts {
+        filters: 52278, // 52998 - 720 exceptions
+        cosmetic_filters: 21,
+        exceptions: 720,
+        duplicates: 2
+    };
     // ublockUnbreak = { 4, 8, 0, 94 };
     // differences in counts explained by client.hostAnchoredExceptionHashSet->GetSize() underreporting when compared to client.numHostAnchoredExceptionFilters
     const UBLOCK_UNBREAK: ListCounts = ListCounts { filters: 4, cosmetic_filters: 8, exceptions: 98, duplicates: 0 };
@@ -2238,12 +2248,12 @@ mod legacy_rule_parsing_tests {
 
     #[test]
     fn parse_easylist() {
-        check_list_counts(["./data/test/easylist.txt"], FilterFormat::Standard, EASY_LIST);
+        check_list_counts(["./data/easylist.to/easylist/easylist.txt"], FilterFormat::Standard, EASY_LIST);
     }
 
     #[test]
     fn parse_easyprivacy() {
-        check_list_counts(["./data/test/easyprivacy.txt"], FilterFormat::Standard, EASY_PRIVACY);
+        check_list_counts(["./data/easylist.to/easylist/easyprivacy.txt"], FilterFormat::Standard, EASY_PRIVACY);
     }
 
     #[test]
@@ -2286,8 +2296,8 @@ mod legacy_rule_parsing_tests {
         let expectation = EASY_LIST + EASY_PRIVACY + UBLOCK_UNBREAK + BRAVE_UNBREAK;
         check_list_counts(
             [
-                "./data/test/easylist.txt",
-                "./data/test/easyprivacy.txt",
+                "./data/easylist.to/easylist/easylist.txt",
+                "./data/easylist.to/easylist/easyprivacy.txt",
                 "./data/test/ublock-unbreak.txt",
                 "./data/test/brave-unbreak.txt",
             ],

@@ -3418,7 +3418,7 @@ mod hash_collision_tests {
         let rules = test_utils::rules_from_lists([
             "data/easylist.to/easylist/easylist.txt",
             "data/easylist.to/easylist/easyprivacy.txt",
-        ]);
+        ]).filter(|f| f != "||www.bred4tula.com^"); // remove known collision
         let (network_filters, _) = parse_filters(rules, true, Default::default());
 
         let mut filter_ids: HashMap<Hash, String> = HashMap::new();