mongodb · isabelatkinson · Feb 3, 2025 · Jan 29, 2025 · Jan 29, 2025 · Jan 29, 2025
@@ -290,7 +290,7 @@ tasks:
     commands:
       - func: "bootstrap mongo-orchestration"
         vars:
-          MONGODB_VERSION: "v6.0-perf"
+          MONGODB_VERSION: "v8.0-perf"
           # Note that drivers-evergreen-tools expects `SSL` as the environmental
           # variable, not `TLS`, so we have to use that for the actual value used in the
           # script; we use `TLS` for the metadata that isn't used by the actual shell
@@ -364,4 +364,3 @@ buildvariants:
   display_name: "Compile"
   tasks:
     - "benchmark-compile"
-
@@ -1,5 +1,6 @@
 pub mod bson_decode;
 pub mod bson_encode;
+pub mod bulk_write;
 pub mod find_many;
 pub mod find_one;
 pub mod gridfs_download;
@@ -59,17 +60,20 @@ pub static TARGET_ITERATION_COUNT: Lazy<usize> = Lazy::new(|| {
 
 #[async_trait::async_trait]
 pub trait Benchmark: Sized {
+    /// The options used to construct the benchmark.
     type Options;
+    /// The state needed to perform the benchmark task.
+    type TaskState: Default;
 
     /// execute once before benchmarking
     async fn setup(options: Self::Options) -> Result<Self>;
 
     /// execute at the beginning of every iteration
-    async fn before_task(&mut self) -> Result<()> {
-        Ok(())
+    async fn before_task(&self) -> Result<Self::TaskState> {
+        Ok(Default::default())
     }
 
-    async fn do_task(&self) -> Result<()>;
+    async fn do_task(&self, state: Self::TaskState) -> Result<()>;
 
     /// execute at the end of every iteration
     async fn after_task(&self) -> Result<()> {
@@ -108,7 +112,7 @@ fn finished(duration: Duration, iter: usize) -> bool {
 pub async fn run_benchmark<B: Benchmark + Send + Sync>(
     options: B::Options,
 ) -> Result<Vec<Duration>> {
-    let mut test = B::setup(options).await?;
+    let test = B::setup(options).await?;
 
     let mut test_durations = Vec::new();
 
@@ -127,9 +131,9 @@ pub async fn run_benchmark<B: Benchmark + Send + Sync>(
     while !finished(benchmark_timer.elapsed(), iter) {
         progress_bar.inc(1);
 
-        test.before_task().await?;
+        let state = test.before_task().await?;
         let timer = Instant::now();
-        test.do_task().await?;
+        test.do_task(state).await?;
         test_durations.push(timer.elapsed());
         test.after_task().await?;
 
@@ -152,13 +156,13 @@ pub async fn drop_database(uri: &str, database: &str) -> Result<()> {
         .run_command(doc! { "hello": true })
         .await?;
 
-    client.database(&database).drop().await?;
+    client.database(database).drop().await?;
 
     // in sharded clusters, take additional steps to ensure database is dropped completely.
     // see: https://www.mongodb.com/docs/manual/reference/method/db.dropDatabase/#replica-set-and-sharded-clusters
     let is_sharded = hello.get_str("msg").ok() == Some("isdbgrid");
     if is_sharded {
-        client.database(&database).drop().await?;
+        client.database(database).drop().await?;
         for host in options.hosts {
             client
                 .database("admin")

@@ -1,10 +1,7 @@
-use std::{convert::TryInto, path::PathBuf};
+use anyhow::Result;
+use mongodb::bson::Document;
 
-use anyhow::{bail, Result};
-use mongodb::bson::{Bson, Document};
-use serde_json::Value;
-
-use crate::{bench::Benchmark, fs::read_to_string};
+use crate::bench::Benchmark;
 
 pub struct BsonDecodeBenchmark {
     num_iter: usize,
@@ -13,36 +10,27 @@ pub struct BsonDecodeBenchmark {
 
 pub struct Options {
     pub num_iter: usize,
-    pub path: PathBuf,
+    pub doc: Document,
 }
 
 #[async_trait::async_trait]
 impl Benchmark for BsonDecodeBenchmark {
     type Options = Options;
+    type TaskState = ();
 
     async fn setup(options: Self::Options) -> Result<Self> {
-        let mut file = read_to_string(&options.path).await?;
-
-        let json: Value = serde_json::from_str(&mut file)?;
-        let doc = match json.try_into()? {
-            Bson::Document(doc) => doc,
-            _ => bail!("invalid json test file"),
-        };
-
         let mut bytes: Vec<u8> = Vec::new();
-        doc.to_writer(&mut bytes)?;
+        options.doc.to_writer(&mut bytes)?;
 
         Ok(BsonDecodeBenchmark {
             num_iter: options.num_iter,
             bytes,
         })
     }
 
-    async fn do_task(&self) -> Result<()> {
+    async fn do_task(&self, _state: Self::TaskState) -> Result<()> {
         for _ in 0..self.num_iter {
-            // `&[u8]` implements `Read`, and `from_reader` needs a `&mut R: Read`, so we need a
-            // `&mut &[u8]`.
-            let _doc = Document::from_reader(&mut &self.bytes[..])?;
+            let _doc = Document::from_reader(&self.bytes[..])?;
         }
 
         Ok(())

@@ -1,10 +1,7 @@
-use std::{convert::TryInto, path::PathBuf};
+use anyhow::Result;
+use mongodb::bson::Document;
 
-use anyhow::{bail, Result};
-use mongodb::bson::{Bson, Document};
-use serde_json::Value;
-
-use crate::{bench::Benchmark, fs::read_to_string};
+use crate::bench::Benchmark;
 
 pub struct BsonEncodeBenchmark {
     num_iter: usize,
@@ -13,29 +10,22 @@ pub struct BsonEncodeBenchmark {
 
 pub struct Options {
     pub num_iter: usize,
-    pub path: PathBuf,
+    pub doc: Document,
 }
 
 #[async_trait::async_trait]
 impl Benchmark for BsonEncodeBenchmark {
     type Options = Options;
+    type TaskState = ();
 
     async fn setup(options: Self::Options) -> Result<Self> {
-        let mut file = read_to_string(&options.path).await?;
-
-        let json: Value = serde_json::from_str(&mut file)?;
-        let doc = match json.try_into()? {
-            Bson::Document(doc) => doc,
-            _ => bail!("invalid json test file"),
-        };
-
         Ok(BsonEncodeBenchmark {
             num_iter: options.num_iter,
-            doc,
+            doc: options.doc,
         })
     }
 
-    async fn do_task(&self) -> Result<()> {
+    async fn do_task(&self, _state: Self::TaskState) -> Result<()> {
         for _ in 0..self.num_iter {
             let mut bytes: Vec<u8> = Vec::new();
             self.doc.to_writer(&mut bytes)?;

@@ -0,0 +1,149 @@
+use anyhow::Result;
+use mongodb::{
+    bson::{doc, Document},
+    options::{DeleteOneModel, InsertOneModel, ReplaceOneModel, WriteModel},
+    Client,
+    Namespace,
+};
+use once_cell::sync::Lazy;
+
+use super::{drop_database, Benchmark, COLL_NAME, DATABASE_NAME};
+
+/// Benchmark that times a single client-level bulk write made up entirely of
+/// insert-one models.
+pub struct InsertBulkWriteBenchmark {
+    /// Client used to reset the target collection and to run the bulk write.
+    client: Client,
+    /// Connection string, kept so `teardown` can drop the benchmark database.
+    uri: String,
+    /// Models built once in `setup`; each iteration receives a clone of this list.
+    write_models: Vec<WriteModel>,
+}
+
+/// Options shared by the bulk write benchmarks defined in this file.
+pub struct Options {
+    /// Connection string used to create the client and to drop the benchmark database.
+    pub uri: String,
+    /// Document used as the inserted document and, in the mixed benchmark, as the
+    /// replacement document.
+    pub doc: Document,
+    /// Number of write models to build for each bulk write.
+    pub num_models: usize,
+}
+
+#[async_trait::async_trait]
+impl Benchmark for InsertBulkWriteBenchmark {
+    type Options = Options;
+    type TaskState = Vec<WriteModel>;
+
+    /// Connects to the deployment, drops the benchmark database, and builds
+    /// `num_models` identical insert-one models aimed at the benchmark collection.
+    async fn setup(options: Self::Options) -> Result<Self> {
+        let Options {
+            uri,
+            doc,
+            num_models,
+        } = options;
+
+        let client = Client::with_uri_str(&uri).await?;
+        drop_database(uri.as_str(), DATABASE_NAME.as_str()).await?;
+
+        // Build one template model; `vec!` clones it to fill the list.
+        let template = WriteModel::InsertOne(
+            InsertOneModel::builder()
+                .namespace(Namespace::new(DATABASE_NAME.as_str(), COLL_NAME.as_str()))
+                .document(doc)
+                .build(),
+        );
+        let write_models = vec![template; num_models];
+
+        Ok(Self {
+            client,
+            uri,
+            write_models,
+        })
+    }
+
+    /// Drops and recreates the benchmark collection, then returns a fresh copy of the
+    /// prepared models for the task to consume.
+    async fn before_task(&self) -> Result<Self::TaskState> {
+        let database = self.client.database(&DATABASE_NAME);
+        database.collection::<Document>(&COLL_NAME).drop().await?;
+        database.create_collection(COLL_NAME.as_str()).await?;
+
+        Ok(self.write_models.clone())
+    }
+
+    /// Sends every model to the server in one client-level bulk write.
+    async fn do_task(&self, models: Self::TaskState) -> Result<()> {
+        self.client.bulk_write(models).await?;
+        Ok(())
+    }
+
+    /// Drops the benchmark database.
+    async fn teardown(&self) -> Result<()> {
+        drop_database(self.uri.as_str(), DATABASE_NAME.as_str()).await
+    }
+}
+
+/// Names of the ten collections, `corpus_1` through `corpus_10`, that
+/// `MixedBulkWriteBenchmark` spreads its write models across.
+static COLLECTION_NAMES: Lazy<Vec<String>> =
+    Lazy::new(|| (1..=10).map(|i| format!("corpus_{}", i)).collect());
+
+/// Benchmark that times a single client-level bulk write mixing insert-one,
+/// replace-one, and delete-one models across several collections.
+pub struct MixedBulkWriteBenchmark {
+    /// Client used to reset the database and to run the bulk write.
+    client: Client,
+    /// Connection string, kept so `teardown` can drop the benchmark database.
+    uri: String,
+    /// Models built once in `setup`; each iteration receives a clone of this list.
+    write_models: Vec<WriteModel>,
+}
+
+#[async_trait::async_trait]
+impl Benchmark for MixedBulkWriteBenchmark {
+    type Options = Options;
+    type TaskState = Vec<WriteModel>;
+
+    /// Connects to the deployment, drops the benchmark database, and builds
+    /// `num_models` write models.
+    ///
+    /// Models cycle through insert-one, replace-one, and delete-one (by `i % 3`) and
+    /// are spread round-robin over the collections named in `COLLECTION_NAMES`.
+    async fn setup(options: Self::Options) -> Result<Self> {
+        let client = Client::with_uri_str(&options.uri).await?;
+        drop_database(options.uri.as_str(), DATABASE_NAME.as_str()).await?;
+
+        // The final size is known up front, so allocate once.
+        let mut write_models = Vec::with_capacity(options.num_models);
+        for i in 0..options.num_models {
+            // Derive the modulus from the list itself so the index can never fall out
+            // of range if the set of collection names changes.
+            let collection_name = &COLLECTION_NAMES[i % COLLECTION_NAMES.len()];
+            let namespace = Namespace::new(DATABASE_NAME.as_str(), collection_name);
+
+            let model: WriteModel = match i % 3 {
+                0 => InsertOneModel::builder()
+                    .namespace(namespace)
+                    .document(options.doc.clone())
+                    .build()
+                    .into(),
+                1 => ReplaceOneModel::builder()
+                    .namespace(namespace)
+                    .filter(doc! {})
+                    .replacement(options.doc.clone())
+                    .build()
+                    .into(),
+                _ => DeleteOneModel::builder()
+                    .namespace(namespace)
+                    .filter(doc! {})
+                    .build()
+                    .into(),
+            };
+            write_models.push(model);
+        }
+
+        Ok(Self {
+            client,
+            uri: options.uri,
+            write_models,
+        })
+    }
+
+    /// Drops the benchmark database, recreates every collection in
+    /// `COLLECTION_NAMES`, and returns a fresh copy of the prepared models for the
+    /// task to consume.
+    async fn before_task(&self) -> Result<Self::TaskState> {
+        let database = self.client.database(&DATABASE_NAME);
+        database.drop().await?;
+        for collection_name in COLLECTION_NAMES.iter() {
+            database.create_collection(collection_name).await?;
+        }
+        Ok(self.write_models.clone())
+    }
+
+    /// Sends every model to the server in one client-level bulk write.
+    async fn do_task(&self, write_models: Self::TaskState) -> Result<()> {
+        self.client.bulk_write(write_models).await?;
+        Ok(())
+    }
+
+    /// Drops the benchmark database.
+    async fn teardown(&self) -> Result<()> {
+        drop_database(self.uri.as_str(), DATABASE_NAME.as_str()).await?;
+        Ok(())
+    }
+}