Store compile benchmark codegen backend into the database

Kobzol · Kobzol · commit ef64e686eedc · 2023-11-24T21:05:07.000+01:00
Since we're already modifying the pstat_series table,
this commit also renames its cache column to scenario
and its statistic column to metric, to resolve a long-standing
mismatch between the database column names and the terminology used elsewhere.
diff --git a/collector/src/compile/execute/bencher.rs b/collector/src/compile/execute/bencher.rs
@@ -92,8 +92,14 @@ impl<'a> BenchProcessor<'a> {
             Profile::Clippy => database::Profile::Clippy,
         };
 
+        let backend = match backend {
+            CodegenBackend::Llvm => database::CodegenBackend::Llvm,
+        };
+
         if let Some(files) = stats.2 {
             if env::var_os("RUSTC_PERF_UPLOAD_TO_S3").is_some() {
+                // FIXME: Record codegen backend in the self profile name
+
                 // We can afford to have the uploads run concurrently with
                 // rustc. Generally speaking, they take up almost no CPU time
                 // (just copying data into the network). Plus, during
@@ -131,6 +137,7 @@ impl<'a> BenchProcessor<'a> {
                 self.benchmark.0.as_str(),
                 profile,
                 scenario,
+                backend,
                 stat,
                 value,
             ));
diff --git a/database/schema.md b/database/schema.md
@@ -26,15 +26,16 @@ Here is the diagram for compile-time benchmarks:
 │ └────────────┘  └───────────────┘ │└────────────┘ │
 │                                   │               │
 │                                   │               │
-│ ┌───────────────┐  ┌──────────┐   |               │
+│ ┌───────────────┐  ┌──────────┐   │               │
 │ │ pstat_series  │  │ pstat    │   │               │
 │ ├───────────────┤  ├──────────┤   │               │ 
 │ │ id *          │◄┐│ id *     │   │               │
 └─┤ crate         │ └┤ series   │   │               │ 
   │ profile       │  │ aid      ├───┼───────────────┘
-  │ cache         │  │ cid      ├───┘
-  │ statistic     │  │ value    │
-  └───────────────┘  └──────────┘
+  │ scenario      │  │ cid      ├───┘
+  │ backend       │  │ value    │
+  │ metric        │  └──────────┘
+  └───────────────┘
 ```
 
 For runtime benchmarks the schema very similar, but there are different table names:
@@ -140,19 +141,20 @@ of a crate, profile, scenario and the metric being collected.
 
 * crate (aka `benchmark`): the benchmarked crate which might be a crate from crates.io or a crate made specifically to stress some part of the compiler.
 * profile: what type of compilation is happening - check build, optimized build (a.k.a. release build), debug build, or doc build.
-* cache (aka `scenario`): describes how much of the incremental cache is full. An empty incremental cache means that the compiler must do a full build.
-* statistic (aka `metric`): the type of metric being collected
+* scenario: describes how much of the incremental cache is full. An empty incremental cache means that the compiler must do a full build.
+* backend: the codegen backend used for compilation (e.g. `llvm`).
+* metric: the type of metric being collected.
 
 This corresponds to a [`statistic description`](../docs/glossary.md).
 
-There is a separate table for this collection to avoid duplicating crates, prfiles, scenarios etc.
+There is a separate table for this collection to avoid duplicating crates, profiles, scenarios etc.
 many times in the `pstat` table.
 
 ```
 sqlite> select * from pstat_series limit 1;
-id          crate       profile     cache       statistic
-----------  ----------  ----------  ----------  ------------
-1           helloworld  check       full        task-clock:u
+id          crate       profile     scenario    backend  metric
+----------  ----------  ----------  ----------  -------  ------------
+1           helloworld  check       full        llvm     task-clock:u
 ```
 
 ### pstat
diff --git a/database/src/bin/import-sqlite.rs b/database/src/bin/import-sqlite.rs
@@ -45,7 +45,7 @@ async fn main() {
         let sqlite_aid = sqlite_conn.artifact_id(&aid).await;
         let postgres_aid = postgres_conn.artifact_id(&aid).await;
 
-        for (&(benchmark, profile, scenario, metric), id) in
+        for (&(benchmark, profile, scenario, backend, metric), id) in
             sqlite_idx.compile_statistic_descriptions()
         {
             if benchmarks.insert(benchmark) {
@@ -73,6 +73,7 @@ async fn main() {
                         &benchmark,
                         profile,
                         scenario,
+                        backend,
                         metric.as_str(),
                         stat,
                     )
diff --git a/database/src/lib.rs b/database/src/lib.rs
@@ -360,6 +360,39 @@ impl PartialOrd for Scenario {
     }
 }
 
+/// The codegen backend used for compilation.
+#[derive(
+    Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize,
+)]
+pub enum CodegenBackend {
+    /// The default LLVM backend
+    Llvm,
+}
+
+impl CodegenBackend {
+    pub fn as_str(self) -> &'static str {
+        match self {
+            CodegenBackend::Llvm => "llvm",
+        }
+    }
+}
+
+impl FromStr for CodegenBackend {
+    type Err = String;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(match s.to_ascii_lowercase().as_str() {
+            "llvm" => CodegenBackend::Llvm,
+            _ => return Err(format!("{} is not a codegen backend", s)),
+        })
+    }
+}
+
+impl fmt::Display for CodegenBackend {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.as_str())
+    }
+}
+
 /// An identifier for a built version of the compiler
 #[derive(Deserialize, Serialize, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub enum ArtifactId {
@@ -427,7 +460,7 @@ pub struct Index {
     artifacts: Indexed<Box<str>>,
     /// Id lookup of compile stat description ids
     /// For legacy reasons called `pstat_series` in the database, and so the name is kept here.
-    pstat_series: Indexed<(Benchmark, Profile, Scenario, Metric)>,
+    pstat_series: Indexed<(Benchmark, Profile, Scenario, CodegenBackend, Metric)>,
     /// Id lookup of runtime stat description ids
     runtime_pstat_series: Indexed<(Benchmark, Metric)>,
 }
@@ -547,6 +580,7 @@ pub enum DbLabel {
         benchmark: Benchmark,
         profile: Profile,
         scenario: Scenario,
+        backend: CodegenBackend,
         metric: Metric,
     },
 }
@@ -564,10 +598,11 @@ impl Lookup for DbLabel {
                 benchmark,
                 profile,
                 scenario,
+                backend,
                 metric,
             } => index
                 .pstat_series
-                .get(&(*benchmark, *profile, *scenario, *metric)),
+                .get(&(*benchmark, *profile, *scenario, *backend, *metric)),
         }
     }
 }
@@ -617,7 +652,7 @@ impl Index {
         self.pstat_series
             .map
             .keys()
-            .map(|(_, _, _, metric)| metric)
+            .map(|(_, _, _, _, metric)| metric)
             .collect::<std::collections::HashSet<_>>()
             .into_iter()
             .map(|s| s.to_string())
@@ -643,7 +678,7 @@ impl Index {
         &self,
     ) -> impl Iterator<
         Item = (
-            &(Benchmark, Profile, Scenario, Metric),
+            &(Benchmark, Profile, Scenario, CodegenBackend, Metric),
             StatisticalDescriptionId,
         ),
     > + '_ {
diff --git a/database/src/pool.rs b/database/src/pool.rs
@@ -1,4 +1,4 @@
-use crate::{ArtifactCollection, ArtifactId, ArtifactIdNumber, CompileBenchmark};
+use crate::{ArtifactCollection, ArtifactId, ArtifactIdNumber, CodegenBackend, CompileBenchmark};
 use crate::{CollectionId, Index, Profile, QueuedCommit, Scenario, Step};
 use chrono::{DateTime, Utc};
 use hashbrown::HashMap;
@@ -44,6 +44,7 @@ pub trait Connection: Send + Sync {
         benchmark: &str,
         profile: Profile,
         scenario: Scenario,
+        backend: CodegenBackend,
         metric: &str,
         value: f64,
     );
diff --git a/database/src/pool/postgres.rs b/database/src/pool/postgres.rs
@@ -1,7 +1,7 @@
 use crate::pool::{Connection, ConnectionManager, ManagedConnection, Transaction};
 use crate::{
-    ArtifactCollection, ArtifactId, ArtifactIdNumber, Benchmark, CollectionId, Commit, CommitType,
-    CompileBenchmark, Date, Index, Profile, QueuedCommit, Scenario,
+    ArtifactCollection, ArtifactId, ArtifactIdNumber, Benchmark, CodegenBackend, CollectionId,
+    Commit, CommitType, CompileBenchmark, Date, Index, Profile, QueuedCommit, Scenario,
 };
 use anyhow::Context as _;
 use chrono::{DateTime, TimeZone, Utc};
@@ -254,6 +254,15 @@ static MIGRATIONS: &[&str] = &[
         UNIQUE(aid, component)
     );
     "#,
+    // Add codegen backend column and add it to the unique constraint.
+    // Also rename cache to scenario and statistic to metric, while we're at it.
+    r#"
+    alter table pstat_series rename column cache to scenario;
+    alter table pstat_series rename column statistic to metric;
+    alter table pstat_series add backend text not null default 'llvm';
+    alter table pstat_series drop constraint pstat_series_crate_profile_cache_statistic_key;
+    alter table pstat_series add constraint test_case UNIQUE(crate, profile, scenario, backend, metric);
+    "#,
 ];
 
 #[async_trait::async_trait]
@@ -441,8 +450,8 @@ impl PostgresConnection {
                     .await
                     .unwrap(),
                 get_error: conn.prepare("select benchmark, error from error where aid = $1").await.unwrap(),
-                insert_pstat_series: conn.prepare("insert into pstat_series (crate, profile, cache, statistic) VALUES ($1, $2, $3, $4) ON CONFLICT DO NOTHING RETURNING id").await.unwrap(),
-                select_pstat_series: conn.prepare("select id from pstat_series where crate = $1 and profile = $2 and cache = $3 and statistic = $4").await.unwrap(),
+                insert_pstat_series: conn.prepare("insert into pstat_series (crate, profile, scenario, backend, metric) VALUES ($1, $2, $3, $4, $5) ON CONFLICT DO NOTHING RETURNING id").await.unwrap(),
+                select_pstat_series: conn.prepare("select id from pstat_series where crate = $1 and profile = $2 and scenario = $3 and backend = $4 and metric = $5").await.unwrap(),
                 collection_id: conn.prepare("insert into collection (perf_commit) VALUES ($1) returning id").await.unwrap(),
                 record_duration: conn.prepare("
                     insert into artifact_collection_duration (
@@ -592,7 +601,7 @@ where
             pstat_series: self
                 .conn()
                 .query(
-                    "select id, crate, profile, cache, statistic from pstat_series;",
+                    "select id, crate, profile, scenario, backend, metric from pstat_series;",
                     &[],
                 )
                 .await
@@ -605,7 +614,8 @@ where
                             Benchmark::from(row.get::<_, String>(1).as_str()),
                             Profile::from_str(row.get::<_, String>(2).as_str()).unwrap(),
                             row.get::<_, String>(3).as_str().parse().unwrap(),
-                            row.get::<_, String>(4).as_str().into(),
+                            CodegenBackend::from_str(row.get::<_, String>(4).as_str()).unwrap(),
+                            row.get::<_, String>(5).as_str().into(),
                         ),
                     )
                 })
@@ -799,16 +809,18 @@ where
         benchmark: &str,
         profile: Profile,
         scenario: Scenario,
+        backend: CodegenBackend,
         metric: &str,
         stat: f64,
     ) {
         let profile = profile.to_string();
         let scenario = scenario.to_string();
+        let backend = backend.to_string();
         let sid = self
             .conn()
             .query_opt(
                 &self.statements().select_pstat_series,
-                &[&benchmark, &profile, &scenario, &metric],
+                &[&benchmark, &profile, &scenario, &backend, &metric],
             )
             .await
             .unwrap();
@@ -818,14 +830,14 @@ where
                 self.conn()
                     .query_opt(
                         &self.statements().insert_pstat_series,
-                        &[&benchmark, &profile, &scenario, &metric],
+                        &[&benchmark, &profile, &scenario, &backend, &metric],
                     )
                     .await
                     .unwrap();
                 self.conn()
                     .query_one(
                         &self.statements().select_pstat_series,
-                        &[&benchmark, &profile, &scenario, &metric],
+                        &[&benchmark, &profile, &scenario, &backend, &metric],
                     )
                     .await
                     .unwrap()
diff --git a/database/src/pool/sqlite.rs b/database/src/pool/sqlite.rs
@@ -1,7 +1,7 @@
 use crate::pool::{Connection, ConnectionManager, ManagedConnection, Transaction};
 use crate::{
-    ArtifactCollection, ArtifactId, Benchmark, CollectionId, Commit, CommitType, CompileBenchmark,
-    Date, Profile,
+    ArtifactCollection, ArtifactId, Benchmark, CodegenBackend, CollectionId, Commit, CommitType,
+    CompileBenchmark, Date, Profile,
 };
 use crate::{ArtifactIdNumber, Index, QueuedCommit};
 use chrono::{DateTime, NaiveDateTime, TimeZone, Utc};
@@ -367,6 +367,24 @@ static MIGRATIONS: &[Migration] = &[
         );
     "#,
     ),
+    // Add codegen backend column and add it to the unique constraint (done by recreating the
+    // table). Also rename cache to scenario and statistic to metric, while we're at it.
+    Migration::without_foreign_key_constraints(
+        r#"
+        create table pstat_series_new(
+            id integer primary key not null,
+            crate text not null references benchmark(name) on delete cascade on update cascade,
+            profile text not null,
+            scenario text not null,
+            backend text not null,
+            metric text not null,
+            UNIQUE(crate, profile, scenario, backend, metric)
+        );
+        insert into pstat_series_new select id, crate, profile, cache, 'llvm', statistic from pstat_series;
+        drop table pstat_series;
+        alter table pstat_series_new rename to pstat_series;
+    "#,
+    ),
 ];
 
 #[async_trait::async_trait]
@@ -482,7 +500,7 @@ impl Connection for SqliteConnection {
             .collect();
         let pstat_series = self
             .raw()
-            .prepare("select id, crate, profile, cache, statistic from pstat_series;")
+            .prepare("select id, crate, profile, scenario, backend, metric from pstat_series;")
             .unwrap()
             .query_map(params![], |row| {
                 Ok((
@@ -491,7 +509,8 @@ impl Connection for SqliteConnection {
                         Benchmark::from(row.get::<_, String>(1)?.as_str()),
                         Profile::from_str(row.get::<_, String>(2)?.as_str()).unwrap(),
                         row.get::<_, String>(3)?.as_str().parse().unwrap(),
-                        row.get::<_, String>(4)?.as_str().into(),
+                        CodegenBackend::from_str(row.get::<_, String>(4)?.as_str()).unwrap(),
+                        row.get::<_, String>(5)?.as_str().into(),
                     ),
                 ))
             })
@@ -633,21 +652,25 @@ impl Connection for SqliteConnection {
         benchmark: &str,
         profile: Profile,
         scenario: crate::Scenario,
+        backend: CodegenBackend,
         metric: &str,
         value: f64,
     ) {
         let profile = profile.to_string();
         let scenario = scenario.to_string();
-        self.raw_ref().execute("insert or ignore into pstat_series (crate, profile, cache, statistic) VALUES (?, ?, ?, ?)", params![
+        let backend = backend.to_string();
+        self.raw_ref().execute("insert or ignore into pstat_series (crate, profile, scenario, backend, metric) VALUES (?, ?, ?, ?, ?)", params![
             &benchmark,
             &profile,
             &scenario,
+            &backend,
             &metric,
         ]).unwrap();
-        let sid: i32 = self.raw_ref().query_row("select id from pstat_series where crate = ? and profile = ? and cache = ? and statistic = ?", params![
+        let sid: i32 = self.raw_ref().query_row("select id from pstat_series where crate = ? and profile = ? and scenario = ? and backend = ? and metric = ?", params![
             &benchmark,
             &profile,
             &scenario,
+            &backend,
             &metric,
         ], |r| r.get(0)).unwrap();
         self.raw_ref()
diff --git a/docs/glossary.md b/docs/glossary.md
@@ -21,6 +21,8 @@ The following is a glossary of domain specific terminology. Although benchmarks
   - `incr-full`: incremental compilation is used, with an empty incremental cache.
   - `incr-unchanged`: incremental compilation is used, with a full incremental cache and no code changes made.
   - `incr-patched`: incremental compilation is used, with a full incremental cache and some code changes made.
+* **backend**: the codegen backend used for compiling Rust code.
+  - `llvm`: the default codegen backend.
 * **category**: a high-level group of benchmarks. Currently, there are three categories, primary (mostly real-world crates), secondary (mostly stress tests), and stable (old real-world crates, only used for the dashboard).
 * **artifact type**: describes what kind of artifact does the benchmark build. Either `library` or `binary`.
 
diff --git a/site/src/comparison.rs b/site/src/comparison.rs
diff --git a/site/src/selector.rs b/site/src/selector.rs