Skip to content

Commit 93c9b9a

Browse files
Move to inline-stored SHAs
1 parent baeb909 commit 93c9b9a

File tree

6 files changed

+255
-40
lines changed

6 files changed

+255
-40
lines changed

collector/src/git.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@ use std::{
99
pub fn get_commit_or_fake_it(sha: &str) -> anyhow::Result<Commit> {
1010
Ok(get_rust_commits()?
1111
.iter()
12-
.find(|c| c.sha == sha)
12+
.find(|c| c.sha == *sha)
1313
.cloned()
1414
.unwrap_or_else(|| {
1515
log::warn!("utilizing fake commit!");
1616
Commit {
17-
sha: sha.to_string(),
17+
sha: sha.into(),
1818
date: crate::Date::ymd_hms(2000, 01, 01, 0, 0, 0),
1919
}
2020
}))
@@ -73,7 +73,7 @@ pub fn get_rust_commits() -> anyhow::Result<Vec<Commit>> {
7373
let date = crate::Date(chrono::DateTime::from_utc(date, chrono::Utc));
7474

7575
commits.push(Commit {
76-
sha: hash.to_string(),
76+
sha: hash.into(),
7777
date,
7878
});
7979
}

collector/src/lib.rs

Lines changed: 208 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,216 @@ pub mod self_profile;
1818

1919
pub use self_profile::{QueryData, SelfProfile};
2020

21+
/// A commit identifier stored inline, without heap allocation, so it can be `Copy`.
///
/// The variant order is significant: the derived `PartialOrd`/`Ord` place every
/// `Hex` value before every `Raw` value.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Sha {
    /// The 20 bytes obtained by decoding a 40-character hex SHA.
    Hex([u8; 20]),
    /// A short textual ID, usually one supplied by the user.
    ///
    /// Up to 30 characters are packed at 5 bits apiece into `bytes`
    /// (30 * 5 = 150 bits, which fits in 19 bytes); `length` is the number of
    /// characters stored. Five bits give 32 symbols: `-`, the digits `4`, `1`,
    /// `2`, `6`, `8`, and the letters `a-z`.
    Raw { length: u8, bytes: [u8; 19] },
}
30+
31+
impl PartialEq<str> for Sha {
32+
fn eq(&self, other: &str) -> bool {
33+
self.to_string() == other
34+
}
35+
}
36+
37+
/// Decodes a full 40-character hexadecimal SHA into its 20 raw bytes.
///
/// Returns `None` unless `s` is exactly 40 bytes long and every character is
/// a hex digit (either case). Insisting on the exact length keeps short
/// hex-looking IDs such as `"abc"` from being zero-padded into a bogus SHA,
/// and keeps over-long input from indexing past the end of the output array.
fn hex_decode(s: &str) -> Option<[u8; 20]> {
    if s.len() != 40 {
        return None;
    }

    let mut decoded = [0u8; 20];
    // Each output byte is built from two consecutive input characters:
    // the first is the high nibble, the second the low nibble.
    for (slot, pair) in decoded.iter_mut().zip(s.as_bytes().chunks_exact(2)) {
        // Bytes >= 0x80 map to non-ASCII chars, for which `to_digit` is `None`.
        let hi = (pair[0] as char).to_digit(16)? as u8;
        let lo = (pair[1] as char).to_digit(16)? as u8;
        *slot = (hi << 4) | lo;
    }
    Some(decoded)
}
50+
51+
/// Cursor over a fixed 19-byte buffer that reads or writes one bit at a time.
///
/// Bits are addressed least-significant-first within each byte: bit `n` of the
/// stream lives in `bytes[n / 8]` at bit position `n % 8`.
struct BitView {
    bytes: [u8; 19],
    bit_offset: usize,
}

impl BitView {
    /// Total number of bits the buffer can hold (19 bytes).
    const CAPACITY_BITS: usize = 19 * 8;

    /// Creates an all-zero view positioned at the first bit, ready for pushing.
    fn new() -> BitView {
        BitView::with_bytes([0; 19])
    }

    /// Wraps existing bytes, positioned at the first bit, ready for reading.
    fn with_bytes(bytes: [u8; 19]) -> BitView {
        BitView {
            bytes,
            bit_offset: 0,
        }
    }

    /// Returns the bit under the cursor and advances by one.
    ///
    /// Panics if all 152 bits have already been consumed.
    fn read(&mut self) -> bool {
        // Check before indexing so the failure carries this message rather
        // than a bare slice index-out-of-bounds panic.
        assert!(
            self.bit_offset < Self::CAPACITY_BITS,
            "read past limit of 152 bits"
        );
        let bit = (self.bytes[self.bit_offset / 8] & (1 << (self.bit_offset % 8))) != 0;
        self.bit_offset += 1;
        bit
    }

    /// Reads a 5-bit value, least-significant bit first (the inverse of `push5`).
    fn read5(&mut self) -> u8 {
        let mut v = 0;
        for shift in 0..5 {
            v |= (self.read() as u8) << shift;
        }
        v
    }

    /// Writes one bit at the cursor and advances by one.
    ///
    /// Panics if all 152 bits have already been written.
    fn push(&mut self, b: bool) {
        assert!(
            self.bit_offset < Self::CAPACITY_BITS,
            "pushed past limit of 152 bits"
        );
        if b {
            self.bytes[self.bit_offset / 8] |= 1 << (self.bit_offset % 8);
        }
        self.bit_offset += 1;
    }

    /// Writes the low 5 bits of `v`, least-significant bit first.
    ///
    /// The bit order must mirror `read5`, otherwise values do not round-trip.
    /// Panics if `v` does not fit in 5 bits.
    fn push5(&mut self, v: u8) {
        assert!(
            v < 32,
            "`{}` must be less than 32 (i.e., no more than 5 bits)",
            v
        );

        for shift in 0..5 {
            self.push((v & (1 << shift)) != 0);
        }
    }
}
117+
118+
impl<'a> From<&'a str> for Sha {
119+
fn from(s: &'a str) -> Sha {
120+
if let Some(v) = hex_decode(s) {
121+
return Sha::Hex(v);
122+
}
123+
124+
assert!(
125+
s.len() <= 30,
126+
"`{}` is too long ({}), can be at most 30 bytes",
127+
s,
128+
s.len(),
129+
);
130+
131+
let mut v = BitView::new();
132+
for b in s.as_bytes().iter() {
133+
let b = *b;
134+
match b {
135+
b'-' => {
136+
v.push5(0);
137+
}
138+
b'4' => {
139+
v.push5(1);
140+
}
141+
b'1' => {
142+
v.push5(2);
143+
}
144+
b'2' => {
145+
v.push5(3);
146+
}
147+
b'6' => {
148+
v.push5(4);
149+
}
150+
b'8' => {
151+
v.push5(5);
152+
}
153+
b'a'..=b'z' => {
154+
v.push5(b - b'a' + 6);
155+
}
156+
_ => panic!(
157+
"`{}` is not a valid character for SHA-like IDs, must be in a-z, or -, or _, in `{}`.",
158+
b as char,
159+
s,
160+
),
161+
}
162+
}
163+
Sha::Raw {
164+
length: s.len() as u8,
165+
bytes: v.bytes,
166+
}
167+
}
168+
}
169+
170+
impl Serialize for Sha {
171+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
172+
where
173+
S: serde::ser::Serializer,
174+
{
175+
serializer.collect_str(&self)
176+
}
177+
}
178+
179+
impl<'de> Deserialize<'de> for Sha {
180+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
181+
where
182+
D: serde::de::Deserializer<'de>,
183+
{
184+
let s: &'de str = <&'de str>::deserialize(deserializer)?;
185+
Ok(s.into())
186+
}
187+
}
188+
189+
impl fmt::Debug for Sha {
190+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
191+
write!(f, "{}", self)
192+
}
193+
}
194+
195+
impl fmt::Display for Sha {
196+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
197+
match *self {
198+
Sha::Hex(hex) => {
199+
for &b in hex.iter() {
200+
write!(f, "{:x}{:x}", b >> 4, b & 0xf)?;
201+
}
202+
}
203+
Sha::Raw { length, bytes } => {
204+
let mut v = BitView::with_bytes(bytes);
205+
let mut decoded = [0; 19];
206+
for idx in 0..length as usize {
207+
decoded[idx] = match v.read5() {
208+
0 => b'-',
209+
1 => b'4',
210+
2 => b'1',
211+
3 => b'2',
212+
4 => b'6',
213+
5 => b'8',
214+
other => other - 6 + b'a',
215+
};
216+
}
217+
write!(
218+
f,
219+
"{}",
220+
std::str::from_utf8(&decoded[..length as usize]).unwrap()
221+
)?;
222+
}
223+
}
224+
Ok(())
225+
}
226+
}
227+
21228
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
22229
pub struct Commit {
23-
pub sha: String,
230+
pub sha: Sha,
24231
pub date: Date,
25232
}
26233

site/src/api.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
//! The responses are calculated in the server.rs file.
1515
1616
use crate::load::CommitData;
17-
use collector::{Date, Run, StatId};
17+
use collector::{Date, Run, Sha, StatId};
1818
use serde::{Deserialize, Serialize};
1919
use std::collections::{BTreeSet, HashMap};
2020
use std::result::Result as StdResult;
@@ -23,7 +23,7 @@ use std::result::Result as StdResult;
2323
#[derive(Debug, Clone, Serialize, Deserialize)]
2424
pub struct DateData {
2525
pub date: Date,
26-
pub commit: String,
26+
pub commit: Sha,
2727
pub data: HashMap<String, Vec<(String, Run, f64)>>,
2828
}
2929

@@ -159,7 +159,7 @@ pub mod data {
159159
}
160160

161161
pub mod graph {
162-
use collector::Bound;
162+
use collector::{Bound, Sha};
163163
use serde::{Deserialize, Serialize};
164164
use std::collections::HashMap;
165165

@@ -188,7 +188,7 @@ pub mod graph {
188188
pub benchmarks: HashMap<String, HashMap<String, Vec<GraphData>>>,
189189
pub max: HashMap<String, f32>,
190190
pub colors: Vec<String>,
191-
pub commits: Vec<String>,
191+
pub commits: Vec<Sha>,
192192
}
193193
}
194194

site/src/load.rs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use collector::Date;
2525
use crate::api::github;
2626
use collector;
2727
pub use collector::{
28-
ArtifactData, Benchmark, Commit, CommitData, Patch, Run, RunId, StatId, Stats,
28+
ArtifactData, Benchmark, Commit, CommitData, Patch, Run, RunId, Sha, StatId, Stats,
2929
};
3030
use log::{error, info, trace, warn};
3131

@@ -102,7 +102,7 @@ pub struct Persistent {
102102
pub pending_try_builds: HashSet<u32>,
103103
// Set of commit hashes for which we've completed benchmarking.
104104
#[serde(default)]
105-
pub posted_ends: Vec<String>,
105+
pub posted_ends: Vec<Sha>,
106106
}
107107

108108
lazy_static::lazy_static! {
@@ -149,7 +149,7 @@ pub struct Keys {
149149
pub struct Config {
150150
pub keys: Keys,
151151
#[serde(default)]
152-
pub skip: HashSet<String>,
152+
pub skip: HashSet<Sha>,
153153
}
154154

155155
#[derive(Debug)]
@@ -169,7 +169,7 @@ pub struct InputData {
169169
/// The benchmarks we interpolated for a given commit.
170170
///
171171
/// Not all commits are in this map.
172-
pub interpolated: HashMap<String, Vec<Interpolation>>,
172+
pub interpolated: HashMap<Sha, Vec<Interpolation>>,
173173

174174
pub artifact_data: HashMap<String, ArtifactData>,
175175

@@ -586,12 +586,14 @@ impl InputData {
586586
let mut ret = Vec::new();
587587
ret.push((
588588
Commit {
589-
sha: sha.clone(),
589+
sha: sha.as_str().into(),
590590
date: Date::ymd_hms(2001, 01, 01, 0, 0, 0),
591591
},
592592
MissingReason::TryCommit,
593593
));
594-
if let Some(commit) = self.commits.iter().find(|c| c.sha == *parent_sha) {
594+
if let Some(commit) =
595+
self.commits.iter().find(|c| c.sha == *parent_sha.as_str())
596+
{
595597
ret.push((commit.clone(), MissingReason::TryParent));
596598
} else {
597599
// could not find parent SHA
@@ -632,7 +634,7 @@ struct AssociatedData<'a> {
632634
data: &'a [CommitData],
633635
commits: &'a [Commit],
634636
commit_map: &'a HashMap<Commit, usize>,
635-
interpolated: &'a mut HashMap<String, Vec<Interpolation>>,
637+
interpolated: &'a mut HashMap<Sha, Vec<Interpolation>>,
636638

637639
// By benchmark name, mapping to the index in the data vector
638640
last_seen_commit: &'a [HashMap<&'a str, usize>],
@@ -669,7 +671,7 @@ fn fill_benchmark_runs(
669671

670672
let interpolations = data
671673
.interpolated
672-
.entry(data.commit.sha.clone())
674+
.entry(data.commit.sha)
673675
.or_insert_with(Vec::new);
674676
let run = match (start, end) {
675677
(Some(srun), Some(erun)) => {
@@ -714,7 +716,7 @@ fn fill_benchmark_data(benchmark_name: &str, data: &mut AssociatedData<'_>) -> O
714716
let commit_idx = data.commit_idx;
715717
let interpolation_entry = data
716718
.interpolated
717-
.entry(data.commit.sha.clone())
719+
.entry(data.commit.sha)
718720
.or_insert_with(Vec::new);
719721

720722
let start = if let Some(&needle) = data.last_seen_commit[commit_idx].get(benchmark_name) {

0 commit comments

Comments
 (0)