Skip to content

Commit e8bd542

Browse files
committed
Include probably relevant changes in triage for now
1 parent 0af149a commit e8bd542

File tree

1 file changed

+59
-31
lines changed

1 file changed

+59
-31
lines changed

site/src/comparison.rs

+59-31
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ pub async fn handle_triage(
3737
let mut report = HashMap::new();
3838
let mut before = start.clone();
3939

40+
let mut num_comparisons = 0;
4041
loop {
4142
let comparison = match compare_given_commits(
4243
before,
@@ -56,6 +57,7 @@ pub async fn handle_triage(
5657
break;
5758
}
5859
};
60+
num_comparisons += 1;
5961
log::info!(
6062
"Comparing {} to {}",
6163
comparison.b.artifact,
@@ -77,7 +79,7 @@ pub async fn handle_triage(
7779
}
7880
let end = end.unwrap_or(next);
7981

80-
let report = generate_report(&start, &end, report).await;
82+
let report = generate_report(&start, &end, report, num_comparisons).await;
8183
Ok(api::triage::Response(report))
8284
}
8385

@@ -120,11 +122,17 @@ pub async fn handle_compare(
120122
})
121123
}
122124

123-
async fn populate_report(comparison: &Comparison, report: &mut HashMap<Direction, Vec<String>>) {
125+
async fn populate_report(
126+
comparison: &Comparison,
127+
report: &mut HashMap<Option<Direction>, Vec<String>>,
128+
) {
124129
if let Some(summary) = ComparisonSummary::summarize_comparison(comparison) {
125-
if summary.confidence().is_definitely_relevant() {
130+
let confidence = summary.confidence();
131+
if confidence.is_atleast_probably_relevant() {
126132
if let Some(direction) = summary.direction() {
127-
let entry = report.entry(direction).or_default();
133+
let entry = report
134+
.entry(confidence.is_definitely_relevant().then(|| direction))
135+
.or_default();
128136

129137
entry.push(summary.write(comparison).await)
130138
}
@@ -140,7 +148,7 @@ pub struct ComparisonSummary {
140148
impl ComparisonSummary {
141149
pub fn summarize_comparison(comparison: &Comparison) -> Option<ComparisonSummary> {
142150
let mut comparisons = comparison
143-
.get_benchmarks()
151+
.get_individual_comparisons()
144152
.filter(|c| c.is_significant())
145153
.cloned()
146154
.collect::<Vec<_>>();
@@ -150,8 +158,9 @@ impl ComparisonSummary {
150158
}
151159

152160
let cmp = |b1: &TestResultComparison, b2: &TestResultComparison| {
153-
b2.log_change()
154-
.partial_cmp(&b1.log_change())
161+
b2.relative_change()
162+
.abs()
163+
.partial_cmp(&b1.relative_change().abs())
155164
.unwrap_or(std::cmp::Ordering::Equal)
156165
};
157166
comparisons.sort_by(cmp);
@@ -256,13 +265,15 @@ impl ComparisonSummary {
256265
num_very_large_changes,
257266
) {
258267
(_, _, vl) if vl > 0 => ComparisonConfidence::DefinitelyRelevant,
259-
(m, l, _) if m + (l * 2) > 5 => ComparisonConfidence::DefinitelyRelevant,
268+
(m, l, _) if m + (l * 2) > 4 => ComparisonConfidence::DefinitelyRelevant,
260269
(m, l, _) if m > 1 || l > 0 => ComparisonConfidence::ProbablyRelevant,
261-
(m, _, _) => match m * 2 + num_small_changes {
262-
0..=5 => ComparisonConfidence::MaybeRelevant,
263-
6..=10 => ComparisonConfidence::ProbablyRelevant,
264-
_ => ComparisonConfidence::DefinitelyRelevant,
265-
},
270+
(m, _, _) => {
271+
if (m * 2 + num_small_changes) > 10 {
272+
ComparisonConfidence::ProbablyRelevant
273+
} else {
274+
ComparisonConfidence::MaybeRelevant
275+
}
276+
}
266277
}
267278
}
268279

@@ -539,7 +550,7 @@ impl Comparison {
539550
next_commit(&self.b.artifact, master_commits).map(|c| c.sha.clone())
540551
}
541552

542-
fn get_benchmarks(&self) -> impl Iterator<Item = &TestResultComparison> {
553+
fn get_individual_comparisons(&self) -> impl Iterator<Item = &TestResultComparison> {
543554
self.statistics.iter().filter(|b| b.profile != Profile::Doc)
544555
}
545556
}
@@ -726,35 +737,33 @@ impl TestResultComparison {
726737

727738
/// The amount of relative change considered significant when
728739
/// the test case is dodgy
729-
const SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD_DODGY: f64 = 0.01;
730-
731-
fn log_change(&self) -> f64 {
732-
let (a, b) = self.results;
733-
(b / a).ln()
734-
}
740+
const SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD_DODGY: f64 = 0.008;
735741

736742
fn is_regression(&self) -> bool {
737743
let (a, b) = self.results;
738744
b > a
739745
}
740746

741747
fn is_significant(&self) -> bool {
742-
let threshold = if self.is_dodgy() {
748+
self.relative_change().abs() > self.signifcance_threshold()
749+
}
750+
751+
fn signifcance_threshold(&self) -> f64 {
752+
if self.is_dodgy() {
743753
Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD_DODGY
744754
} else {
745755
Self::SIGNIFICANT_RELATIVE_CHANGE_THRESHOLD
746-
};
747-
748-
self.relative_change().abs() > threshold
756+
}
749757
}
750758

751759
fn magnitude(&self) -> Magnitude {
752760
let mag = self.relative_change().abs();
753-
if mag < 0.01 {
761+
let threshold = self.signifcance_threshold();
762+
if mag < threshold * 3.0 {
754763
Magnitude::Small
755-
} else if mag < 0.04 {
764+
} else if mag < threshold * 10.0 {
756765
Magnitude::Medium
757-
} else if mag < 0.1 {
766+
} else if mag < threshold * 25.0 {
758767
Magnitude::Large
759768
} else {
760769
Magnitude::VeryLarge
@@ -873,7 +882,8 @@ impl std::fmt::Display for Magnitude {
873882
async fn generate_report(
874883
start: &Bound,
875884
end: &Bound,
876-
mut report: HashMap<Direction, Vec<String>>,
885+
mut report: HashMap<Option<Direction>, Vec<String>>,
886+
num_comparisons: usize,
877887
) -> String {
878888
fn fmt_bound(bound: &Bound) -> String {
879889
match bound {
@@ -884,9 +894,14 @@ async fn generate_report(
884894
}
885895
let start = fmt_bound(start);
886896
let end = fmt_bound(end);
887-
let regressions = report.remove(&Direction::Regression).unwrap_or_default();
888-
let improvements = report.remove(&Direction::Improvement).unwrap_or_default();
889-
let mixed = report.remove(&Direction::Mixed).unwrap_or_default();
897+
let regressions = report
898+
.remove(&Some(Direction::Regression))
899+
.unwrap_or_default();
900+
let improvements = report
901+
.remove(&Some(Direction::Improvement))
902+
.unwrap_or_default();
903+
let mixed = report.remove(&Some(Direction::Mixed)).unwrap_or_default();
904+
let unlabeled = report.remove(&None).unwrap_or_default();
890905
let untriaged = match github::untriaged_perf_regressions().await {
891906
Ok(u) => u
892907
.iter()
@@ -910,6 +925,8 @@ TODO: Summary
910925
911926
Triage done by **@???**.
912927
Revision range: [{first_commit}..{last_commit}](https://perf.rust-lang.org/?start={first_commit}&end={last_commit}&absolute=false&stat=instructions%3Au)
928+
{num_comparisons} comparisons made in total
929+
{num_def_relevant} definitely relevant comparisons and {num_prob_relevant} probably relevant comparisons
913930
914931
{num_regressions} Regressions, {num_improvements} Improvements, {num_mixed} Mixed; ??? of them in rollups
915932
@@ -925,6 +942,13 @@ Revision range: [{first_commit}..{last_commit}](https://perf.rust-lang.org/?star
925942
926943
{mixed}
927944
945+
#### Probably changed
946+
947+
The following is a list of comparisons which *probably* represent real performance changes,
948+
but we're not 100% sure.
949+
950+
{unlabeled}
951+
928952
#### Untriaged Pull Requests
929953
930954
{untriaged}
@@ -937,12 +961,16 @@ TODO: Nags
937961
date = chrono::Utc::today().format("%Y-%m-%d"),
938962
first_commit = start,
939963
last_commit = end,
964+
num_comparisons = num_comparisons,
965+
num_def_relevant = regressions.len() + improvements.len() + mixed.len(),
966+
num_prob_relevant = unlabeled.len(),
940967
num_regressions = regressions.len(),
941968
num_improvements = improvements.len(),
942969
num_mixed = mixed.len(),
943970
regressions = regressions.join("\n\n"),
944971
improvements = improvements.join("\n\n"),
945972
mixed = mixed.join("\n\n"),
973+
unlabeled = unlabeled.join("\n\n"),
946974
untriaged = untriaged
947975
)
948976
}

0 commit comments

Comments
 (0)