Skip to content

Commit 0a68321

Browse files
authored
Merge pull request #1749 from ehuss/diff-only-once
Only fetch PR diff once.
2 parents bd0f9e6 + 9ff02b6 commit 0a68321

File tree

6 files changed

+153
-110
lines changed

6 files changed

+153
-110
lines changed

src/github.rs

Lines changed: 73 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,30 @@ pub struct Label {
232232
/// needed at this time (merged_at, diff_url, html_url, patch_url, url).
233233
#[derive(Debug, serde::Deserialize)]
234234
pub struct PullRequestDetails {
235-
// none for now
235+
/// This is a slot to hold the diff for a PR.
236+
///
237+
/// This will be filled in only once as an optimization since multiple
238+
/// handlers want to see PR changes, and getting the diff can be
239+
/// expensive.
240+
#[serde(skip)]
241+
files_changed: tokio::sync::OnceCell<Vec<FileDiff>>,
242+
}
243+
244+
/// Representation of a diff to a single file.
245+
#[derive(Debug)]
246+
pub struct FileDiff {
247+
/// The full path of the file.
248+
pub path: String,
249+
/// The diff for the file.
250+
pub diff: String,
251+
}
252+
253+
impl PullRequestDetails {
254+
pub fn new() -> PullRequestDetails {
255+
PullRequestDetails {
256+
files_changed: tokio::sync::OnceCell::new(),
257+
}
258+
}
236259
}
237260

238261
/// An issue or pull request.
@@ -786,23 +809,33 @@ impl Issue {
786809
}
787810

788811
/// Returns the diff in this event, for Open and Synchronize events for now.
789-
pub async fn diff(&self, client: &GithubClient) -> anyhow::Result<Option<String>> {
812+
///
813+
/// Returns `None` if the issue is not a PR.
814+
pub async fn diff(&self, client: &GithubClient) -> anyhow::Result<Option<&[FileDiff]>> {
815+
let Some(pr) = &self.pull_request else {
816+
return Ok(None);
817+
};
790818
let (before, after) = if let (Some(base), Some(head)) = (&self.base, &self.head) {
791-
(base.sha.clone(), head.sha.clone())
819+
(&base.sha, &head.sha)
792820
} else {
793821
return Ok(None);
794822
};
795823

796-
let mut req = client.get(&format!(
797-
"{}/compare/{}...{}",
798-
self.repository().url(),
799-
before,
800-
after
801-
));
802-
req = req.header("Accept", "application/vnd.github.v3.diff");
803-
let (diff, _) = client.send_req(req).await?;
804-
let body = String::from_utf8_lossy(&diff).to_string();
805-
Ok(Some(body))
824+
let diff = pr
825+
.files_changed
826+
.get_or_try_init::<anyhow::Error, _, _>(|| async move {
827+
let url = format!("{}/compare/{before}...{after}", self.repository().url());
828+
let mut req = client.get(&url);
829+
req = req.header("Accept", "application/vnd.github.v3.diff");
830+
let (diff, _) = client
831+
.send_req(req)
832+
.await
833+
.with_context(|| format!("failed to fetch diff comparison for {url}"))?;
834+
let body = String::from_utf8_lossy(&diff);
835+
Ok(parse_diff(&body))
836+
})
837+
.await?;
838+
Ok(Some(diff))
806839
}
807840

808841
/// Returns the commits from this pull request (no commits are returned if this `Issue` is not
@@ -982,19 +1015,29 @@ pub struct CommitBase {
9821015
pub repo: Repository,
9831016
}
9841017

985-
pub fn files_changed(diff: &str) -> Vec<&str> {
986-
let mut files = Vec::new();
987-
for line in diff.lines() {
988-
// mostly copied from highfive
989-
if line.starts_with("diff --git ") {
990-
files.push(
991-
line[line.find(" b/").unwrap()..]
992-
.strip_prefix(" b/")
993-
.unwrap(),
994-
);
995-
}
996-
}
1018+
pub fn parse_diff(diff: &str) -> Vec<FileDiff> {
1019+
// This does not properly handle filenames with spaces.
1020+
let re = regex::Regex::new("(?m)^diff --git .* b/(.*)").unwrap();
1021+
let mut files: Vec<_> = re
1022+
.captures_iter(diff)
1023+
.map(|cap| {
1024+
let start = cap.get(0).unwrap().start();
1025+
let path = cap.get(1).unwrap().as_str().to_string();
1026+
(start, path)
1027+
})
1028+
.collect();
1029+
// Break the list up into (start, end) pairs starting from the "diff --git" line.
1030+
files.push((diff.len(), String::new()));
9971031
files
1032+
.windows(2)
1033+
.map(|w| {
1034+
let (start, end) = (&w[0], &w[1]);
1035+
FileDiff {
1036+
path: start.1.clone(),
1037+
diff: diff[start.0..end.0].to_string(),
1038+
}
1039+
})
1040+
.collect()
9981041
}
9991042

10001043
#[derive(Debug, serde::Deserialize)]
@@ -1503,7 +1546,7 @@ impl Repository {
15031546
self.full_name
15041547
)
15051548
})?;
1506-
issue.pull_request = Some(PullRequestDetails {});
1549+
issue.pull_request = Some(PullRequestDetails::new());
15071550
Ok(issue)
15081551
}
15091552

@@ -2484,7 +2527,8 @@ index fb9cee43b2d..b484c25ea51 100644
24842527
zulip_stream = 245100 # #t-compiler/wg-prioritization/alerts
24852528
topic = "#{number} {title}"
24862529
"##;
2487-
assert_eq!(files_changed(input), vec!["triagebot.toml".to_string()]);
2530+
let files: Vec<_> = parse_diff(input).into_iter().map(|d| d.path).collect();
2531+
assert_eq!(files, vec!["triagebot.toml".to_string()]);
24882532
}
24892533

24902534
#[test]
@@ -2516,8 +2560,9 @@ index c58310947d2..3b0854d4a9b 100644
25162560
}
25172561
+
25182562
"##;
2563+
let files: Vec<_> = parse_diff(input).into_iter().map(|d| d.path).collect();
25192564
assert_eq!(
2520-
files_changed(input),
2565+
files,
25212566
vec![
25222567
"library/stdarch".to_string(),
25232568
"src/librustdoc/clean/types.rs".to_string(),

src/handlers/assign.rs

Lines changed: 68 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
2020
use crate::{
2121
config::AssignConfig,
22-
github::{self, Event, Issue, IssuesAction, Selection},
22+
github::{self, Event, FileDiff, Issue, IssuesAction, Selection},
2323
handlers::{Context, GithubClient, IssuesEvent},
2424
interactions::EditIssueBody,
2525
};
@@ -80,13 +80,11 @@ struct AssignData {
8080
}
8181

8282
/// Input for auto-assignment when a PR is created.
83-
pub(super) struct AssignInput {
84-
git_diff: String,
85-
}
83+
pub(super) struct AssignInput {}
8684

8785
/// Prepares the input when a new PR is opened.
8886
pub(super) async fn parse_input(
89-
ctx: &Context,
87+
_ctx: &Context,
9088
event: &IssuesEvent,
9189
config: Option<&AssignConfig>,
9290
) -> Result<Option<AssignInput>, String> {
@@ -100,15 +98,7 @@ pub(super) async fn parse_input(
10098
{
10199
return Ok(None);
102100
}
103-
let git_diff = match event.issue.diff(&ctx.github).await {
104-
Ok(None) => return Ok(None),
105-
Err(e) => {
106-
log::error!("failed to fetch diff: {:?}", e);
107-
return Ok(None);
108-
}
109-
Ok(Some(diff)) => diff,
110-
};
111-
Ok(Some(AssignInput { git_diff }))
101+
Ok(Some(AssignInput {}))
112102
}
113103

114104
/// Handles the work of setting an assignment for a new PR and posting a
@@ -117,11 +107,18 @@ pub(super) async fn handle_input(
117107
ctx: &Context,
118108
config: &AssignConfig,
119109
event: &IssuesEvent,
120-
input: AssignInput,
110+
_input: AssignInput,
121111
) -> anyhow::Result<()> {
112+
let Some(diff) = event.issue.diff(&ctx.github).await? else {
113+
bail!(
114+
"expected issue {} to be a PR, but the diff could not be determined",
115+
event.issue.number
116+
)
117+
};
118+
122119
// Don't auto-assign or welcome if the user manually set the assignee when opening.
123120
if event.issue.assignees.is_empty() {
124-
let (assignee, from_comment) = determine_assignee(ctx, event, config, &input).await?;
121+
let (assignee, from_comment) = determine_assignee(ctx, event, config, &diff).await?;
125122
if assignee.as_deref() == Some("ghost") {
126123
// "ghost" is GitHub's placeholder account for deleted accounts.
127124
// It is used here as a convenient way to prevent assignment. This
@@ -180,7 +177,7 @@ pub(super) async fn handle_input(
180177
if config.warn_non_default_branch {
181178
warnings.extend(non_default_branch(event));
182179
}
183-
warnings.extend(modifies_submodule(&input.git_diff));
180+
warnings.extend(modifies_submodule(diff));
184181
if !warnings.is_empty() {
185182
let warnings: Vec<_> = warnings
186183
.iter()
@@ -222,9 +219,9 @@ fn non_default_branch(event: &IssuesEvent) -> Option<String> {
222219
}
223220

224221
/// Returns a message if the PR modifies a git submodule.
225-
fn modifies_submodule(diff: &str) -> Option<String> {
222+
fn modifies_submodule(diff: &[FileDiff]) -> Option<String> {
226223
let re = regex::Regex::new(r"\+Subproject\scommit\s").unwrap();
227-
if re.is_match(diff) {
224+
if diff.iter().any(|fd| re.is_match(&fd.diff)) {
228225
Some(SUBMODULE_WARNING_MSG.to_string())
229226
} else {
230227
None
@@ -278,7 +275,7 @@ async fn determine_assignee(
278275
ctx: &Context,
279276
event: &IssuesEvent,
280277
config: &AssignConfig,
281-
input: &AssignInput,
278+
diff: &[FileDiff],
282279
) -> anyhow::Result<(Option<String>, bool)> {
283280
let teams = crate::team_data::teams(&ctx.github).await?;
284281
if let Some(name) = find_assign_command(ctx, event) {
@@ -298,7 +295,7 @@ async fn determine_assignee(
298295
}
299296
}
300297
// Errors fall-through to try fallback group.
301-
match find_reviewers_from_diff(config, &input.git_diff) {
298+
match find_reviewers_from_diff(config, diff) {
302299
Ok(candidates) if !candidates.is_empty() => {
303300
match find_reviewer_from_names(&teams, config, &event.issue, &candidates) {
304301
Ok(assignee) => return Ok((Some(assignee), false)),
@@ -346,60 +343,61 @@ async fn determine_assignee(
346343
/// May return an error if the owners map is misconfigured.
347344
///
348345
/// Beware this may return an empty list if nothing matches.
349-
fn find_reviewers_from_diff(config: &AssignConfig, diff: &str) -> anyhow::Result<Vec<String>> {
346+
fn find_reviewers_from_diff(
347+
config: &AssignConfig,
348+
diff: &[FileDiff],
349+
) -> anyhow::Result<Vec<String>> {
350350
// Map of `owners` path to the number of changes found in that path.
351351
// This weights the reviewer choice towards places where the most edits are done.
352352
let mut counts: HashMap<&str, u32> = HashMap::new();
353-
// List of the longest `owners` patterns that match the current path. This
354-
// prefers choosing reviewers from deeply nested paths over those defined
355-
// for top-level paths, under the assumption that they are more
356-
// specialized.
357-
//
358-
// This is a list to handle the situation if multiple paths of the same
359-
// length match.
360-
let mut longest_owner_patterns = Vec::new();
361-
// Iterate over the diff, finding the start of each file. After each file
362-
// is found, it counts the number of modified lines in that file, and
363-
// tracks those in the `counts` map.
364-
for line in diff.split('\n') {
365-
if line.starts_with("diff --git ") {
366-
// Start of a new file.
367-
longest_owner_patterns.clear();
368-
let path = line[line.find(" b/").unwrap()..]
369-
.strip_prefix(" b/")
370-
.unwrap();
371-
// Find the longest `owners` entries that match this path.
372-
let mut longest = HashMap::new();
373-
for owner_pattern in config.owners.keys() {
374-
let ignore = ignore::gitignore::GitignoreBuilder::new("/")
375-
.add_line(None, owner_pattern)
376-
.with_context(|| format!("owner file pattern `{owner_pattern}` is not valid"))?
377-
.build()?;
378-
if ignore.matched_path_or_any_parents(path, false).is_ignore() {
379-
let owner_len = owner_pattern.split('/').count();
380-
longest.insert(owner_pattern, owner_len);
381-
}
382-
}
383-
let max_count = longest.values().copied().max().unwrap_or(0);
384-
longest_owner_patterns.extend(
385-
longest
386-
.iter()
387-
.filter(|(_, count)| **count == max_count)
388-
.map(|x| *x.0),
389-
);
390-
// Give some weight to these patterns to start. This helps with
391-
// files modified without any lines changed.
392-
for owner_pattern in &longest_owner_patterns {
393-
*counts.entry(owner_pattern).or_default() += 1;
353+
// Iterate over the diff, counting the number of modified lines in each
354+
// file, and tracks those in the `counts` map.
355+
for file_diff in diff {
356+
// List of the longest `owners` patterns that match the current path. This
357+
// prefers choosing reviewers from deeply nested paths over those defined
358+
// for top-level paths, under the assumption that they are more
359+
// specialized.
360+
//
361+
// This is a list to handle the situation if multiple paths of the same
362+
// length match.
363+
let mut longest_owner_patterns = Vec::new();
364+
365+
// Find the longest `owners` entries that match this path.
366+
let mut longest = HashMap::new();
367+
for owner_pattern in config.owners.keys() {
368+
let ignore = ignore::gitignore::GitignoreBuilder::new("/")
369+
.add_line(None, owner_pattern)
370+
.with_context(|| format!("owner file pattern `{owner_pattern}` is not valid"))?
371+
.build()?;
372+
if ignore
373+
.matched_path_or_any_parents(&file_diff.path, false)
374+
.is_ignore()
375+
{
376+
let owner_len = owner_pattern.split('/').count();
377+
longest.insert(owner_pattern, owner_len);
394378
}
395-
continue;
396379
}
397-
// Check for a modified line.
398-
if (!line.starts_with("+++") && line.starts_with('+'))
399-
|| (!line.starts_with("---") && line.starts_with('-'))
400-
{
401-
for owner_path in &longest_owner_patterns {
402-
*counts.entry(owner_path).or_default() += 1;
380+
let max_count = longest.values().copied().max().unwrap_or(0);
381+
longest_owner_patterns.extend(
382+
longest
383+
.iter()
384+
.filter(|(_, count)| **count == max_count)
385+
.map(|x| *x.0),
386+
);
387+
// Give some weight to these patterns to start. This helps with
388+
// files modified without any lines changed.
389+
for owner_pattern in &longest_owner_patterns {
390+
*counts.entry(owner_pattern).or_default() += 1;
391+
}
392+
393+
// Count the modified lines.
394+
for line in file_diff.diff.lines() {
395+
if (!line.starts_with("+++") && line.starts_with('+'))
396+
|| (!line.starts_with("---") && line.starts_with('-'))
397+
{
398+
for owner_path in &longest_owner_patterns {
399+
*counts.entry(owner_path).or_default() += 1;
400+
}
403401
}
404402
}
405403
}

src/handlers/assign/tests/tests_from_diff.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22
33
use super::super::*;
44
use crate::config::AssignConfig;
5+
use crate::github::parse_diff;
56
use std::fmt::Write;
67

78
fn test_from_diff(diff: &str, config: toml::Value, expected: &[&str]) {
9+
let files = parse_diff(diff);
810
let aconfig: AssignConfig = config.try_into().unwrap();
911
assert_eq!(
10-
find_reviewers_from_diff(&aconfig, diff).unwrap(),
12+
find_reviewers_from_diff(&aconfig, &files).unwrap(),
1113
expected.iter().map(|x| x.to_string()).collect::<Vec<_>>()
1214
);
1315
}

0 commit comments

Comments
 (0)