Skip to content

Commit 59ce4c6

Browse files
committed
feat: A Graph for quick access to commits and for associating state with them.
This data structure should be used whenever stateful traversal is required, usually by associating information with each commit to remember what was seen and what wasn't.
1 parent dde8c3a commit 59ce4c6

File tree

6 files changed

+302
-5
lines changed

6 files changed

+302
-5
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gix-revision/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@ gix-hash = { version = "^0.11.1", path = "../gix-hash" }
2121
gix-object = { version = "^0.29.2", path = "../gix-object" }
2222
gix-date = { version = "^0.5.0", path = "../gix-date" }
2323
gix-hashtable = { version = "^0.2.0", path = "../gix-hashtable" }
24+
gix-commitgraph = { version = "0.14.0", path = "../gix-commitgraph" }
2425

2526
bstr = { version = "1.3.0", default-features = false, features = ["std"]}
2627
thiserror = "1.0.26"
2728
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }
29+
smallvec = "1.10.0"
2830
document-features = { version = "0.2.1", optional = true }
2931

3032
[dev-dependencies]

gix-revision/src/describe.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ pub(crate) mod function {
211211

212212
while let Some(commit) = queue.pop() {
213213
commits_seen += 1;
214-
if let Some(name) = name_by_oid.get(&commit) {
214+
let flags = if let Some(name) = name_by_oid.get(&commit) {
215215
if candidates.len() < max_candidates {
216216
let identity_bit = 1 << candidates.len();
217217
candidates.push(Candidate {
@@ -220,14 +220,17 @@ pub(crate) mod function {
220220
identity_bit,
221221
order: candidates.len(),
222222
});
223-
*seen.get_mut(&commit).expect("inserted") |= identity_bit;
223+
let flags = seen.get_mut(&commit).expect("inserted");
224+
*flags |= identity_bit;
225+
*flags
224226
} else {
225227
gave_up_on_commit = Some(commit);
226228
break;
227229
}
228-
}
230+
} else {
231+
seen[&commit]
232+
};
229233

230-
let flags = seen[&commit];
231234
for candidate in candidates
232235
.iter_mut()
233236
.filter(|c| (flags & c.identity_bit) != c.identity_bit)

gix-revision/src/graph.rs

Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
use crate::Graph;
2+
use gix_hash::oid;
3+
use smallvec::SmallVec;
4+
use std::ops::Index;
5+
6+
impl<'find, T> Graph<'find, T> {
7+
/// Create a new instance with `find` to retrieve commits and optionally `cache` to accelerate commit access.
8+
pub fn new<Find, E>(mut find: Find, cache: impl Into<Option<gix_commitgraph::Graph>>) -> Self
9+
where
10+
Find:
11+
for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<Option<gix_object::CommitRefIter<'a>>, E> + 'find,
12+
E: std::error::Error + Send + Sync + 'static,
13+
{
14+
Graph {
15+
find: Box::new(move |id, buf| {
16+
find(id, buf).map_err(|err| Box::new(err) as Box<dyn std::error::Error + Send + Sync + 'static>)
17+
}),
18+
cache: cache.into(),
19+
set: gix_hashtable::HashMap::default(),
20+
buf: Vec::new(),
21+
parent_buf: Vec::new(),
22+
}
23+
}
24+
25+
/// Returns true if `id` has data associated with it, meaning that we processed it already.
26+
pub fn contains(&self, id: &gix_hash::oid) -> bool {
27+
self.set.contains_key(id.as_ref())
28+
}
29+
30+
/// Returns the data associated with `id` if available.
31+
pub fn get(&self, id: &gix_hash::oid) -> Option<&T> {
32+
self.set.get(id)
33+
}
34+
35+
/// Returns the data associated with `id` if available as mutable reference.
36+
pub fn get_mut(&mut self, id: &gix_hash::oid) -> Option<&mut T> {
37+
self.set.get_mut(id)
38+
}
39+
40+
/// Insert `id` into the graph and associate it with `value`, returning the previous value associated with it if it existed.
41+
pub fn insert(&mut self, id: gix_hash::ObjectId, value: T) -> Option<T> {
42+
self.set.insert(id, value)
43+
}
44+
45+
/// Remove all data from the graph to start over.
46+
pub fn clear(&mut self) {
47+
self.set.clear();
48+
}
49+
50+
/// Try to look up `id` and return a handle to it for accessing its data, but don't fail if the commit doesn't exist.
51+
///
52+
/// It's possible that commits don't exist if the repository is shallow.
53+
pub fn try_lookup(&mut self, id: &gix_hash::oid) -> Result<Option<Commit<'_>>, lookup::Error> {
54+
try_lookup(id, &mut self.find, self.cache.as_ref(), &mut self.buf)
55+
}
56+
57+
/// Look up `id` and return a handle to it, or fail if it doesn't exist.
58+
pub fn lookup(&mut self, id: &gix_hash::oid) -> Result<Commit<'_>, lookup::existing::Error> {
59+
self.try_lookup(id)?.ok_or(lookup::existing::Error::Missing)
60+
}
61+
62+
/// Insert the parents of the commit named `id` into the graph and associate new parents with data
63+
/// by calling `new_parent_data(parent_id, committer_timestamp)`, or update existing parents'
64+
/// data with `update_existing(parent_id, &mut existing_data)`.
65+
/// If `first_parent` is `true`, only the first parent of commits will be looked at.
///
/// Parents that cannot be found are skipped without error, whereas a missing commit `id` is an error.
/// If the committer timestamp of a new parent cannot be decoded, `new_parent_data` receives `0` instead.
66+
pub fn insert_parents(
67+
&mut self,
68+
id: &gix_hash::oid,
69+
mut new_parent_data: impl FnMut(gix_hash::ObjectId, u64) -> T,
70+
mut update_existing: impl FnMut(gix_hash::ObjectId, &mut T),
71+
first_parent: bool,
72+
) -> Result<(), insert_parents::Error> {
73+
let commit = self.lookup(id)?;
// Collect the parents up front: `commit` borrows from `self`, and that borrow has to end
// before `self.set` can be modified in the loop below.
74+
let parents: SmallVec<[_; 2]> = commit
75+
.iter_parents()
76+
.take(if first_parent { 1 } else { usize::MAX })
77+
.collect();
78+
for parent_id in parents {
// Each item is a `Result`, so a parent that failed to parse aborts the whole operation here.
79+
let parent_id = parent_id?;
80+
match self.set.entry(parent_id) {
81+
gix_hashtable::hash_map::Entry::Vacant(entry) => {
// The parent is read into `parent_buf`, leaving `buf` untouched.
82+
let parent = match try_lookup(&parent_id, &mut self.find, self.cache.as_ref(), &mut self.parent_buf)
83+
.map_err(|err| insert_parents::Error::Lookup(lookup::existing::Error::Find(err)))?
84+
{
85+
Some(p) => p,
86+
None => continue, // skip missing objects, this is due to shallow clones for instance.
87+
};
88+
89+
// NOTE(review): a decode error is swallowed here and turned into a timestamp of 0.
let parent_commit_date = parent.committer_timestamp().unwrap_or_default();
90+
entry.insert(new_parent_data(parent_id, parent_commit_date));
91+
}
92+
gix_hashtable::hash_map::Entry::Occupied(mut entry) => {
93+
update_existing(parent_id, entry.get_mut());
94+
}
95+
}
// NOTE(review): redundant with `take(1)` above, which already limits `parents` to one entry.
96+
if first_parent {
97+
break;
98+
}
99+
}
100+
Ok(())
101+
}
102+
}
103+
104+
/// Find the commit named `id`, consulting `cache` first and falling back to `find`, which reads into `buf`.
///
/// Returns `Ok(None)` if the commit is not in `cache` and `find` reports it as absent.
/// Errors produced by `find` are returned as [`lookup::Error::Find`].
fn try_lookup<'graph>(
105+
id: &gix_hash::oid,
106+
find: &mut Box<FindFn<'_>>,
107+
cache: Option<&'graph gix_commitgraph::Graph>,
108+
buf: &'graph mut Vec<u8>,
109+
) -> Result<Option<Commit<'graph>>, lookup::Error> {
110+
if let Some(cache) = cache {
111+
if let Some(pos) = cache.lookup(id) {
// Cache hit: the commit is backed by its position in the commit-graph, `find` is not called.
112+
return Ok(Some(Commit {
113+
backing: Either::Right((cache, pos)),
114+
}));
115+
}
116+
}
// NOTE(review): presumably a `map` closure would conflict with the borrow of `buf` held by
// the value `find` returns, hence the manual match — TODO confirm.
117+
#[allow(clippy::manual_map)]
118+
Ok(match find(id, buf)? {
// The iterator returned by `find` is discarded; the commit keeps `buf` itself as backing.
119+
Some(_) => Some(Commit {
120+
backing: Either::Left(buf),
121+
}),
122+
None => None,
123+
})
124+
}
125+
126+
impl<'a, 'find, T> Index<&'a gix_hash::oid> for Graph<'find, T> {
127+
type Output = T;
128+
129+
fn index(&self, index: &'a oid) -> &Self::Output {
130+
&self.set[index]
131+
}
132+
}
133+
134+
///
135+
pub mod lookup {
136+
/// The error returned by [`try_lookup()`][crate::Graph::try_lookup()].
137+
#[derive(Debug, thiserror::Error)]
138+
#[allow(missing_docs)]
139+
pub enum Error {
140+
#[error("There was an error looking up a commit")]
141+
Find(#[from] Box<dyn std::error::Error + Send + Sync + 'static>),
142+
}
143+
144+
///
145+
pub mod existing {
146+
/// The error returned by [`lookup()`][crate::Graph::lookup()].
147+
#[derive(Debug, thiserror::Error)]
148+
#[allow(missing_docs)]
149+
pub enum Error {
150+
#[error(transparent)]
151+
Find(#[from] super::Error),
152+
#[error("Commit could not be found")]
153+
Missing,
154+
}
155+
}
156+
}
157+
158+
///
159+
pub mod insert_parents {
160+
use crate::graph::commit::iter_parents;
161+
use crate::graph::lookup;
162+
163+
/// The error returned by [`insert_parents()`][crate::Graph::insert_parents()].
164+
#[derive(Debug, thiserror::Error)]
165+
#[allow(missing_docs)]
166+
pub enum Error {
167+
#[error(transparent)]
168+
Lookup(#[from] lookup::existing::Error),
169+
#[error("A commit could not be decoded during traversal")]
170+
Decode(#[from] gix_object::decode::Error),
171+
#[error(transparent)]
172+
Parent(#[from] iter_parents::Error),
173+
}
174+
}
175+
176+
enum Either<T, U> {
177+
Left(T),
178+
Right(U),
179+
}
180+
181+
/// A commit that provides access to graph-related information.
182+
pub struct Commit<'graph> {
183+
backing: Either<&'graph [u8], (&'graph gix_commitgraph::Graph, gix_commitgraph::Position)>,
184+
}
185+
186+
///
187+
pub mod commit {
188+
use super::Commit;
189+
use crate::graph::Either;
190+
191+
impl<'graph> Commit<'graph> {
192+
/// Return an iterator over the parents of this commit.
193+
pub fn iter_parents(&self) -> Parents<'graph> {
194+
let backing = match &self.backing {
195+
Either::Left(buf) => Either::Left(gix_object::CommitRefIter::from_bytes(buf)),
196+
Either::Right((cache, pos)) => Either::Right((*cache, cache.commit_at(*pos).iter_parents())),
197+
};
198+
Parents { backing }
199+
}
200+
201+
/// Returns the timestamp at which this commit was created.
202+
///
203+
/// This is the single most important date for determining recency of commits.
204+
/// Note that this can only fail if the commit is backed by the object database *and* parsing fails.
205+
pub fn committer_timestamp(&self) -> Result<u64, gix_object::decode::Error> {
206+
Ok(match &self.backing {
207+
Either::Left(buf) => {
208+
gix_object::CommitRefIter::from_bytes(buf)
209+
.committer()?
210+
.time
211+
.seconds_since_unix_epoch as u64
212+
}
213+
Either::Right((cache, pos)) => cache.commit_at(*pos).committer_timestamp(),
214+
})
215+
}
216+
}
217+
218+
/// An iterator over the parents of a commit.
219+
pub struct Parents<'graph> {
220+
backing: Either<
221+
gix_object::CommitRefIter<'graph>,
222+
(
223+
&'graph gix_commitgraph::Graph,
224+
gix_commitgraph::file::commit::Parents<'graph>,
225+
),
226+
>,
227+
}
228+
229+
/// Yields the ids of the parents of a commit, one at a time, from either backing store.
impl<'graph> Iterator for Parents<'graph> {
230+
type Item = Result<gix_hash::ObjectId, iter_parents::Error>;
231+
232+
fn next(&mut self) -> Option<Self::Item> {
233+
match &mut self.backing {
// Backed by a commit object: advance the token iterator until a parent is found.
234+
Either::Left(it) => {
235+
for token in it {
236+
match token {
// Tree tokens are skipped in the search for parent tokens.
237+
Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
238+
Ok(gix_object::commit::ref_iter::Token::Parent { id }) => return Some(Ok(id)),
// Any other token ends the iteration.
239+
Ok(_unused_token) => break,
240+
Err(err) => return Some(Err(err.into())),
241+
}
242+
}
243+
None
244+
}
// Backed by the commit-graph: it yields positions, which are mapped to owned object ids.
245+
Either::Right((cache, it)) => it
246+
.next()
247+
.map(|r| r.map(|pos| cache.id_at(pos).to_owned()).map_err(Into::into)),
248+
}
249+
}
250+
}
251+
252+
///
253+
pub mod iter_parents {
254+
/// The error returned by the [`Parents`][super::Parents] iterator.
255+
#[derive(Debug, thiserror::Error)]
256+
#[allow(missing_docs)]
257+
pub enum Error {
258+
#[error("An error occurred when parsing commit parents")]
259+
DecodeCommit(#[from] gix_object::decode::Error),
260+
#[error("An error occurred when parsing parents from the commit graph")]
261+
DecodeCommitGraph(#[from] gix_commitgraph::file::commit::Error),
262+
}
263+
}
264+
}
265+
266+
pub(crate) type FindFn<'find> = dyn for<'a> FnMut(
267+
&gix_hash::oid,
268+
&'a mut Vec<u8>,
269+
)
270+
-> Result<Option<gix_object::CommitRefIter<'a>>, Box<dyn std::error::Error + Send + Sync + 'static>>
271+
+ 'find;

gix-revision/src/lib.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,28 @@ pub use describe::function::describe;
1616
pub mod spec;
1717

1818
mod types;
19+
use crate::graph::FindFn;
1920
pub use types::Spec;
2021

22+
/// A graph of commits which additionally allows associating data with commits.
23+
///
24+
/// It starts empty, but each access may fill it with commit information.
25+
/// Note that the traversal can be accelerated if a [commit-graph][gix_commitgraph::Graph] is also made available.
26+
pub struct Graph<'find, T> {
27+
/// A way to resolve a commit from the object database.
28+
find: Box<FindFn<'find>>,
29+
/// A way to speed up commit access, essentially a multi-file commit database.
30+
cache: Option<gix_commitgraph::Graph>,
31+
/// The set of cached commits that we have seen once, along with data associated with them.
32+
set: gix_hashtable::HashMap<gix_hash::ObjectId, T>,
33+
/// A buffer for writing commit data into.
34+
buf: Vec<u8>,
35+
/// Another buffer we typically use to store parents.
36+
parent_buf: Vec<u8>,
37+
}
38+
///
39+
pub mod graph;
40+
2141
/// A utility type implementing a queue which can be used to automatically sort data by its time in ascending order.
2242
///
2343
/// Note that the performance of this queue is very relevant to overall algorithm performance of many graph-walking algorithms,

gix-revision/tests/revision.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
mod describe;
22
mod spec;
3-
43
pub type Result<T = ()> = std::result::Result<T, Box<dyn std::error::Error + 'static>>;
54

65
fn hex_to_id(hex: &str) -> gix_hash::ObjectId {

0 commit comments

Comments
 (0)