Skip to content

Commit d72ff34

Browse files
committed
feat: use gix-negotiate in fetch machinery.
Thanks to it we are finally able to do pack negotiations just like git can, as many rounds as it takes and with all available algorithms.
1 parent 6a3c021 commit d72ff34

File tree

10 files changed

+423
-73
lines changed

10 files changed

+423
-73
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crate-status.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -670,7 +670,7 @@ See its [README.md](https://github.com/Byron/gitoxide/blob/main/gix-lock/README.
670670
* [x] fetch
671671
* [x] shallow (remains shallow, options to adjust shallow boundary)
672672
* [ ] a way to auto-explode small packs to keep them from piling up
673-
* [ ] 'ref-in-want'
673+
* [x] 'ref-in-want'
674674
* [ ] standard negotiation algorithms (right now we only have a 'naive' one)
675675
* [ ] push
676676
* [x] ls-refs

gix/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ gix-object = { version = "^0.29.2", path = "../gix-object" }
131131
gix-actor = { version = "^0.20.0", path = "../gix-actor" }
132132
gix-pack = { version = "^0.35.0", path = "../gix-pack", features = ["object-cache-dynamic"] }
133133
gix-revision = { version = "^0.14.0", path = "../gix-revision" }
134+
gix-negotiate = { version = "0.1.0", path = "../gix-negotiate" }
134135

135136
gix-path = { version = "^0.8.0", path = "../gix-path" }
136137
gix-url = { version = "^0.18.0", path = "../gix-url" }
Lines changed: 281 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,261 @@
11
use crate::remote::fetch;
2-
use crate::remote::fetch::negotiate::Algorithm;
2+
use crate::remote::fetch::Shallow;
3+
use gix_negotiate::Flags;
4+
use gix_pack::Find;
5+
use std::borrow::Cow;
6+
type Queue = gix_revision::PriorityQueue<gix_revision::graph::CommitterTimestamp, gix_hash::ObjectId>;
37

48
/// The error returned during negotiation.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// Negotiation did not settle on the objects the server should send; `rounds` is reported in the message.
    #[error("We were unable to figure out what objects the server should send after {rounds} round(s)")]
    NegotiationFailed { rounds: usize },
    /// A commit lookup in the revision graph failed; converted automatically via `From`.
    #[error(transparent)]
    LookupCommitInGraph(#[from] gix_revision::graph::lookup::commit::Error),
    /// Initializing an iterator over references failed; converted automatically via `From`.
    #[error(transparent)]
    InitRefsIterator(#[from] crate::reference::iter::init::Error),
    /// Obtaining the platform from which reference iterators are created failed; converted automatically via `From`.
    #[error(transparent)]
    InitRefsIteratorPlatform(#[from] crate::reference::iter::Error),
    /// A boxed, dynamically typed error, named for failures to obtain a reference while iterating.
    #[error(transparent)]
    ObtainRefDuringIteration(#[from] Box<dyn std::error::Error + Send + Sync + 'static>),
    /// An error of the object database's index loading; converted automatically via `From`.
    #[error(transparent)]
    LoadIndex(#[from] gix_odb::store::load_index::Error),
}
25+
26+
/// What the fetch operation should do next, as determined by `mark_complete_and_common_ref()`.
#[must_use]
pub(crate) enum Action {
    /// None of the remote refs moved compared to our last recorded state (via tracking refs), so there is nothing to do at all,
    /// not even a ref update.
    NoChange,
    /// Don't negotiate, don't fetch the pack, skip right to updating the references.
    ///
    /// This happens if we already have all local objects even though the server seems to have changed.
    SkipToRefUpdate,
    /// We can't know for sure if fetching *is not* needed, so we go ahead and negotiate.
    MustNegotiate {
        /// Each entry of `ref_map.mappings` has a slot here which is `true` if we have the object the remote ref points to locally.
        remote_ref_target_known: Vec<bool>,
    },
}
41+
42+
/// This function is modeled after the similarly named one in the git codebase to do the following:
43+
///
44+
/// * figure out all advertised refs on the remote *that we already have* and keep track of the oldest one as cutoff date.
45+
/// * mark all of our own refs as tips for a traversal.
46+
/// * mark all their parents, recursively, up to (and including) the cutoff date up to which we have seen the servers commit that we have.
47+
/// * pass all known-to-be-common-with-remote commits to the negotiator as common commits.
48+
///
49+
/// This is done so that we already find the most recent common commits, even if we are ahead, which is *potentially* better than
50+
/// what we would get if we would rely on tracking refs alone, particularly if one wouldn't trust the tracking refs for some reason.
51+
///
52+
/// Note that git doesn't trust its own tracking refs as the server *might* have changed completely, for instance by force-pushing, so
53+
/// marking our local tracking refs as known is something that's actually not proven to be correct so it's not done.
54+
///
55+
/// Additionally, it does what's done in `transport.c` and we check if a fetch is actually needed as at least one advertised ref changed.
56+
///
57+
/// Finally, we also mark tips in the `negotiator` in one go to avoid traversing all refs twice, since we naturally encounter all tips during
58+
/// our own walk.
59+
///
60+
/// Return whether or not we should negotiate, along with a queue for later use.
61+
pub(crate) fn mark_complete_and_common_ref(
62+
repo: &crate::Repository,
63+
negotiator: &mut dyn gix_negotiate::Negotiator,
64+
graph: &mut gix_negotiate::Graph<'_>,
65+
ref_map: &fetch::RefMap,
66+
shallow: &fetch::Shallow,
67+
) -> Result<Action, Error> {
68+
if let fetch::Shallow::Deepen(0) = shallow {
69+
// Avoid deepening (relative) with zero as it seems to upset the server. Git also doesn't actually
70+
// perform the negotiation for some reason (couldn't find it in code).
71+
return Ok(Action::NoChange);
72+
}
73+
74+
// Compute the cut-off date by checking which of the refs advertised (and matched in refspecs) by the remote we have,
75+
// and keep the oldest one.
76+
let mut cutoff_date = None::<gix_revision::graph::CommitterTimestamp>;
77+
let mut num_mappings_with_change = 0;
78+
let mut remote_ref_target_known: Vec<bool> = std::iter::repeat(false).take(ref_map.mappings.len()).collect();
79+
80+
for (mapping_idx, mapping) in ref_map.mappings.iter().enumerate() {
81+
let want_id = mapping.remote.as_id();
82+
let have_id = mapping.local.as_ref().and_then(|name| {
83+
// this is the only time git uses the peer-id.
84+
let r = repo.find_reference(name).ok()?;
85+
r.target().try_id().map(ToOwned::to_owned)
86+
});
87+
88+
// Like git, we don't let known unchanged mappings participate in the tree traversal
89+
if want_id.zip(have_id).map_or(true, |(want, have)| want != have) {
90+
num_mappings_with_change += 1;
91+
}
92+
93+
if let Some(commit) = want_id
94+
.and_then(|id| graph.try_lookup_or_insert_commit(id.into(), |_| {}).transpose())
95+
.transpose()?
96+
{
97+
remote_ref_target_known[mapping_idx] = true;
98+
cutoff_date = cutoff_date.unwrap_or_default().max(commit.commit_time).into();
99+
} else if want_id.map_or(false, |maybe_annotated_tag| repo.objects.contains(maybe_annotated_tag)) {
100+
remote_ref_target_known[mapping_idx] = true;
101+
}
102+
}
103+
104+
// If any kind of shallowing operation is desired, the server may still create a pack for us.
105+
if matches!(shallow, Shallow::NoChange) {
106+
if num_mappings_with_change == ref_map.mappings.len() {
107+
return Ok(Action::NoChange);
108+
} else if remote_ref_target_known.iter().all(|known| *known) {
109+
return Ok(Action::SkipToRefUpdate);
110+
}
111+
}
112+
113+
// color our commits as complete as identified by references, unconditionally
114+
// (`git` is conditional here based on `deepen`, but it doesn't make sense and it's hard to extract from history when that happened).
115+
let mut queue = Queue::new();
116+
mark_all_refs_in_repo(repo, graph, &mut queue, Flags::COMPLETE)?;
117+
mark_alternate_complete(repo, graph, &mut queue)?;
118+
// Keep track of the tips, which happen to be on our queue right, before we traverse the graph with cutoff.
119+
let tips = if let Some(cutoff) = cutoff_date {
120+
let tips = Cow::Owned(queue.clone());
121+
// color all their parents up to the cutoff date, the oldest commit we know the server has.
122+
mark_recent_complete_commits(&mut queue, graph, cutoff)?;
123+
tips
124+
} else {
125+
Cow::Borrowed(&queue)
126+
};
127+
128+
// mark all complete advertised refs as common refs.
129+
for mapping in ref_map
130+
.mappings
131+
.iter()
132+
.zip(remote_ref_target_known.iter().copied())
133+
// We need this filter as the graph wouldn't contain annotated tags.
134+
.filter_map(|(mapping, known)| (!known).then_some(mapping))
135+
{
136+
let want_id = mapping.remote.as_id();
137+
if let Some(common_id) = want_id
138+
.and_then(|id| graph.get(id).map(|c| (c, id)))
139+
.filter(|(c, _)| c.data.flags.contains(Flags::COMPLETE))
140+
.map(|(_, id)| id)
141+
{
142+
negotiator.known_common(common_id.into(), graph)?;
143+
}
144+
}
145+
146+
// As negotiators currently may rely on getting `known_common` calls first and tips after, we adhere to that which is the only
147+
// reason we cached the set of tips.
148+
for tip in tips.iter_unordered() {
149+
negotiator.add_tip(*tip, graph)?;
150+
}
151+
152+
Ok(Action::MustNegotiate {
153+
remote_ref_target_known,
154+
})
155+
}
156+
157+
/// Add all `wants` to `arguments`, which is the unpeeled direct target that the advertised remote ref points to.
158+
pub(crate) fn add_wants(
159+
arguments: &mut gix_protocol::fetch::Arguments,
160+
mappings: &[fetch::Mapping],
161+
mapping_known: &[bool],
162+
) {
163+
let wants = mappings
164+
.iter()
165+
.zip(mapping_known)
166+
.filter_map(|(m, known)| (!*known).then_some(m));
167+
for want in wants {
168+
if !arguments.can_use_ref_in_want() || matches!(want.remote, fetch::Source::ObjectId(_)) {
169+
if let Some(id) = want.remote.as_id() {
170+
arguments.want(id);
171+
}
172+
} else {
173+
arguments.want_ref(
174+
want.remote
175+
.as_name()
176+
.expect("name available if this isn't an object id"),
177+
)
178+
}
179+
}
180+
}
181+
182+
/// Remove all commits that are more recent than the cut-off, which is the commit time of the oldest common commit we have with the server.
183+
fn mark_recent_complete_commits(
184+
queue: &mut Queue,
185+
graph: &mut gix_negotiate::Graph<'_>,
186+
cutoff: gix_revision::graph::CommitterTimestamp,
187+
) -> Result<(), Error> {
188+
while let Some(id) = queue
189+
.peek()
190+
.and_then(|(commit_time, id)| (commit_time >= &cutoff).then_some(*id))
191+
{
192+
queue.pop();
193+
let commit = graph.get(&id).expect("definitely set when adding tips or parents");
194+
for parent_id in commit.parents.clone() {
195+
let mut was_complete = false;
196+
if let Some(parent) = graph
197+
.try_lookup_or_insert_commit(parent_id, |md| {
198+
was_complete = md.flags.contains(Flags::COMPLETE);
199+
md.flags |= Flags::COMPLETE
200+
})?
201+
.filter(|_| !was_complete)
202+
{
203+
queue.insert(parent.commit_time, parent_id)
204+
}
205+
}
206+
}
207+
Ok(())
208+
}
209+
210+
fn mark_all_refs_in_repo(
211+
repo: &crate::Repository,
212+
graph: &mut gix_negotiate::Graph<'_>,
213+
queue: &mut Queue,
214+
mark: Flags,
215+
) -> Result<(), Error> {
216+
for local_ref in repo.references()?.all()?.peeled() {
217+
let local_ref = local_ref?;
218+
let id = local_ref.id().detach();
219+
let mut is_complete = false;
220+
if let Some(commit) = graph
221+
.try_lookup_or_insert_commit(id, |md| {
222+
is_complete = md.flags.contains(Flags::COMPLETE);
223+
md.flags |= mark
224+
})?
225+
.filter(|_| !is_complete)
226+
{
227+
queue.insert(commit.commit_time, id);
228+
};
229+
}
230+
Ok(())
231+
}
232+
233+
fn mark_alternate_complete(
234+
repo: &crate::Repository,
235+
graph: &mut gix_negotiate::Graph<'_>,
236+
queue: &mut Queue,
237+
) -> Result<(), Error> {
238+
for alternate_repo in repo
239+
.objects
240+
.store_ref()
241+
.alternate_db_paths()?
242+
.into_iter()
243+
.filter_map(|path| {
244+
path.ancestors()
245+
.nth(1)
246+
.and_then(|git_dir| crate::open_opts(git_dir, repo.options.clone()).ok())
247+
})
248+
{
249+
mark_all_refs_in_repo(&alternate_repo, graph, queue, Flags::ALTERNATE | Flags::COMPLETE)?;
250+
}
251+
Ok(())
10252
}
11253

12254
/// Negotiate one round by looking at `ref_map` and adjust `arguments` to contain the haves and wants.
13255
/// If this is not the first round, the `previous_response` is set with the last recorded server response.
14256
/// Returns `true` if the negotiation is done from our side so the server won't keep asking.
15257
#[allow(clippy::too_many_arguments)]
16258
pub(crate) fn one_round(
17-
algo: Algorithm,
18259
round: usize,
19260
repo: &crate::Repository,
20261
ref_map: &fetch::RefMap,
@@ -23,64 +264,54 @@ pub(crate) fn one_round(
23264
_previous_response: Option<&gix_protocol::fetch::Response>,
24265
shallow: Option<&fetch::Shallow>,
25266
) -> Result<bool, Error> {
26-
let tag_refspec_to_ignore = fetch_tags
27-
.to_refspec()
28-
.filter(|_| matches!(fetch_tags, crate::remote::fetch::Tags::Included));
29267
if let Some(fetch::Shallow::Deepen(0)) = shallow {
30268
// Avoid deepening (relative) with zero as it seems to upset the server. Git also doesn't actually
31269
// perform the negotiation for some reason (couldn't find it in code).
32270
return Ok(true);
33271
}
34272

35-
match algo {
36-
Algorithm::Noop | Algorithm::Skipping | Algorithm::Consecutive => {
37-
todo!()
273+
let tag_refspec_to_ignore = fetch_tags
274+
.to_refspec()
275+
.filter(|_| matches!(fetch_tags, crate::remote::fetch::Tags::Included));
276+
277+
// Use actual negotiation code here, this is the NAIVE implementation/hack
278+
assert_eq!(round, 1, "Naive always finishes after the first round, it claims.");
279+
let mut has_missing_tracking_branch = false;
280+
for mapping in &ref_map.mappings {
281+
if tag_refspec_to_ignore.map_or(false, |tag_spec| {
282+
mapping
283+
.spec_index
284+
.implicit_index()
285+
.and_then(|idx| ref_map.extra_refspecs.get(idx))
286+
.map_or(false, |spec| spec.to_ref() == tag_spec)
287+
}) {
288+
continue;
38289
}
39-
Algorithm::Naive => {
40-
assert_eq!(round, 1, "Naive always finishes after the first round, it claims.");
41-
let mut has_missing_tracking_branch = false;
42-
for mapping in &ref_map.mappings {
43-
if tag_refspec_to_ignore.map_or(false, |tag_spec| {
44-
mapping
45-
.spec_index
46-
.implicit_index()
47-
.and_then(|idx| ref_map.extra_refspecs.get(idx))
48-
.map_or(false, |spec| spec.to_ref() == tag_spec)
49-
}) {
50-
continue;
51-
}
52-
let have_id = mapping.local.as_ref().and_then(|name| {
53-
repo.find_reference(name)
54-
.ok()
55-
.and_then(|r| r.target().try_id().map(ToOwned::to_owned))
56-
});
57-
match have_id {
58-
Some(have_id) => {
59-
if let Some(want_id) = mapping.remote.as_id() {
60-
if want_id != have_id {
61-
arguments.want(want_id);
62-
arguments.have(have_id);
63-
}
64-
}
65-
}
66-
None => {
67-
if let Some(want_id) = mapping.remote.as_id() {
68-
arguments.want(want_id);
69-
has_missing_tracking_branch = true;
70-
}
71-
}
72-
}
290+
let have_id = mapping.local.as_ref().and_then(|name| {
291+
repo.find_reference(name)
292+
.ok()
293+
.and_then(|r| r.target().try_id().map(ToOwned::to_owned))
294+
});
295+
match (have_id, mapping.remote.as_id()) {
296+
(Some(have_id), Some(want_id)) if want_id != have_id => {
297+
arguments.want(want_id);
298+
arguments.have(have_id);
73299
}
300+
(None, Some(want_id)) => {
301+
arguments.want(want_id);
302+
has_missing_tracking_branch = true;
303+
}
304+
_ => {}
305+
}
306+
}
74307

75-
if has_missing_tracking_branch || (shallow.is_some() && arguments.is_empty()) {
76-
if let Ok(Some(r)) = repo.head_ref() {
77-
if let Some(id) = r.target().try_id() {
78-
arguments.have(id);
79-
arguments.want(id);
80-
}
81-
}
308+
if has_missing_tracking_branch || (shallow.is_some() && arguments.is_empty()) {
309+
if let Ok(Some(r)) = repo.head_ref() {
310+
if let Some(id) = r.target().try_id() {
311+
arguments.have(id);
312+
arguments.want(id);
82313
}
83-
Ok(true)
84314
}
85315
}
316+
Ok(true)
86317
}

0 commit comments

Comments
 (0)