Skip to content

Commit 143682e

Browse files
committed
Merge branch 'ps/pack-refs-auto-heuristics'
"git pack-refs --auto" for the files backend was too aggressive, which has been a bit tamed. * ps/pack-refs-auto-heuristics: refs/files: use heuristic to decide whether to repack with `--auto` t0601: merge tests for auto-packing of refs wrapper: introduce `log2u()`
2 parents 3bf057a + c3459ae commit 143682e

6 files changed

+194
-27
lines changed

bisect.c

+1-11
Original file line numberDiff line numberDiff line change
@@ -1130,16 +1130,6 @@ enum bisect_error bisect_next_all(struct repository *r, const char *prefix)
11301130
return res;
11311131
}
11321132

1133-
static inline int log2i(int n)
1134-
{
1135-
int log2 = 0;
1136-
1137-
for (; n > 1; n >>= 1)
1138-
log2++;
1139-
1140-
return log2;
1141-
}
1142-
11431133
static inline int exp2i(int n)
11441134
{
11451135
return 1 << n;
@@ -1162,7 +1152,7 @@ int estimate_bisect_steps(int all)
11621152
if (all < 3)
11631153
return 0;
11641154

1165-
n = log2i(all);
1155+
n = log2u(all);
11661156
e = exp2i(n);
11671157
x = all - e;
11681158

refs/files-backend.c

+65
Original file line numberDiff line numberDiff line change
@@ -1311,6 +1311,68 @@ static int should_pack_ref(struct files_ref_store *refs,
13111311
return 0;
13121312
}
13131313

1314+
static int should_pack_refs(struct files_ref_store *refs,
1315+
struct pack_refs_opts *opts)
1316+
{
1317+
struct ref_iterator *iter;
1318+
size_t packed_size;
1319+
size_t refcount = 0;
1320+
size_t limit;
1321+
int ret;
1322+
1323+
if (!(opts->flags & PACK_REFS_AUTO))
1324+
return 1;
1325+
1326+
ret = packed_refs_size(refs->packed_ref_store, &packed_size);
1327+
if (ret < 0)
1328+
die("cannot determine packed-refs size");
1329+
1330+
/*
1331+
* Packing loose references into the packed-refs file scales with the
1332+
* number of references we're about to write. We thus decide whether we
1333+
* repack refs by weighing the current size of the packed-refs file
1334+
* against the number of loose references. This is done such that we do
1335+
* not repack too often on repositories with a huge number of
1336+
* references, where we can expect a lot of churn in the number of
1337+
* references.
1338+
*
1339+
* As a heuristic, we repack if the number of loose references in the
1340+
* repository exceeds `log2(nr_packed_refs) * 5`, where we estimate
1341+
* `nr_packed_refs = packed_size / 100`, which scales as following:
1342+
*
1343+
* - 1kB ~ 10 packed refs: 16 refs
1344+
* - 10kB ~ 100 packed refs: 33 refs
1345+
* - 100kB ~ 1k packed refs: 49 refs
1346+
* - 1MB ~ 10k packed refs: 66 refs
1347+
* - 10MB ~ 100k packed refs: 82 refs
1348+
* - 100MB ~ 1m packed refs: 99 refs
1349+
*
1350+
* We thus allow roughly 16 additional loose refs per factor of ten of
1351+
* packed refs. This heuristic may be tweaked in the future, but should
1352+
* serve as a sufficiently good first iteration.
1353+
*/
1354+
limit = log2u(packed_size / 100) * 5;
1355+
if (limit < 16)
1356+
limit = 16;
1357+
1358+
iter = cache_ref_iterator_begin(get_loose_ref_cache(refs, 0), NULL,
1359+
refs->base.repo, 0);
1360+
while ((ret = ref_iterator_advance(iter)) == ITER_OK) {
1361+
if (should_pack_ref(refs, iter->refname, iter->oid,
1362+
iter->flags, opts))
1363+
refcount++;
1364+
if (refcount >= limit) {
1365+
ref_iterator_abort(iter);
1366+
return 1;
1367+
}
1368+
}
1369+
1370+
if (ret != ITER_DONE)
1371+
die("error while iterating over references");
1372+
1373+
return 0;
1374+
}
1375+
13141376
static int files_pack_refs(struct ref_store *ref_store,
13151377
struct pack_refs_opts *opts)
13161378
{
@@ -1323,6 +1385,9 @@ static int files_pack_refs(struct ref_store *ref_store,
13231385
struct strbuf err = STRBUF_INIT;
13241386
struct ref_transaction *transaction;
13251387

1388+
if (!should_pack_refs(refs, opts))
1389+
return 0;
1390+
13261391
transaction = ref_store_transaction_begin(refs->packed_ref_store, &err);
13271392
if (!transaction)
13281393
return -1;

refs/packed-backend.c

+18
Original file line numberDiff line numberDiff line change
@@ -1250,6 +1250,24 @@ int packed_refs_is_locked(struct ref_store *ref_store)
12501250
return is_lock_file_locked(&refs->lock);
12511251
}
12521252

1253+
int packed_refs_size(struct ref_store *ref_store,
1254+
size_t *out)
1255+
{
1256+
struct packed_ref_store *refs = packed_downcast(ref_store, REF_STORE_READ,
1257+
"packed_refs_size");
1258+
struct stat st;
1259+
1260+
if (stat(refs->path, &st) < 0) {
1261+
if (errno != ENOENT)
1262+
return -1;
1263+
*out = 0;
1264+
return 0;
1265+
}
1266+
1267+
*out = st.st_size;
1268+
return 0;
1269+
}
1270+
12531271
/*
12541272
* The packed-refs header line that we write out. Perhaps other traits
12551273
* will be added later.

refs/packed-backend.h

+7
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,13 @@ int packed_refs_lock(struct ref_store *ref_store, int flags, struct strbuf *err)
2727
void packed_refs_unlock(struct ref_store *ref_store);
2828
int packed_refs_is_locked(struct ref_store *ref_store);
2929

30+
/*
31+
* Obtain the size of the `packed-refs` file. Reports `0` as size in case there
32+
* is no packed-refs file. Returns 0 on success, negative otherwise.
33+
*/
34+
int packed_refs_size(struct ref_store *ref_store,
35+
size_t *out);
36+
3037
/*
3138
* Return true if `transaction` really needs to be carried out against
3239
* the specified packed_ref_store, or false if it can be skipped

t/t0601-reffiles-pack-refs.sh

+85-16
Original file line numberDiff line numberDiff line change
@@ -161,13 +161,6 @@ test_expect_success 'test --exclude takes precedence over --include' '
161161
git pack-refs --include "refs/heads/pack*" --exclude "refs/heads/pack*" &&
162162
test -f .git/refs/heads/dont_pack5'
163163

164-
test_expect_success '--auto packs and prunes refs as usual' '
165-
git branch auto &&
166-
test_path_is_file .git/refs/heads/auto &&
167-
git pack-refs --auto --all &&
168-
test_path_is_missing .git/refs/heads/auto
169-
'
170-
171164
test_expect_success 'see if up-to-date packed refs are preserved' '
172165
git branch q &&
173166
git pack-refs --all --prune &&
@@ -367,14 +360,90 @@ test_expect_success 'pack-refs does not drop broken refs during deletion' '
367360
test_cmp expect actual
368361
'
369362

370-
test_expect_success 'maintenance --auto unconditionally packs loose refs' '
371-
git update-ref refs/heads/something HEAD &&
372-
test_path_is_file .git/refs/heads/something &&
373-
git rev-parse refs/heads/something >expect &&
374-
git maintenance run --task=pack-refs --auto &&
375-
test_path_is_missing .git/refs/heads/something &&
376-
git rev-parse refs/heads/something >actual &&
377-
test_cmp expect actual
378-
'
363+
for command in "git pack-refs --all --auto" "git maintenance run --task=pack-refs --auto"
364+
do
365+
test_expect_success "$command does not repack below 16 refs without packed-refs" '
366+
test_when_finished "rm -rf repo" &&
367+
git init repo &&
368+
(
369+
cd repo &&
370+
git config set maintenance.auto false &&
371+
git commit --allow-empty --message "initial" &&
372+
373+
# Create 14 additional references, which brings us to
374+
# 15 together with the default branch.
375+
printf "create refs/heads/loose-%d HEAD\n" $(test_seq 14) >stdin &&
376+
git update-ref --stdin <stdin &&
377+
test_path_is_missing .git/packed-refs &&
378+
git pack-refs --auto --all &&
379+
test_path_is_missing .git/packed-refs &&
380+
381+
# Create the 16th reference, which should cause us to repack.
382+
git update-ref refs/heads/loose-15 HEAD &&
383+
git pack-refs --auto --all &&
384+
test_path_is_file .git/packed-refs
385+
)
386+
'
387+
388+
test_expect_success "$command does not repack below 16 refs with small packed-refs" '
389+
test_when_finished "rm -rf repo" &&
390+
git init repo &&
391+
(
392+
cd repo &&
393+
git config set maintenance.auto false &&
394+
git commit --allow-empty --message "initial" &&
395+
396+
git pack-refs --all &&
397+
test_line_count = 2 .git/packed-refs &&
398+
399+
# Create 15 loose references.
400+
printf "create refs/heads/loose-%d HEAD\n" $(test_seq 15) >stdin &&
401+
git update-ref --stdin <stdin &&
402+
git pack-refs --auto --all &&
403+
test_line_count = 2 .git/packed-refs &&
404+
405+
# Create the 16th loose reference, which should cause us to repack.
406+
git update-ref refs/heads/loose-17 HEAD &&
407+
git pack-refs --auto --all &&
408+
test_line_count = 18 .git/packed-refs
409+
)
410+
'
411+
412+
test_expect_success "$command scales with size of packed-refs" '
413+
test_when_finished "rm -rf repo" &&
414+
git init repo &&
415+
(
416+
cd repo &&
417+
git config set maintenance.auto false &&
418+
git commit --allow-empty --message "initial" &&
419+
420+
# Create 99 packed refs. This should cause the heuristic
421+
# to require more than the minimum amount of loose refs.
422+
test_seq 99 |
423+
while read i
424+
do
425+
printf "create refs/heads/packed-%d HEAD\n" $i || return 1
426+
done >stdin &&
427+
git update-ref --stdin <stdin &&
428+
git pack-refs --all &&
429+
test_line_count = 101 .git/packed-refs &&
430+
431+
# Create 24 loose refs, which should not yet cause us to repack.
432+
printf "create refs/heads/loose-%d HEAD\n" $(test_seq 24) >stdin &&
433+
git update-ref --stdin <stdin &&
434+
git pack-refs --auto --all &&
435+
test_line_count = 101 .git/packed-refs &&
436+
437+
# Create another handful of refs to cross the border.
438+
# Note that we explicitly do not check for strict
439+
# boundaries here, as this also depends on the size of
440+
# the object hash.
441+
printf "create refs/heads/addn-%d HEAD\n" $(test_seq 10) >stdin &&
442+
git update-ref --stdin <stdin &&
443+
git pack-refs --auto --all &&
444+
test_line_count = 135 .git/packed-refs
445+
)
446+
'
447+
done
379448

380449
test_done

wrapper.h

+18
Original file line numberDiff line numberDiff line change
@@ -140,4 +140,22 @@ int csprng_bytes(void *buf, size_t len);
140140
*/
141141
uint32_t git_rand(void);
142142

143+
/* Provide log2 of the given `size_t`. */
144+
static inline unsigned log2u(uintmax_t sz)
145+
{
146+
unsigned l = 0;
147+
148+
/*
149+
* Technically this isn't required, but it helps the compiler optimize
150+
* this to a `bsr` instruction.
151+
*/
152+
if (!sz)
153+
return 0;
154+
155+
for (; sz; sz >>= 1)
156+
l++;
157+
158+
return l - 1;
159+
}
160+
143161
#endif /* WRAPPER_H */

0 commit comments

Comments
 (0)