Skip to content

Commit 8e3f52d

Browse files
peff authored and gitster committed
find_unique_abbrev: move logic out of get_short_sha1()
The get_short_sha1() is only about reading short sha1s; we do call it in a loop to check "is this long enough" for each object, but otherwise it should not need to know about things like our default_abbrev setting. So instead of asking it to set default_automatic_abbrev as a side-effect, let's just have find_unique_abbrev() pick the right place to start its loop. This requires a separate approximate_object_count() function, but that naturally belongs with the rest of sha1_file.c. Signed-off-by: Jeff King <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent e6c587c commit 8e3f52d

File tree

3 files changed

+68
-26
lines changed

3 files changed

+68
-26
lines changed

cache.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1204,7 +1204,6 @@ struct object_context {
12041204
#define GET_SHA1_TREEISH 020
12051205
#define GET_SHA1_BLOB 040
12061206
#define GET_SHA1_FOLLOW_SYMLINKS 0100
1207-
#define GET_SHA1_AUTOMATIC 0200
12081207
#define GET_SHA1_ONLY_TO_DIE 04000
12091208

12101209
#define GET_SHA1_DISAMBIGUATORS \
@@ -1456,6 +1455,12 @@ extern void prepare_packed_git(void);
14561455
extern void reprepare_packed_git(void);
14571456
extern void install_packed_git(struct packed_git *pack);
14581457

1458+
/*
1459+
* Give a rough count of objects in the repository. This sacrifices accuracy
1460+
* for speed.
1461+
*/
1462+
unsigned long approximate_object_count(void);
1463+
14591464
extern struct packed_git *find_sha1_pack(const unsigned char *sha1,
14601465
struct packed_git *packs);
14611466

sha1_file.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,6 +1381,32 @@ static void prepare_packed_git_one(char *objdir, int local)
13811381
strbuf_release(&path);
13821382
}
13831383

1384+
static int approximate_object_count_valid;
1385+
1386+
/*
1387+
* Give a fast, rough count of the number of objects in the repository. This
1388+
* ignores loose objects completely. If you have a lot of them, then either
1389+
* you should repack because your performance will be awful, or they are
1390+
* all unreachable objects about to be pruned, in which case they're not really
1391+
* interesting as a measure of repo size in the first place.
1392+
*/
1393+
unsigned long approximate_object_count(void)
1394+
{
1395+
static unsigned long count;
1396+
if (!approximate_object_count_valid) {
1397+
struct packed_git *p;
1398+
1399+
prepare_packed_git();
1400+
count = 0;
1401+
for (p = packed_git; p; p = p->next) {
1402+
if (open_pack_index(p))
1403+
continue;
1404+
count += p->num_objects;
1405+
}
1406+
}
1407+
return count;
1408+
}
1409+
13841410
static void *get_next_packed_git(const void *p)
13851411
{
13861412
return ((const struct packed_git *)p)->next;
@@ -1455,6 +1481,7 @@ void prepare_packed_git(void)
14551481

14561482
void reprepare_packed_git(void)
14571483
{
1484+
approximate_object_count_valid = 0;
14581485
prepare_packed_git_run_once = 0;
14591486
prepare_packed_git();
14601487
}

sha1_name.c

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ typedef int (*disambiguate_hint_fn)(const unsigned char *, void *);
1515

1616
struct disambiguate_state {
1717
int len; /* length of prefix in hex chars */
18-
unsigned int nrobjects;
1918
char hex_pfx[GIT_SHA1_HEXSZ + 1];
2019
unsigned char bin_pfx[GIT_SHA1_RAWSZ];
2120

@@ -119,14 +118,6 @@ static void find_short_object_filename(struct disambiguate_state *ds)
119118

120119
if (strlen(de->d_name) != 38)
121120
continue;
122-
123-
/*
124-
* We only look at the one subdirectory, and we assume
125-
* each subdirectory is roughly similar, so each
126-
* object we find probably has 255 other objects in
127-
* the other fan-out directories.
128-
*/
129-
ds->nrobjects += 256;
130121
if (memcmp(de->d_name, ds->hex_pfx + 2, ds->len - 2))
131122
continue;
132123
memcpy(hex + 2, de->d_name, 38);
@@ -160,7 +151,6 @@ static void unique_in_pack(struct packed_git *p,
160151

161152
open_pack_index(p);
162153
num = p->num_objects;
163-
ds->nrobjects += num;
164154
last = num;
165155
while (first < last) {
166156
uint32_t mid = (first + last) / 2;
@@ -390,9 +380,6 @@ static int show_ambiguous_object(const unsigned char *sha1, void *data)
390380
return 0;
391381
}
392382

393-
/* start from our historical default before the automatic abbreviation */
394-
static int default_automatic_abbrev = FALLBACK_DEFAULT_ABBREV;
395-
396383
static int get_short_sha1(const char *name, int len, unsigned char *sha1,
397384
unsigned flags)
398385
{
@@ -439,14 +426,6 @@ static int get_short_sha1(const char *name, int len, unsigned char *sha1,
439426
for_each_abbrev(ds.hex_pfx, show_ambiguous_object, &ds);
440427
}
441428

442-
if (len < 16 && !status && (flags & GET_SHA1_AUTOMATIC)) {
443-
unsigned int expect_collision = 1 << (len * 2);
444-
if (ds.nrobjects > expect_collision) {
445-
default_automatic_abbrev = len+1;
446-
return SHORT_NAME_AMBIGUOUS;
447-
}
448-
}
449-
450429
return status;
451430
}
452431

@@ -476,22 +455,53 @@ int for_each_abbrev(const char *prefix, each_abbrev_fn fn, void *cb_data)
476455
return ret;
477456
}
478457

458+
/*
 * Return the zero-based position of the highest bit set in "val"; both 0
 * and 1 yield 0. A plain shift loop is used rather than fls() or
 * __builtin_clzl() because speed is probably not a big deal here.
 */
static unsigned msb(unsigned long val)
{
	unsigned slot;

	/* Each halving that still leaves a value above 1 moves up one slot. */
	for (slot = 0; val > 1; val >>= 1)
		slot++;
	return slot;
}
470+
479471
int find_unique_abbrev_r(char *hex, const unsigned char *sha1, int len)
480472
{
481473
int status, exists;
482-
int flags = GET_SHA1_QUIETLY;
483474

484475
if (len < 0) {
485-
flags |= GET_SHA1_AUTOMATIC;
486-
len = default_automatic_abbrev;
476+
unsigned long count = approximate_object_count();
477+
/*
478+
* Add one because the MSB only tells us the highest bit set,
479+
* not including the value of all the _other_ bits (so "15"
480+
* is only one off of 2^4, but the MSB is the 3rd bit).
481+
*/
482+
len = msb(count) + 1;
483+
/*
484+
* We now know we have on the order of 2^len objects, which
485+
* expects a collision at 2^(len/2). But we also care about hex
486+
* chars, not bits, and there are 4 bits per hex. So all
487+
* together we need to divide by 2; but we also want to round
488+
* odd numbers up, hence adding one before dividing.
489+
*/
490+
len = (len + 1) / 2;
491+
/*
492+
* For very small repos, we stick with our regular fallback.
493+
*/
494+
if (len < FALLBACK_DEFAULT_ABBREV)
495+
len = FALLBACK_DEFAULT_ABBREV;
487496
}
497+
488498
sha1_to_hex_r(hex, sha1);
489499
if (len == 40 || !len)
490500
return 40;
491501
exists = has_sha1_file(sha1);
492502
while (len < 40) {
493503
unsigned char sha1_ret[20];
494-
status = get_short_sha1(hex, len, sha1_ret, flags);
504+
status = get_short_sha1(hex, len, sha1_ret, GET_SHA1_QUIETLY);
495505
if (exists
496506
? !status
497507
: status == SHORT_NAME_NOT_FOUND) {

0 commit comments

Comments
 (0)