Skip to content

Commit 3bf057a

Browse files
committedSep 12, 2024
Merge branch 'tb/multi-pack-reuse-fix'
A data corruption bug when multi-pack-index is used and the same objects are stored in multiple packfiles has been corrected. * tb/multi-pack-reuse-fix: builtin/pack-objects.c: do not open-code `MAX_PACK_OBJECT_HEADER` pack-bitmap.c: avoid repeated `pack_pos_to_offset()` during reuse builtin/pack-objects.c: translate bit positions during pack-reuse pack-bitmap: tag bitmapped packs with their corresponding MIDX t/t5332-multi-pack-reuse.sh: verify pack generation with --strict
2 parents 04595eb + d3e7db2 commit 3bf057a

File tree

5 files changed

+78
-17
lines changed

5 files changed

+78
-17
lines changed
 

‎builtin/pack-objects.c

+37-9
Original file line numberDiff line numberDiff line change
@@ -1072,7 +1072,7 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile,
10721072
fixup = find_reused_offset(offset) -
10731073
find_reused_offset(base_offset);
10741074
if (fixup) {
1075-
unsigned char ofs_header[10];
1075+
unsigned char ofs_header[MAX_PACK_OBJECT_HEADER];
10761076
unsigned i, ofs_len;
10771077
off_t ofs = offset - base_offset - fixup;
10781078

@@ -1191,6 +1191,7 @@ static void write_reused_pack(struct bitmapped_pack *reuse_packfile,
11911191
size_t pos = (i * BITS_IN_EWORD);
11921192

11931193
for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
1194+
uint32_t pack_pos;
11941195
if ((word >> offset) == 0)
11951196
break;
11961197

@@ -1199,14 +1200,41 @@ static void write_reused_pack(struct bitmapped_pack *reuse_packfile,
11991200
continue;
12001201
if (pos + offset >= reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr)
12011202
goto done;
1202-
/*
1203-
* Can use bit positions directly, even for MIDX
1204-
* bitmaps. See comment in try_partial_reuse()
1205-
* for why.
1206-
*/
1207-
write_reused_pack_one(reuse_packfile->p,
1208-
pos + offset - reuse_packfile->bitmap_pos,
1209-
f, pack_start, &w_curs);
1203+
1204+
if (reuse_packfile->bitmap_pos) {
1205+
/*
1206+
* When doing multi-pack reuse on a
1207+
* non-preferred pack, translate bit positions
1208+
* from the MIDX pseudo-pack order back to their
1209+
* pack-relative positions before attempting
1210+
* reuse.
1211+
*/
1212+
struct multi_pack_index *m = reuse_packfile->from_midx;
1213+
uint32_t midx_pos;
1214+
off_t pack_ofs;
1215+
1216+
if (!m)
1217+
BUG("non-zero bitmap position without MIDX");
1218+
1219+
midx_pos = pack_pos_to_midx(m, pos + offset);
1220+
pack_ofs = nth_midxed_offset(m, midx_pos);
1221+
1222+
if (offset_to_pack_pos(reuse_packfile->p,
1223+
pack_ofs, &pack_pos) < 0)
1224+
BUG("could not find expected object at offset %"PRIuMAX" in pack %s",
1225+
(uintmax_t)pack_ofs,
1226+
pack_basename(reuse_packfile->p));
1227+
} else {
1228+
/*
1229+
* Can use bit positions directly, even for MIDX
1230+
* bitmaps. See comment in try_partial_reuse()
1231+
* for why.
1232+
*/
1233+
pack_pos = pos + offset;
1234+
}
1235+
1236+
write_reused_pack_one(reuse_packfile->p, pack_pos, f,
1237+
pack_start, &w_curs);
12101238
display_progress(progress_state, ++written);
12111239
}
12121240
}

‎midx.c

+1
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,7 @@ int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
496496
MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id +
497497
sizeof(uint32_t));
498498
bp->pack_int_id = pack_int_id;
499+
bp->from_midx = m;
499500

500501
return 0;
501502
}

‎pack-bitmap.c

+8-4
Original file line numberDiff line numberDiff line change
@@ -2055,17 +2055,18 @@ static int try_partial_reuse(struct bitmap_index *bitmap_git,
20552055
struct bitmapped_pack *pack,
20562056
size_t bitmap_pos,
20572057
uint32_t pack_pos,
2058+
off_t offset,
20582059
struct bitmap *reuse,
20592060
struct pack_window **w_curs)
20602061
{
2061-
off_t offset, delta_obj_offset;
2062+
off_t delta_obj_offset;
20622063
enum object_type type;
20632064
unsigned long size;
20642065

20652066
if (pack_pos >= pack->p->num_objects)
20662067
return -1; /* not actually in the pack */
20672068

2068-
offset = delta_obj_offset = pack_pos_to_offset(pack->p, pack_pos);
2069+
delta_obj_offset = offset;
20692070
type = unpack_object_header(pack->p, w_curs, &offset, &size);
20702071
if (type < 0)
20712072
return -1; /* broken packfile, punt */
@@ -2184,6 +2185,7 @@ static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git
21842185
for (offset = 0; offset < BITS_IN_EWORD; offset++) {
21852186
size_t bit_pos;
21862187
uint32_t pack_pos;
2188+
off_t ofs;
21872189

21882190
if (word >> offset == 0)
21892191
break;
@@ -2198,7 +2200,6 @@ static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git
21982200

21992201
if (bitmap_is_midx(bitmap_git)) {
22002202
uint32_t midx_pos;
2201-
off_t ofs;
22022203

22032204
midx_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos);
22042205
ofs = nth_midxed_offset(bitmap_git->midx, midx_pos);
@@ -2213,10 +2214,12 @@ static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git
22132214
BUG("advanced beyond the end of pack %s (%"PRIuMAX" > %"PRIu32")",
22142215
pack_basename(pack->p), (uintmax_t)pack_pos,
22152216
pack->p->num_objects);
2217+
2218+
ofs = pack_pos_to_offset(pack->p, pack_pos);
22162219
}
22172220

22182221
if (try_partial_reuse(bitmap_git, pack, bit_pos,
2219-
pack_pos, reuse, &w_curs) < 0) {
2222+
pack_pos, ofs, reuse, &w_curs) < 0) {
22202223
/*
22212224
* try_partial_reuse indicated we couldn't reuse
22222225
* any bits, so there is no point in trying more
@@ -2322,6 +2325,7 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
23222325
packs[packs_nr].pack_int_id = pack_int_id;
23232326
packs[packs_nr].bitmap_nr = pack->num_objects;
23242327
packs[packs_nr].bitmap_pos = 0;
2328+
packs[packs_nr].from_midx = bitmap_git->midx;
23252329

23262330
objects_nr = packs[packs_nr++].bitmap_nr;
23272331
}

‎pack-bitmap.h

+1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ struct bitmapped_pack {
6060
uint32_t bitmap_pos;
6161
uint32_t bitmap_nr;
6262

63+
struct multi_pack_index *from_midx; /* MIDX only */
6364
uint32_t pack_int_id; /* MIDX only */
6465
};
6566

‎t/t5332-multi-pack-reuse.sh

+31-4
Original file line numberDiff line numberDiff line change
@@ -31,20 +31,24 @@ test_pack_objects_reused_all () {
3131
: >trace2.txt &&
3232
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
3333
git pack-objects --stdout --revs --all --delta-base-offset \
34-
>/dev/null &&
34+
>got.pack &&
3535

3636
test_pack_reused "$1" <trace2.txt &&
37-
test_packs_reused "$2" <trace2.txt
37+
test_packs_reused "$2" <trace2.txt &&
38+
39+
git index-pack --strict -o got.idx got.pack
3840
}
3941

4042
# test_pack_objects_reused <pack-reused> <packs-reused>
4143
test_pack_objects_reused () {
4244
: >trace2.txt &&
4345
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
44-
git pack-objects --stdout --revs >/dev/null &&
46+
git pack-objects --stdout --revs >got.pack &&
4547

4648
test_pack_reused "$1" <trace2.txt &&
47-
test_packs_reused "$2" <trace2.txt
49+
test_packs_reused "$2" <trace2.txt &&
50+
51+
git index-pack --strict -o got.idx got.pack
4852
}
4953

5054
test_expect_success 'preferred pack is reused for single-pack reuse' '
@@ -232,4 +236,27 @@ test_expect_success 'non-omitted delta in MIDX preferred pack' '
232236
test_pack_objects_reused_all $(wc -l <expect) 1
233237
'
234238

239+
test_expect_success 'duplicate objects' '
240+
git init duplicate-objects &&
241+
(
242+
cd duplicate-objects &&
243+
244+
git config pack.allowPackReuse multi &&
245+
246+
test_commit base &&
247+
248+
git repack -a &&
249+
250+
git rev-parse HEAD^{tree} >in &&
251+
p="$(git pack-objects $packdir/pack <in)" &&
252+
253+
git multi-pack-index write --bitmap --preferred-pack=pack-$p.idx &&
254+
255+
objects_nr="$(git rev-list --count --all --objects)" &&
256+
packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" &&
257+
258+
test_pack_objects_reused_all $objects_nr $packs_nr
259+
)
260+
'
261+
235262
test_done

0 commit comments

Comments
 (0)