Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b05d3da

Browse files
committed Mar 26, 2025
survey: summarize total sizes by object type
Now that we have explored objects by count, we can expand that a bit more to summarize the data for the on-disk and inflated size of those objects. This information is helpful for diagnosing not only why disk space (and perhaps clone or fetch times) is growing but also why certain operations are slow because the inflated size of the abstract objects that must be processed is so large. Signed-off-by: Derrick Stolee <[email protected]>
1 parent d8cad61 commit b05d3da

File tree

2 files changed

+161
-0
lines changed

2 files changed

+161
-0
lines changed
 

‎builtin/survey.c

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,36 @@ struct survey_report_object_summary {
6565
size_t blobs_nr;
6666
};
6767

68+
/**
 * Size totals for one category of objects, identified by 'label'.
 *
 * Tracks how many objects in the category were measured, the sum of
 * their on-disk sizes, and the sum of their sizes after decompressing
 * (both with delta bases and zlib).
 */
struct survey_report_object_size_summary {
	/* Name of the category, shown in the first column of the report. */
	char *label;

	/* Number of objects whose size information was found. */
	size_t nr;

	/* Sum of the on-disk sizes of those objects. */
	size_t disk_size;

	/* Sum of the inflated sizes of those objects. */
	size_t inflated_size;

	/* Number of objects whose size lookup failed. */
	size_t num_missing;
};
80+
6881
/**
6982
* This struct contains all of the information that needs to be printed
7083
* at the end of the exploration of the repository and its references.
7184
*/
7285
struct survey_report {
7386
struct survey_report_ref_summary refs;
7487
struct survey_report_object_summary reachable_objects;
88+
89+
struct survey_report_object_size_summary *by_type;
7590
};
7691

92+
/*
 * Indexes into the 'by_type' array of struct survey_report, one per
 * object type. REPORT_TYPE_COUNT is the number of entries in that array.
 */
enum {
	REPORT_TYPE_COMMIT = 0,
	REPORT_TYPE_TREE = 1,
	REPORT_TYPE_BLOB = 2,
	REPORT_TYPE_TAG = 3,
	REPORT_TYPE_COUNT = 4
};
97+
7798
struct survey_context {
7899
struct repository *repo;
79100

@@ -285,12 +306,48 @@ static void survey_report_plaintext_reachable_object_summary(struct survey_conte
285306
clear_table(&table);
286307
}
287308

309+
static void survey_report_object_sizes(const char *title,
310+
const char *categories,
311+
struct survey_report_object_size_summary *summary,
312+
size_t summary_nr)
313+
{
314+
struct survey_table table = SURVEY_TABLE_INIT;
315+
table.table_name = title;
316+
317+
strvec_push(&table.header, categories);
318+
strvec_push(&table.header, _("Count"));
319+
strvec_push(&table.header, _("Disk Size"));
320+
strvec_push(&table.header, _("Inflated Size"));
321+
322+
for (size_t i = 0; i < summary_nr; i++) {
323+
char *label_str = xstrdup(summary[i].label);
324+
char *nr_str = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].nr);
325+
char *disk_str = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].disk_size);
326+
char *inflate_str = xstrfmt("%"PRIuMAX, (uintmax_t)summary[i].inflated_size);
327+
328+
insert_table_rowv(&table, label_str, nr_str,
329+
disk_str, inflate_str, NULL);
330+
331+
free(label_str);
332+
free(nr_str);
333+
free(disk_str);
334+
free(inflate_str);
335+
}
336+
337+
print_table_plaintext(&table);
338+
clear_table(&table);
339+
}
340+
288341
static void survey_report_plaintext(struct survey_context *ctx)
289342
{
290343
printf("GIT SURVEY for \"%s\"\n", ctx->repo->worktree);
291344
printf("-----------------------------------------------------\n");
292345
survey_report_plaintext_refs(ctx);
293346
survey_report_plaintext_reachable_object_summary(ctx);
347+
survey_report_object_sizes(_("TOTAL OBJECT SIZES BY TYPE"),
348+
_("Object Type"),
349+
ctx->report.by_type,
350+
REPORT_TYPE_COUNT);
294351
}
295352

296353
/*
@@ -503,6 +560,68 @@ static void increment_object_counts(
503560
}
504561
}
505562

563+
static void increment_totals(struct survey_context *ctx,
564+
struct oid_array *oids,
565+
struct survey_report_object_size_summary *summary)
566+
{
567+
for (size_t i = 0; i < oids->nr; i++) {
568+
struct object_info oi = OBJECT_INFO_INIT;
569+
unsigned oi_flags = OBJECT_INFO_FOR_PREFETCH;
570+
unsigned long object_length = 0;
571+
off_t disk_sizep = 0;
572+
enum object_type type;
573+
574+
oi.typep = &type;
575+
oi.sizep = &object_length;
576+
oi.disk_sizep = &disk_sizep;
577+
578+
if (oid_object_info_extended(ctx->repo, &oids->oid[i],
579+
&oi, oi_flags) < 0) {
580+
summary->num_missing++;
581+
} else {
582+
summary->nr++;
583+
summary->disk_size += disk_sizep;
584+
summary->inflated_size += object_length;
585+
}
586+
}
587+
}
588+
589+
static void increment_object_totals(struct survey_context *ctx,
590+
struct oid_array *oids,
591+
enum object_type type)
592+
{
593+
struct survey_report_object_size_summary *total;
594+
struct survey_report_object_size_summary summary = { 0 };
595+
596+
increment_totals(ctx, oids, &summary);
597+
598+
switch (type) {
599+
case OBJ_COMMIT:
600+
total = &ctx->report.by_type[REPORT_TYPE_COMMIT];
601+
break;
602+
603+
case OBJ_TREE:
604+
total = &ctx->report.by_type[REPORT_TYPE_TREE];
605+
break;
606+
607+
case OBJ_BLOB:
608+
total = &ctx->report.by_type[REPORT_TYPE_BLOB];
609+
break;
610+
611+
case OBJ_TAG:
612+
total = &ctx->report.by_type[REPORT_TYPE_TAG];
613+
break;
614+
615+
default:
616+
BUG("No other type allowed");
617+
}
618+
619+
total->nr += summary.nr;
620+
total->disk_size += summary.disk_size;
621+
total->inflated_size += summary.inflated_size;
622+
total->num_missing += summary.num_missing;
623+
}
624+
506625
static int survey_objects_path_walk_fn(const char *path UNUSED,
507626
struct oid_array *oids,
508627
enum object_type type,
@@ -512,10 +631,20 @@ static int survey_objects_path_walk_fn(const char *path UNUSED,
512631

513632
increment_object_counts(&ctx->report.reachable_objects,
514633
type, oids->nr);
634+
increment_object_totals(ctx, oids, type);
515635

516636
return 0;
517637
}
518638

639+
static void initialize_report(struct survey_context *ctx)
640+
{
641+
CALLOC_ARRAY(ctx->report.by_type, REPORT_TYPE_COUNT);
642+
ctx->report.by_type[REPORT_TYPE_COMMIT].label = xstrdup(_("Commits"));
643+
ctx->report.by_type[REPORT_TYPE_TREE].label = xstrdup(_("Trees"));
644+
ctx->report.by_type[REPORT_TYPE_BLOB].label = xstrdup(_("Blobs"));
645+
ctx->report.by_type[REPORT_TYPE_TAG].label = xstrdup(_("Tags"));
646+
}
647+
519648
static void survey_phase_objects(struct survey_context *ctx)
520649
{
521650
struct rev_info revs = REV_INFO_INIT;
@@ -528,12 +657,15 @@ static void survey_phase_objects(struct survey_context *ctx)
528657
info.path_fn = survey_objects_path_walk_fn;
529658
info.path_fn_data = ctx;
530659

660+
initialize_report(ctx);
661+
531662
repo_init_revisions(ctx->repo, &revs, "");
532663
revs.tag_objects = 1;
533664

534665
for (int i = 0; i < ctx->ref_array.nr; i++) {
535666
struct ref_array_item *item = ctx->ref_array.items[i];
536667
add_pending_oid(&revs, NULL, &item->objectname, add_flags);
668+
display_progress(ctx->progress, ++(ctx->progress_nr));
537669
}
538670

539671
walk_objects_by_path(&info);

‎t/t8100-git-survey.sh

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,26 @@ test_expect_success 'git survey (default)' '
2424
git survey --all-refs >out 2>err &&
2525
test_line_count = 0 err &&
2626
27+
test_oid_cache <<-EOF &&
28+
commits_size_on_disk sha1: 1523
29+
commits_size_on_disk sha256: 1811
30+
31+
commits_size sha1: 2153
32+
commits_size sha256: 2609
33+
34+
trees_size_on_disk sha1: 495
35+
trees_size_on_disk sha256: 635
36+
37+
trees_size sha1: 1706
38+
trees_size sha256: 2366
39+
40+
tags_size sha1: 528
41+
tags_size sha256: 624
42+
43+
tags_size_on_disk sha1: 510
44+
tags_size_on_disk sha256: 569
45+
EOF
46+
2747
tr , " " >expect <<-EOF &&
2848
GIT SURVEY for "$(pwd)"
2949
-----------------------------------------------------
@@ -45,6 +65,15 @@ test_expect_success 'git survey (default)' '
4565
Commits | 10
4666
Trees | 10
4767
Blobs | 10
68+
69+
TOTAL OBJECT SIZES BY TYPE
70+
===============================================
71+
Object Type | Count | Disk Size | Inflated Size
72+
------------+-------+-----------+--------------
73+
Commits | 10 | $(test_oid commits_size_on_disk) | $(test_oid commits_size)
74+
Trees | 10 | $(test_oid trees_size_on_disk) | $(test_oid trees_size)
75+
Blobs | 10 | 191 | 101
76+
Tags | 4 | $(test_oid tags_size_on_disk) | $(test_oid tags_size)
4877
EOF
4978
5079
test_cmp expect out

0 commit comments

Comments
 (0)
Please sign in to comment.