Skip to content

Commit e33ac9c

Browse files
jltobler authored and gitster committed
builtin/repo: collect largest inflated objects
The "structure" output for git-repo(1) shows the total inflated and disk sizes of reachable objects in the repository, but doesn't show the size of the largest individual objects. Since an individual object may be a large contributor to the overall repository size, it is useful for users to know the maximum size of individual objects. While iterating across objects, record the size and OID of the largest objects encountered for each object type to provide as output. Note that the default "table" output format only displays size information and not the corresponding OID. In a subsequent commit, the table format is updated to add table annotations that mention the OID. Signed-off-by: Justin Tobler <jltobler@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent fa17527 commit e33ac9c

3 files changed

Lines changed: 92 additions & 0 deletions

File tree

Documentation/git-repo.adoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ supported:
5252
* Reachable object counts categorized by type
5353
* Total inflated size of reachable objects by type
5454
* Total disk size of reachable objects by type
55+
* Largest reachable objects in the repository by type
5556
+
5657
The output format can be chosen through the flag `--format`. Three formats are
5758
supported:

builtin/repo.c

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "builtin.h"
44
#include "environment.h"
5+
#include "hash.h"
56
#include "hex.h"
67
#include "odb.h"
78
#include "parse-options.h"
@@ -197,6 +198,18 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix,
197198
return print_fields(argc, argv, repo, format);
198199
}
199200

201+
/*
 * A single measurement paired with the ID of the object it was taken
 * from. In this change it records the largest object seen so far for
 * one object type (see check_largest()).
 */
struct object_data {
202+
/* ID of the object that `value` was measured on. */
struct object_id oid;
203+
/* The measurement itself; count_objects() passes the inflated size. */
size_t value;
204+
};
205+
206+
/*
 * Per-type record of the largest object encountered: one object_data
 * entry (size plus OID) for each of tags, commits, trees and blobs.
 * Entries are updated through check_largest() from count_objects().
 */
struct largest_objects {
207+
struct object_data tag_size;
208+
struct object_data commit_size;
209+
struct object_data tree_size;
210+
struct object_data blob_size;
211+
};
212+
200213
struct ref_stats {
201214
size_t branches;
202215
size_t remotes;
@@ -215,6 +228,7 @@ struct object_stats {
215228
struct object_values type_counts;
216229
struct object_values inflated_sizes;
217230
struct object_values disk_sizes;
231+
struct largest_objects largest;
218232
};
219233

220234
struct repo_structure {
@@ -371,6 +385,21 @@ static void stats_table_setup_structure(struct stats_table *table,
371385
" * %s", _("Blobs"));
372386
stats_table_size_addf(table, objects->disk_sizes.tags,
373387
" * %s", _("Tags"));
388+
389+
stats_table_addf(table, "");
390+
stats_table_addf(table, "* %s", _("Largest objects"));
391+
stats_table_addf(table, " * %s", _("Commits"));
392+
stats_table_size_addf(table, objects->largest.commit_size.value,
393+
" * %s", _("Maximum size"));
394+
stats_table_addf(table, " * %s", _("Trees"));
395+
stats_table_size_addf(table, objects->largest.tree_size.value,
396+
" * %s", _("Maximum size"));
397+
stats_table_addf(table, " * %s", _("Blobs"));
398+
stats_table_size_addf(table, objects->largest.blob_size.value,
399+
" * %s", _("Maximum size"));
400+
stats_table_addf(table, " * %s", _("Tags"));
401+
stats_table_size_addf(table, objects->largest.tag_size.value,
402+
" * %s", _("Maximum size"));
374403
}
375404

376405
static void stats_table_print_structure(const struct stats_table *table)
@@ -453,6 +482,14 @@ static inline void print_keyvalue(const char *key, char key_delim, size_t value,
453482
value_delim);
454483
}
455484

485+
/*
 * Print one object_data record as two entries on stdout:
 *
 *   <key><key_delim><value><value_delim>          (via print_keyvalue())
 *   <key>_oid<key_delim><hex oid><value_delim>
 *
 * `key` is used verbatim for the first entry and with an "_oid" suffix
 * for the second. The caller is responsible for flushing stdout.
 */
static void print_object_data(const char *key, char key_delim,
486+
struct object_data *data, char value_delim)
487+
{
488+
print_keyvalue(key, key_delim, data->value, value_delim);
489+
/* The OID entry reuses the same delimiters as the size entry above. */
printf("%s_oid%c%s%c", key, key_delim, oid_to_hex(&data->oid),
490+
value_delim);
491+
}
492+
456493
static void structure_keyvalue_print(struct repo_structure *stats,
457494
char key_delim, char value_delim)
458495
{
@@ -492,6 +529,15 @@ static void structure_keyvalue_print(struct repo_structure *stats,
492529
print_keyvalue("objects.tags.disk_size", key_delim,
493530
stats->objects.disk_sizes.tags, value_delim);
494531

532+
print_object_data("objects.commits.max_size", key_delim,
533+
&stats->objects.largest.commit_size, value_delim);
534+
print_object_data("objects.trees.max_size", key_delim,
535+
&stats->objects.largest.tree_size, value_delim);
536+
print_object_data("objects.blobs.max_size", key_delim,
537+
&stats->objects.largest.blob_size, value_delim);
538+
print_object_data("objects.tags.max_size", key_delim,
539+
&stats->objects.largest.tag_size, value_delim);
540+
495541
fflush(stdout);
496542
}
497543

@@ -560,6 +606,15 @@ struct count_objects_data {
560606
struct progress *progress;
561607
};
562608

609+
/*
 * Update `data` to describe the object `oid` with measurement `value`
 * when that object should replace the currently recorded one.
 *
 * The record is replaced when `value` is strictly greater than the
 * stored value, or when the stored OID is the null OID. On a tie the
 * existing record is kept.
 */
static void check_largest(struct object_data *data, struct object_id *oid,
610+
size_t value)
611+
{
612+
/*
 * NOTE(review): the is_null_oid() test presumably detects a record that
 * has not been filled yet, so that a first object whose value is 0 is
 * still recorded -- confirm that the struct starts out zero-initialized.
 */
if (value > data->value || is_null_oid(&data->oid)) {
613+
oidcpy(&data->oid, oid);
614+
data->value = value;
615+
}
616+
}
617+
563618
static int count_objects(const char *path UNUSED, struct oid_array *oids,
564619
enum object_type type, void *cb_data)
565620
{
@@ -585,21 +640,29 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
585640
stats->type_counts.tags++;
586641
stats->inflated_sizes.tags += inflated;
587642
stats->disk_sizes.tags += disk;
643+
check_largest(&stats->largest.tag_size, &oids->oid[i],
644+
inflated);
588645
break;
589646
case OBJ_COMMIT:
590647
stats->type_counts.commits++;
591648
stats->inflated_sizes.commits += inflated;
592649
stats->disk_sizes.commits += disk;
650+
check_largest(&stats->largest.commit_size, &oids->oid[i],
651+
inflated);
593652
break;
594653
case OBJ_TREE:
595654
stats->type_counts.trees++;
596655
stats->inflated_sizes.trees += inflated;
597656
stats->disk_sizes.trees += disk;
657+
check_largest(&stats->largest.tree_size, &oids->oid[i],
658+
inflated);
598659
break;
599660
case OBJ_BLOB:
600661
stats->type_counts.blobs++;
601662
stats->inflated_sizes.blobs += inflated;
602663
stats->disk_sizes.blobs += disk;
664+
check_largest(&stats->largest.blob_size, &oids->oid[i],
665+
inflated);
603666
break;
604667
default:
605668
BUG("invalid object type");

t/t1901-repo-structure.sh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,16 @@ test_expect_success 'empty repository' '
5252
| * Trees | 0 B |
5353
| * Blobs | 0 B |
5454
| * Tags | 0 B |
55+
| | |
56+
| * Largest objects | |
57+
| * Commits | |
58+
| * Maximum size | 0 B |
59+
| * Trees | |
60+
| * Maximum size | 0 B |
61+
| * Blobs | |
62+
| * Maximum size | 0 B |
63+
| * Tags | |
64+
| * Maximum size | 0 B |
5565
EOF
5666
5767
git repo structure >out 2>err &&
@@ -104,6 +114,16 @@ test_expect_success SHA1 'repository with references and objects' '
104114
| * Trees | $(object_type_disk_usage tree true) |
105115
| * Blobs | $(object_type_disk_usage blob true) |
106116
| * Tags | $(object_type_disk_usage tag) B |
117+
| | |
118+
| * Largest objects | |
119+
| * Commits | |
120+
| * Maximum size | 223 B |
121+
| * Trees | |
122+
| * Maximum size | 32.29 KiB |
123+
| * Blobs | |
124+
| * Maximum size | 13 B |
125+
| * Tags | |
126+
| * Maximum size | 132 B |
107127
EOF
108128
109129
git repo structure >out 2>err &&
@@ -138,6 +158,14 @@ test_expect_success SHA1 'keyvalue and nul format' '
138158
objects.trees.disk_size=$(object_type_disk_usage tree)
139159
objects.blobs.disk_size=$(object_type_disk_usage blob)
140160
objects.tags.disk_size=$(object_type_disk_usage tag)
161+
objects.commits.max_size=221
162+
objects.commits.max_size_oid=de3508174b5c2ace6993da67cae9be9069e2df39
163+
objects.trees.max_size=1335
164+
objects.trees.max_size_oid=09931deea9d81ec21300d3e13c74412f32eacec5
165+
objects.blobs.max_size=11
166+
objects.blobs.max_size_oid=eaeeedced46482bd4281fda5a5f05ce24854151f
167+
objects.tags.max_size=132
168+
objects.tags.max_size_oid=1ee0f2b16ea37d895dbe9dbd76cd2ac70446176c
141169
EOF
142170
143171
git repo structure --format=keyvalue >out 2>err &&

0 commit comments

Comments
 (0)