Skip to content

Commit 83869e1

Browse files
pks-t authored and gitster committed
odb: introduce generic odb_find_abbrev_len()
Introduce a new generic `odb_find_abbrev_len()` function as well as source-specific callback functions. This makes the logic to compute the required prefix length to make a given object unique fully pluggable. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 6c2ede6 commit 83869e1

5 files changed

Lines changed: 142 additions & 53 deletions

File tree

object-name.c

Lines changed: 4 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,9 @@
1515
#include "refs.h"
1616
#include "remote.h"
1717
#include "dir.h"
18+
#include "odb.h"
1819
#include "oid-array.h"
19-
#include "packfile.h"
2020
#include "pretty.h"
21-
#include "object-file.h"
2221
#include "read-cache-ll.h"
2322
#include "repo-settings.h"
2423
#include "repository.h"
@@ -569,19 +568,6 @@ int repo_for_each_abbrev(struct repository *r, const char *prefix,
569568
return ret;
570569
}
571570

572-
/*
573-
* Return the slot of the most-significant bit set in "val". There are various
574-
* ways to do this quickly with fls() or __builtin_clzl(), but speed is
575-
* probably not a big deal here.
576-
*/
577-
static unsigned msb(unsigned long val)
578-
{
579-
unsigned r = 0;
580-
while (val >>= 1)
581-
r++;
582-
return r;
583-
}
584-
585571
void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo,
586572
const struct object_id *oid, int abbrev_len)
587573
{
@@ -602,49 +588,14 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex,
602588
{
603589
const struct git_hash_algo *algo =
604590
oid->algo ? &hash_algos[oid->algo] : r->hash_algo;
605-
const unsigned hexsz = algo->hexsz;
606591
unsigned len;
607592

608-
if (min_len < 0) {
609-
unsigned long count;
610-
611-
if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
612-
count = 0;
613-
614-
/*
615-
* Add one because the MSB only tells us the highest bit set,
616-
* not including the value of all the _other_ bits (so "15"
617-
* is only one off of 2^4, but the MSB is the 3rd bit).
618-
*/
619-
len = msb(count) + 1;
620-
/*
621-
* We now know we have on the order of 2^len objects, which
622-
* expects a collision at 2^(len/2). But we also care about hex
623-
* chars, not bits, and there are 4 bits per hex. So all
624-
* together we need to divide by 2 and round up.
625-
*/
626-
len = DIV_ROUND_UP(len, 2);
627-
/*
628-
* For very small repos, we stick with our regular fallback.
629-
*/
630-
if (len < FALLBACK_DEFAULT_ABBREV)
631-
len = FALLBACK_DEFAULT_ABBREV;
632-
} else {
633-
len = min_len;
634-
}
593+
if (odb_find_abbrev_len(r->objects, oid, min_len, &len) < 0)
594+
len = algo->hexsz;
635595

636596
oid_to_hex_r(hex, oid);
637-
if (len >= hexsz || !len)
638-
return hexsz;
639-
640-
odb_prepare_alternates(r->objects);
641-
for (struct odb_source *s = r->objects->sources; s; s = s->next) {
642-
struct odb_source_files *files = odb_source_files_downcast(s);
643-
packfile_store_find_abbrev_len(files->packed, oid, len, &len);
644-
odb_source_loose_find_abbrev_len(s, oid, len, &len);
645-
}
646-
647597
hex[len] = 0;
598+
648599
return len;
649600
}
650601

odb.c

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "midx.h"
1313
#include "object-file-convert.h"
1414
#include "object-file.h"
15+
#include "object-name.h"
1516
#include "odb.h"
1617
#include "packfile.h"
1718
#include "path.h"
@@ -964,6 +965,78 @@ int odb_count_objects(struct object_database *odb,
964965
return ret;
965966
}
966967

968+
/*
969+
* Return the slot of the most-significant bit set in "val". There are various
970+
* ways to do this quickly with fls() or __builtin_clzl(), but speed is
971+
* probably not a big deal here.
972+
*/
973+
static unsigned msb(unsigned long val)
974+
{
975+
unsigned r = 0;
976+
while (val >>= 1)
977+
r++;
978+
return r;
979+
}
980+
981+
int odb_find_abbrev_len(struct object_database *odb,
982+
const struct object_id *oid,
983+
int min_length,
984+
unsigned *out)
985+
{
986+
const struct git_hash_algo *algo =
987+
oid->algo ? &hash_algos[oid->algo] : odb->repo->hash_algo;
988+
const unsigned hexsz = algo->hexsz;
989+
unsigned len;
990+
int ret;
991+
992+
if (min_length < 0) {
993+
unsigned long count;
994+
995+
if (odb_count_objects(odb, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
996+
count = 0;
997+
998+
/*
999+
* Add one because the MSB only tells us the highest bit set,
1000+
* not including the value of all the _other_ bits (so "15"
1001+
* is only one off of 2^4, but the MSB is the 3rd bit).
1002+
*/
1003+
len = msb(count) + 1;
1004+
/*
1005+
* We now know we have on the order of 2^len objects, which
1006+
* expects a collision at 2^(len/2). But we also care about hex
1007+
* chars, not bits, and there are 4 bits per hex. So all
1008+
* together we need to divide by 2 and round up.
1009+
*/
1010+
len = DIV_ROUND_UP(len, 2);
1011+
/*
1012+
* For very small repos, we stick with our regular fallback.
1013+
*/
1014+
if (len < FALLBACK_DEFAULT_ABBREV)
1015+
len = FALLBACK_DEFAULT_ABBREV;
1016+
} else {
1017+
len = min_length;
1018+
}
1019+
1020+
if (len >= hexsz || !len) {
1021+
*out = hexsz;
1022+
ret = 0;
1023+
goto out;
1024+
}
1025+
1026+
odb_prepare_alternates(odb);
1027+
for (struct odb_source *source = odb->sources; source; source = source->next) {
1028+
ret = odb_source_find_abbrev_len(source, oid, len, &len);
1029+
if (ret)
1030+
goto out;
1031+
}
1032+
1033+
ret = 0;
1034+
*out = len;
1035+
1036+
out:
1037+
return ret;
1038+
}
1039+
9671040
void odb_assert_oid_type(struct object_database *odb,
9681041
const struct object_id *oid, enum object_type expect)
9691042
{

odb.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,22 @@ int odb_count_objects(struct object_database *odb,
545545
enum odb_count_objects_flags flags,
546546
unsigned long *out);
547547

548+
/*
549+
* Given an object ID, find the minimum length required to make the
550+
* object ID unique across the whole object database.
551+
*
552+
* The `min_len` determines the minimum abbreviated length that'll be returned
553+
* by this function. If `min_len < 0`, then the function will set a sensible
554+
* default minimum abbreviation length.
555+
*
556+
* Returns 0 on success, a negative error code otherwise. The computed length
557+
* will be assigned to `*out`.
558+
*/
559+
int odb_find_abbrev_len(struct object_database *odb,
560+
const struct object_id *oid,
561+
int min_len,
562+
unsigned *out);
563+
548564
enum {
549565
/*
550566
* By default, `odb_write_object()` does not actually write anything

odb/source-files.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,30 @@ static int odb_source_files_count_objects(struct odb_source *source,
122122
return ret;
123123
}
124124

125+
static int odb_source_files_find_abbrev_len(struct odb_source *source,
126+
const struct object_id *oid,
127+
unsigned min_len,
128+
unsigned *out)
129+
{
130+
struct odb_source_files *files = odb_source_files_downcast(source);
131+
unsigned len = min_len;
132+
int ret;
133+
134+
ret = packfile_store_find_abbrev_len(files->packed, oid, len, &len);
135+
if (ret < 0)
136+
goto out;
137+
138+
ret = odb_source_loose_find_abbrev_len(source, oid, len, &len);
139+
if (ret < 0)
140+
goto out;
141+
142+
*out = len;
143+
ret = 0;
144+
145+
out:
146+
return ret;
147+
}
148+
125149
static int odb_source_files_freshen_object(struct odb_source *source,
126150
const struct object_id *oid)
127151
{
@@ -250,6 +274,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb,
250274
files->base.read_object_stream = odb_source_files_read_object_stream;
251275
files->base.for_each_object = odb_source_files_for_each_object;
252276
files->base.count_objects = odb_source_files_count_objects;
277+
files->base.find_abbrev_len = odb_source_files_find_abbrev_len;
253278
files->base.freshen_object = odb_source_files_freshen_object;
254279
files->base.write_object = odb_source_files_write_object;
255280
files->base.write_object_stream = odb_source_files_write_object_stream;

odb/source.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,18 @@ struct odb_source {
157157
enum odb_count_objects_flags flags,
158158
unsigned long *out);
159159

160+
/*
161+
* This callback is expected to find the minimum required length to
162+
* make the given object ID unique.
163+
*
164+
* The callback is expected to return a negative error code in case it
165+
* failed, 0 otherwise.
166+
*/
167+
int (*find_abbrev_len)(struct odb_source *source,
168+
const struct object_id *oid,
169+
unsigned min_length,
170+
unsigned *out);
171+
160172
/*
161173
* This callback is expected to freshen the given object so that its
162174
* last access time is set to the current time. This is used to ensure
@@ -360,6 +372,18 @@ static inline int odb_source_count_objects(struct odb_source *source,
360372
return source->count_objects(source, flags, out);
361373
}
362374

375+
/*
376+
* Determine the minimum required length to make the given object ID unique in
377+
* the given source. Returns 0 on success, a negative error code otherwise.
378+
*/
379+
static inline int odb_source_find_abbrev_len(struct odb_source *source,
380+
const struct object_id *oid,
381+
unsigned min_len,
382+
unsigned *out)
383+
{
384+
return source->find_abbrev_len(source, oid, min_len, out);
385+
}
386+
363387
/*
364388
* Freshen an object in the object database by updating its timestamp.
365389
* Returns 1 in case the object has been freshened, 0 in case the object does

0 commit comments

Comments
 (0)