Skip to content

Commit 8a26ab5

Browse files
committed
odb: add write_packfile, for_each_unique_abbrev, convert_object_id
Add three vtable methods to odb_source that were not part of the recent ps/odb-sources and ps/object-counting series:

- write_packfile: ingest a pack from a file descriptor. The files backend chooses between index-pack (large packs) and unpack-objects (small packs below fetch.unpackLimit). Options cover thin-pack fixing, promisor marking, fsck, lockfile capture, and shallow file passing.
- for_each_unique_abbrev: iterate objects matching a hex prefix for disambiguation. Searches loose objects via oidtree, then multi-pack indices, then non-MIDX packs.
- convert_object_id: translate between hash algorithms using the loose object map. Used during SHA-1 to SHA-256 migration.

Also add ODB_SOURCE_HELPER to the source type enum, preparing for the helper backend in the next commit.

The write_packfile vtable method replaces the pattern where callers spawn index-pack/unpack-objects directly. fast-import already uses odb_write_packfile() and this allows non-files backends to handle pack ingestion through their own mechanism.

Signed-off-by: Aaron Paterson <apaterson@pm.me>
1 parent 41688c1 commit 8a26ab5

6 files changed

Lines changed: 499 additions & 129 deletions

File tree

builtin/fast-import.c

Lines changed: 26 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -518,31 +518,6 @@ static struct object_entry *insert_object(struct object_id *oid)
518518
return e;
519519
}
520520

521-
static void invalidate_pack_id(unsigned int id)
522-
{
523-
unsigned long lu;
524-
struct tag *t;
525-
struct hashmap_iter iter;
526-
struct object_entry *e;
527-
528-
hashmap_for_each_entry(&object_table, &iter, e, ent) {
529-
if (e->pack_id == id)
530-
e->pack_id = MAX_PACK_ID;
531-
}
532-
533-
for (lu = 0; lu < branch_table_sz; lu++) {
534-
struct branch *b;
535-
536-
for (b = branch_table[lu]; b; b = b->table_next_branch)
537-
if (b->pack_id == id)
538-
b->pack_id = MAX_PACK_ID;
539-
}
540-
541-
for (t = first_tag; t; t = t->next_tag)
542-
if (t->pack_id == id)
543-
t->pack_id = MAX_PACK_ID;
544-
}
545-
546521
static unsigned int hc_str(const char *s, size_t len)
547522
{
548523
unsigned int r = 0;
@@ -786,86 +761,21 @@ static void start_packfile(void)
786761
all_packs[pack_id] = p;
787762
}
788763

789-
static const char *create_index(void)
790-
{
791-
const char *tmpfile;
792-
struct pack_idx_entry **idx, **c, **last;
793-
struct object_entry *e;
794-
struct object_entry_pool *o;
795-
796-
/* Build the table of object IDs. */
797-
ALLOC_ARRAY(idx, object_count);
798-
c = idx;
799-
for (o = blocks; o; o = o->next_pool)
800-
for (e = o->next_free; e-- != o->entries;)
801-
if (pack_id == e->pack_id)
802-
*c++ = &e->idx;
803-
last = idx + object_count;
804-
if (c != last)
805-
die(_("internal consistency error creating the index"));
806-
807-
tmpfile = write_idx_file(the_repository, NULL, idx, object_count,
808-
&pack_idx_opts, pack_data->hash);
809-
free(idx);
810-
return tmpfile;
811-
}
812-
813-
static char *keep_pack(const char *curr_index_name)
814-
{
815-
static const char *keep_msg = "fast-import";
816-
struct strbuf name = STRBUF_INIT;
817-
int keep_fd;
818-
819-
odb_pack_name(pack_data->repo, &name, pack_data->hash, "keep");
820-
keep_fd = safe_create_file_with_leading_directories(pack_data->repo,
821-
name.buf);
822-
if (keep_fd < 0)
823-
die_errno(_("cannot create keep file"));
824-
write_or_die(keep_fd, keep_msg, strlen(keep_msg));
825-
if (close(keep_fd))
826-
die_errno(_("failed to write keep file"));
827-
828-
odb_pack_name(pack_data->repo, &name, pack_data->hash, "pack");
829-
if (finalize_object_file(pack_data->repo, pack_data->pack_name, name.buf))
830-
die(_("cannot store pack file"));
831-
832-
odb_pack_name(pack_data->repo, &name, pack_data->hash, "idx");
833-
if (finalize_object_file(pack_data->repo, curr_index_name, name.buf))
834-
die(_("cannot store index file"));
835-
free((void *)curr_index_name);
836-
return strbuf_detach(&name, NULL);
837-
}
838-
839764
static void unkeep_all_packs(void)
840765
{
841766
struct strbuf name = STRBUF_INIT;
842767
int k;
843768

844769
for (k = 0; k < pack_id; k++) {
845770
struct packed_git *p = all_packs[k];
771+
if (!p)
772+
continue;
846773
odb_pack_name(p->repo, &name, p->hash, "keep");
847774
unlink_or_warn(name.buf);
848775
}
849776
strbuf_release(&name);
850777
}
851778

852-
static int loosen_small_pack(const struct packed_git *p)
853-
{
854-
struct child_process unpack = CHILD_PROCESS_INIT;
855-
856-
if (lseek(p->pack_fd, 0, SEEK_SET) < 0)
857-
die_errno(_("failed seeking to start of '%s'"), p->pack_name);
858-
859-
unpack.in = p->pack_fd;
860-
unpack.git_cmd = 1;
861-
unpack.stdout_to_stderr = 1;
862-
strvec_push(&unpack.args, "unpack-objects");
863-
if (!show_stats)
864-
strvec_push(&unpack.args, "-q");
865-
866-
return run_command(&unpack);
867-
}
868-
869779
static void end_packfile(void)
870780
{
871781
static int running;
@@ -876,10 +786,7 @@ static void end_packfile(void)
876786
running = 1;
877787
clear_delta_base_cache();
878788
if (object_count) {
879-
struct odb_source_files *files = odb_source_files_downcast(pack_data->repo->objects->sources);
880-
struct packed_git *new_p;
881789
struct object_id cur_pack_oid;
882-
char *idx_name;
883790
int i;
884791
struct branch *b;
885792
struct tag *t;
@@ -891,26 +798,25 @@ static void end_packfile(void)
891798
object_count, cur_pack_oid.hash,
892799
pack_size);
893800

894-
if (object_count <= unpack_limit) {
895-
if (!loosen_small_pack(pack_data)) {
896-
invalidate_pack_id(pack_id);
897-
goto discard_pack;
898-
}
899-
}
801+
if (lseek(pack_data->pack_fd, 0, SEEK_SET) < 0)
802+
die_errno(_("failed seeking to start of '%s'"),
803+
pack_data->pack_name);
900804

901-
close(pack_data->pack_fd);
902-
idx_name = keep_pack(create_index());
805+
if (odb_write_packfile(the_repository->objects,
806+
pack_data->pack_fd, NULL))
807+
die(_("failed to ingest pack"));
903808

904-
/* Register the packfile with core git's machinery. */
905-
new_p = packfile_store_load_pack(files->packed, idx_name, 1);
906-
if (!new_p)
907-
die(_("core Git rejected index %s"), idx_name);
908-
all_packs[pack_id] = new_p;
909-
free(idx_name);
809+
/*
810+
* Non-files backends do not register a pack on disk,
811+
* so NULL out the slot to prevent use-after-free in
812+
* gfi_unpack_entry.
813+
*/
814+
all_packs[pack_id] = NULL;
910815

911816
/* Print the boundary */
912817
if (pack_edges) {
913-
fprintf(pack_edges, "%s:", new_p->pack_name);
818+
fprintf(pack_edges, "pack-%s:",
819+
hash_to_hex(pack_data->hash));
914820
for (i = 0; i < branch_table_sz; i++) {
915821
for (b = branch_table[i]; b; b = b->table_next_branch) {
916822
if (b->pack_id == pack_id)
@@ -930,7 +836,6 @@ static void end_packfile(void)
930836
pack_id++;
931837
}
932838
else {
933-
discard_pack:
934839
close(pack_data->pack_fd);
935840
unlink_or_warn(pack_data->pack_name);
936841
}
@@ -1239,6 +1144,16 @@ static void *gfi_unpack_entry(
12391144
{
12401145
enum object_type type;
12411146
struct packed_git *p = all_packs[oe->pack_id];
1147+
if (!p) {
1148+
/*
1149+
* Pack was ingested by a non-files backend via
1150+
* odb_write_packfile() and is no longer on disk.
1151+
* Read the object back through the ODB instead.
1152+
*/
1153+
enum object_type odb_type;
1154+
return odb_read_object(the_repository->objects,
1155+
&oe->idx.oid, &odb_type, sizep);
1156+
}
12421157
if (p == pack_data && p->pack_size < (pack_size + the_hash_algo->rawsz)) {
12431158
/* The object is stored in the packfile we are writing to
12441159
* and we have modified it since the last time we scanned

object-name.c

Lines changed: 61 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "packfile.h"
2121
#include "pretty.h"
2222
#include "object-file.h"
23+
#include "odb/source.h"
2324
#include "read-cache-ll.h"
2425
#include "repo-settings.h"
2526
#include "repository.h"
@@ -111,13 +112,28 @@ static enum cb_next match_prefix(const struct object_id *oid, void *arg)
111112
return ds->ambiguous ? CB_BREAK : CB_CONTINUE;
112113
}
113114

115+
static int disambiguate_cb(const struct object_id *oid,
116+
struct object_info *oi UNUSED, void *data)
117+
{
118+
struct disambiguate_state *ds = data;
119+
update_candidates(ds, oid);
120+
return ds->ambiguous ? 1 : 0;
121+
}
122+
114123
static void find_short_object_filename(struct disambiguate_state *ds)
115124
{
116125
struct odb_source *source;
117126

118-
for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next)
119-
oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx),
120-
&ds->bin_pfx, ds->len, match_prefix, ds);
127+
for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) {
128+
if (source->for_each_unique_abbrev) {
129+
odb_source_for_each_unique_abbrev(
130+
source, &ds->bin_pfx, ds->len,
131+
disambiguate_cb, ds);
132+
} else {
133+
oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx),
134+
&ds->bin_pfx, ds->len, match_prefix, ds);
135+
}
136+
}
121137
}
122138

123139
static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b)
@@ -208,15 +224,23 @@ static void find_short_packed_object(struct disambiguate_state *ds)
208224

209225
odb_prepare_alternates(ds->repo->objects);
210226
for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) {
211-
struct multi_pack_index *m = get_multi_pack_index(source);
212-
if (m)
213-
unique_in_midx(m, ds);
227+
if (source->for_each_unique_abbrev) {
228+
odb_source_for_each_unique_abbrev(
229+
source, &ds->bin_pfx, ds->len,
230+
disambiguate_cb, ds);
231+
} else {
232+
struct multi_pack_index *m = get_multi_pack_index(source);
233+
if (m)
234+
unique_in_midx(m, ds);
235+
}
214236
}
215237

216-
repo_for_each_pack(ds->repo, p) {
217-
if (ds->ambiguous)
218-
break;
219-
unique_in_pack(p, ds);
238+
if (!ds->repo->objects->sources->for_each_unique_abbrev) {
239+
repo_for_each_pack(ds->repo, p) {
240+
if (ds->ambiguous)
241+
break;
242+
unique_in_pack(p, ds);
243+
}
220244
}
221245
}
222246

@@ -796,19 +820,38 @@ static void find_abbrev_len_for_pack(struct packed_git *p,
796820
mad->init_len = mad->cur_len;
797821
}
798822

799-
static void find_abbrev_len_packed(struct min_abbrev_data *mad)
823+
static int abbrev_len_cb(const struct object_id *oid,
824+
struct object_info *oi UNUSED, void *data)
800825
{
801-
struct packed_git *p;
826+
struct min_abbrev_data *mad = data;
827+
extend_abbrev_len(oid, mad);
828+
return 0;
829+
}
802830

831+
static void find_abbrev_len_packed(struct min_abbrev_data *mad)
832+
{
803833
odb_prepare_alternates(mad->repo->objects);
804-
for (struct odb_source *source = mad->repo->objects->sources; source; source = source->next) {
805-
struct multi_pack_index *m = get_multi_pack_index(source);
806-
if (m)
807-
find_abbrev_len_for_midx(m, mad);
834+
835+
for (struct odb_source *source = mad->repo->objects->sources;
836+
source; source = source->next) {
837+
if (source->for_each_unique_abbrev) {
838+
mad->init_len = 0;
839+
odb_source_for_each_unique_abbrev(
840+
source, mad->oid, mad->cur_len,
841+
abbrev_len_cb, mad);
842+
mad->init_len = mad->cur_len;
843+
} else {
844+
struct multi_pack_index *m = get_multi_pack_index(source);
845+
if (m)
846+
find_abbrev_len_for_midx(m, mad);
847+
}
808848
}
809849

810-
repo_for_each_pack(mad->repo, p)
811-
find_abbrev_len_for_pack(p, mad);
850+
if (!mad->repo->objects->sources->for_each_unique_abbrev) {
851+
struct packed_git *p;
852+
repo_for_each_pack(mad->repo, p)
853+
find_abbrev_len_for_pack(p, mad);
854+
}
812855
}
813856

814857
void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo,

odb.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -981,6 +981,32 @@ int odb_write_object_stream(struct object_database *odb,
981981
return odb_source_write_object_stream(odb->sources, stream, len, oid);
982982
}
983983

984+
int odb_write_packfile(struct object_database *odb,
985+
int pack_fd,
986+
struct odb_write_packfile_options *opts)
987+
{
988+
return odb_source_write_packfile(odb->sources, pack_fd, opts);
989+
}
990+
991+
int odb_for_each_unique_abbrev(struct object_database *odb,
992+
const struct object_id *oid_prefix,
993+
unsigned int prefix_len,
994+
odb_for_each_object_cb cb,
995+
void *cb_data)
996+
{
997+
int ret;
998+
999+
odb_prepare_alternates(odb);
1000+
for (struct odb_source *source = odb->sources; source; source = source->next) {
1001+
ret = odb_source_for_each_unique_abbrev(source, oid_prefix,
1002+
prefix_len, cb, cb_data);
1003+
if (ret)
1004+
return ret;
1005+
}
1006+
1007+
return 0;
1008+
}
1009+
9841010
struct object_database *odb_new(struct repository *repo,
9851011
const char *primary_source,
9861012
const char *secondary_sources)

odb.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,25 @@ int odb_write_object_stream(struct object_database *odb,
570570
struct odb_write_stream *stream, size_t len,
571571
struct object_id *oid);
572572

573+
/*
574+
* Ingest a pack from a file descriptor into the primary source.
575+
* Returns 0 on success, a negative error code otherwise.
576+
*/
577+
struct odb_write_packfile_options;
578+
int odb_write_packfile(struct object_database *odb,
579+
int pack_fd,
580+
struct odb_write_packfile_options *opts);
581+
582+
/*
583+
* Iterate over all objects across all sources whose ID starts with
584+
* the given prefix. Used for object name disambiguation.
585+
*/
586+
int odb_for_each_unique_abbrev(struct object_database *odb,
587+
const struct object_id *oid_prefix,
588+
unsigned int prefix_len,
589+
odb_for_each_object_cb cb,
590+
void *cb_data);
591+
573592
void parse_alternates(const char *string,
574593
int sep,
575594
const char *relative_base,

0 commit comments

Comments
 (0)