Skip to content

Commit 1c7d7eb

Browse files
committed
odb: add write_packfile, for_each_unique_abbrev, convert_object_id
Add three vtable methods to odb_source that were not part of the recent ps/odb-sources and ps/object-counting series:

- write_packfile: ingest a pack from a file descriptor. The files backend chooses between index-pack (large packs) and unpack-objects (small packs below fetch.unpackLimit). Options cover thin-pack fixing, promisor marking, fsck, lockfile capture, and shallow file passing.

- for_each_unique_abbrev: iterate objects matching a hex prefix for disambiguation. Searches loose objects via oidtree, then multi-pack indices, then non-MIDX packs.

- convert_object_id: translate between hash algorithms using the loose object map. Used during SHA-1 to SHA-256 migration.

Also add ODB_SOURCE_HELPER to the source type enum, preparing for the helper backend in the next commit.

The write_packfile vtable method replaces the pattern where callers spawn index-pack/unpack-objects directly. fast-import already uses odb_write_packfile(), and this allows non-files backends to handle pack ingestion through their own mechanism.

Signed-off-by: Aaron Paterson <apaterson@pm.me>
1 parent 41688c1 commit 1c7d7eb

11 files changed

Lines changed: 604 additions & 242 deletions

File tree

builtin/fast-import.c

Lines changed: 28 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -518,31 +518,6 @@ static struct object_entry *insert_object(struct object_id *oid)
518518
return e;
519519
}
520520

521-
static void invalidate_pack_id(unsigned int id)
522-
{
523-
unsigned long lu;
524-
struct tag *t;
525-
struct hashmap_iter iter;
526-
struct object_entry *e;
527-
528-
hashmap_for_each_entry(&object_table, &iter, e, ent) {
529-
if (e->pack_id == id)
530-
e->pack_id = MAX_PACK_ID;
531-
}
532-
533-
for (lu = 0; lu < branch_table_sz; lu++) {
534-
struct branch *b;
535-
536-
for (b = branch_table[lu]; b; b = b->table_next_branch)
537-
if (b->pack_id == id)
538-
b->pack_id = MAX_PACK_ID;
539-
}
540-
541-
for (t = first_tag; t; t = t->next_tag)
542-
if (t->pack_id == id)
543-
t->pack_id = MAX_PACK_ID;
544-
}
545-
546521
static unsigned int hc_str(const char *s, size_t len)
547522
{
548523
unsigned int r = 0;
@@ -786,86 +761,21 @@ static void start_packfile(void)
786761
all_packs[pack_id] = p;
787762
}
788763

789-
static const char *create_index(void)
790-
{
791-
const char *tmpfile;
792-
struct pack_idx_entry **idx, **c, **last;
793-
struct object_entry *e;
794-
struct object_entry_pool *o;
795-
796-
/* Build the table of object IDs. */
797-
ALLOC_ARRAY(idx, object_count);
798-
c = idx;
799-
for (o = blocks; o; o = o->next_pool)
800-
for (e = o->next_free; e-- != o->entries;)
801-
if (pack_id == e->pack_id)
802-
*c++ = &e->idx;
803-
last = idx + object_count;
804-
if (c != last)
805-
die(_("internal consistency error creating the index"));
806-
807-
tmpfile = write_idx_file(the_repository, NULL, idx, object_count,
808-
&pack_idx_opts, pack_data->hash);
809-
free(idx);
810-
return tmpfile;
811-
}
812-
813-
static char *keep_pack(const char *curr_index_name)
814-
{
815-
static const char *keep_msg = "fast-import";
816-
struct strbuf name = STRBUF_INIT;
817-
int keep_fd;
818-
819-
odb_pack_name(pack_data->repo, &name, pack_data->hash, "keep");
820-
keep_fd = safe_create_file_with_leading_directories(pack_data->repo,
821-
name.buf);
822-
if (keep_fd < 0)
823-
die_errno(_("cannot create keep file"));
824-
write_or_die(keep_fd, keep_msg, strlen(keep_msg));
825-
if (close(keep_fd))
826-
die_errno(_("failed to write keep file"));
827-
828-
odb_pack_name(pack_data->repo, &name, pack_data->hash, "pack");
829-
if (finalize_object_file(pack_data->repo, pack_data->pack_name, name.buf))
830-
die(_("cannot store pack file"));
831-
832-
odb_pack_name(pack_data->repo, &name, pack_data->hash, "idx");
833-
if (finalize_object_file(pack_data->repo, curr_index_name, name.buf))
834-
die(_("cannot store index file"));
835-
free((void *)curr_index_name);
836-
return strbuf_detach(&name, NULL);
837-
}
838-
839764
static void unkeep_all_packs(void)
840765
{
841766
struct strbuf name = STRBUF_INIT;
842767
int k;
843768

844769
for (k = 0; k < pack_id; k++) {
845770
struct packed_git *p = all_packs[k];
771+
if (!p)
772+
continue;
846773
odb_pack_name(p->repo, &name, p->hash, "keep");
847774
unlink_or_warn(name.buf);
848775
}
849776
strbuf_release(&name);
850777
}
851778

852-
static int loosen_small_pack(const struct packed_git *p)
853-
{
854-
struct child_process unpack = CHILD_PROCESS_INIT;
855-
856-
if (lseek(p->pack_fd, 0, SEEK_SET) < 0)
857-
die_errno(_("failed seeking to start of '%s'"), p->pack_name);
858-
859-
unpack.in = p->pack_fd;
860-
unpack.git_cmd = 1;
861-
unpack.stdout_to_stderr = 1;
862-
strvec_push(&unpack.args, "unpack-objects");
863-
if (!show_stats)
864-
strvec_push(&unpack.args, "-q");
865-
866-
return run_command(&unpack);
867-
}
868-
869779
static void end_packfile(void)
870780
{
871781
static int running;
@@ -876,10 +786,7 @@ static void end_packfile(void)
876786
running = 1;
877787
clear_delta_base_cache();
878788
if (object_count) {
879-
struct odb_source_files *files = odb_source_files_downcast(pack_data->repo->objects->sources);
880-
struct packed_git *new_p;
881789
struct object_id cur_pack_oid;
882-
char *idx_name;
883790
int i;
884791
struct branch *b;
885792
struct tag *t;
@@ -891,26 +798,25 @@ static void end_packfile(void)
891798
object_count, cur_pack_oid.hash,
892799
pack_size);
893800

894-
if (object_count <= unpack_limit) {
895-
if (!loosen_small_pack(pack_data)) {
896-
invalidate_pack_id(pack_id);
897-
goto discard_pack;
898-
}
899-
}
801+
if (lseek(pack_data->pack_fd, 0, SEEK_SET) < 0)
802+
die_errno(_("failed seeking to start of '%s'"),
803+
pack_data->pack_name);
900804

901-
close(pack_data->pack_fd);
902-
idx_name = keep_pack(create_index());
805+
if (odb_write_packfile(the_repository->objects,
806+
pack_data->pack_fd, NULL))
807+
die(_("failed to ingest pack"));
903808

904-
/* Register the packfile with core git's machinery. */
905-
new_p = packfile_store_load_pack(files->packed, idx_name, 1);
906-
if (!new_p)
907-
die(_("core Git rejected index %s"), idx_name);
908-
all_packs[pack_id] = new_p;
909-
free(idx_name);
809+
/*
810+
* Non-files backends do not register a pack on disk,
811+
* so NULL out the slot to prevent use-after-free in
812+
* gfi_unpack_entry.
813+
*/
814+
all_packs[pack_id] = NULL;
910815

911816
/* Print the boundary */
912817
if (pack_edges) {
913-
fprintf(pack_edges, "%s:", new_p->pack_name);
818+
fprintf(pack_edges, "pack-%s:",
819+
hash_to_hex(pack_data->hash));
914820
for (i = 0; i < branch_table_sz; i++) {
915821
for (b = branch_table[i]; b; b = b->table_next_branch) {
916822
if (b->pack_id == pack_id)
@@ -930,7 +836,6 @@ static void end_packfile(void)
930836
pack_id++;
931837
}
932838
else {
933-
discard_pack:
934839
close(pack_data->pack_fd);
935840
unlink_or_warn(pack_data->pack_name);
936841
}
@@ -956,7 +861,6 @@ static int store_object(
956861
struct object_id *oidout,
957862
uintmax_t mark)
958863
{
959-
struct odb_source *source;
960864
void *out, *delta;
961865
struct object_entry *e;
962866
unsigned char hdr[96];
@@ -982,11 +886,7 @@ static int store_object(
982886
return 1;
983887
}
984888

985-
for (source = the_repository->objects->sources; source; source = source->next) {
986-
struct odb_source_files *files = odb_source_files_downcast(source);
987-
988-
if (!packfile_list_find_oid(packfile_store_get_packs(files->packed), &oid))
989-
continue;
889+
if (odb_has_object(the_repository->objects, &oid, 0)) {
990890
e->type = type;
991891
e->pack_id = MAX_PACK_ID;
992892
e->idx.offset = 1; /* just not zero! */
@@ -1106,7 +1006,6 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
11061006
size_t in_sz = 64 * 1024, out_sz = 64 * 1024;
11071007
unsigned char *in_buf = xmalloc(in_sz);
11081008
unsigned char *out_buf = xmalloc(out_sz);
1109-
struct odb_source *source;
11101009
struct object_entry *e;
11111010
struct object_id oid;
11121011
unsigned long hdrlen;
@@ -1189,11 +1088,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
11891088
goto out;
11901089
}
11911090

1192-
for (source = the_repository->objects->sources; source; source = source->next) {
1193-
struct odb_source_files *files = odb_source_files_downcast(source);
1194-
1195-
if (!packfile_list_find_oid(packfile_store_get_packs(files->packed), &oid))
1196-
continue;
1091+
if (odb_has_object(the_repository->objects, &oid, 0)) {
11971092
e->type = OBJ_BLOB;
11981093
e->pack_id = MAX_PACK_ID;
11991094
e->idx.offset = 1; /* just not zero! */
@@ -1239,6 +1134,16 @@ static void *gfi_unpack_entry(
12391134
{
12401135
enum object_type type;
12411136
struct packed_git *p = all_packs[oe->pack_id];
1137+
if (!p) {
1138+
/*
1139+
* Pack was ingested by a non-files backend via
1140+
* odb_write_packfile() and is no longer on disk.
1141+
* Read the object back through the ODB instead.
1142+
*/
1143+
enum object_type odb_type;
1144+
return odb_read_object(the_repository->objects,
1145+
&oe->idx.oid, &odb_type, sizep);
1146+
}
12421147
if (p == pack_data && p->pack_size < (pack_size + the_hash_algo->rawsz)) {
12431148
/* The object is stored in the packfile we are writing to
12441149
* and we have modified it since the last time we scanned

builtin/pack-objects.c

Lines changed: 27 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1541,57 +1541,15 @@ static int have_duplicate_entry(const struct object_id *oid,
15411541

15421542
static int want_cruft_object_mtime(struct repository *r,
15431543
const struct object_id *oid,
1544-
unsigned flags, uint32_t mtime)
1544+
unsigned flags UNUSED, uint32_t mtime UNUSED)
15451545
{
1546-
struct odb_source *source;
1547-
1548-
for (source = r->objects->sources; source; source = source->next) {
1549-
struct odb_source_files *files = odb_source_files_downcast(source);
1550-
struct packed_git **cache = packfile_store_get_kept_pack_cache(files->packed, flags);
1551-
1552-
for (; *cache; cache++) {
1553-
struct packed_git *p = *cache;
1554-
off_t ofs;
1555-
uint32_t candidate_mtime;
1556-
1557-
ofs = find_pack_entry_one(oid, p);
1558-
if (!ofs)
1559-
continue;
1560-
1561-
/*
1562-
* We have a copy of the object 'oid' in a non-cruft
1563-
* pack. We can avoid packing an additional copy
1564-
* regardless of what the existing copy's mtime is since
1565-
* it is outside of a cruft pack.
1566-
*/
1567-
if (!p->is_cruft)
1568-
return 0;
1569-
1570-
/*
1571-
* If we have a copy of the object 'oid' in a cruft
1572-
* pack, then either read the cruft pack's mtime for
1573-
* that object, or, if that can't be loaded, assume the
1574-
* pack's mtime itself.
1575-
*/
1576-
if (!load_pack_mtimes(p)) {
1577-
uint32_t pos;
1578-
if (offset_to_pack_pos(p, ofs, &pos) < 0)
1579-
continue;
1580-
candidate_mtime = nth_packed_mtime(p, pos);
1581-
} else {
1582-
candidate_mtime = p->mtime;
1583-
}
1584-
1585-
/*
1586-
* We have a surviving copy of the object in a cruft
1587-
* pack whose mtime is greater than or equal to the one
1588-
* we are considering. We can thus avoid packing an
1589-
* additional copy of that object.
1590-
*/
1591-
if (mtime <= candidate_mtime)
1592-
return 0;
1593-
}
1594-
}
1546+
/*
1547+
* Check if the object exists in a kept source. Dispatches through
1548+
* the vtable: files backends check kept packs, non-files backends
1549+
* check their own kept tracking via OBJECT_INFO_KEPT_ONLY.
1550+
*/
1551+
if (odb_has_object_kept(r->objects, oid))
1552+
return 0;
15951553

15961554
return -1;
15971555
}
@@ -1657,7 +1615,7 @@ static int want_found_object(const struct object_id *oid, int exclude,
16571615
return 0;
16581616
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
16591617
return 0;
1660-
if (has_object_kept_pack(p->repo, oid, flags))
1618+
if (odb_has_object_kept(p->repo->objects, oid))
16611619
return 0;
16621620
} else {
16631621
/*
@@ -1726,8 +1684,6 @@ static int want_object_in_pack_mtime(const struct object_id *oid,
17261684
uint32_t found_mtime)
17271685
{
17281686
int want;
1729-
struct packfile_list_entry *e;
1730-
struct odb_source *source;
17311687

17321688
if (!exclude && local) {
17331689
/*
@@ -1757,25 +1713,18 @@ static int want_object_in_pack_mtime(const struct object_id *oid,
17571713

17581714
odb_prepare_alternates(the_repository->objects);
17591715

1760-
for (source = the_repository->objects->sources; source; source = source->next) {
1761-
struct multi_pack_index *m = get_multi_pack_index(source);
1762-
struct pack_entry e;
1763-
1764-
if (m && fill_midx_entry(m, oid, &e)) {
1765-
want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset, found_mtime);
1766-
if (want != -1)
1767-
return want;
1768-
}
1769-
}
1770-
1771-
for (source = the_repository->objects->sources; source; source = source->next) {
1772-
struct odb_source_files *files = odb_source_files_downcast(source);
1716+
{
1717+
struct object_info oi = OBJECT_INFO_INIT;
17731718

1774-
for (e = files->packed->packs.head; e; e = e->next) {
1775-
struct packed_git *p = e->pack;
1776-
want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime);
1777-
if (!exclude && want > 0)
1778-
packfile_list_prepend(&files->packed->packs, p);
1719+
if (!odb_read_object_info_extended(the_repository->objects,
1720+
oid, &oi,
1721+
OBJECT_INFO_QUICK) &&
1722+
oi.whence == OI_PACKED) {
1723+
struct packed_git *p = oi.u.packed.pack;
1724+
want = want_object_in_pack_one(p, oid, exclude,
1725+
found_pack,
1726+
found_offset,
1727+
found_mtime);
17791728
if (want != -1)
17801729
return want;
17811730
}
@@ -4065,7 +4014,7 @@ static void show_cruft_commit(struct commit *commit, void *data)
40654014

40664015
static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
40674016
{
4068-
return !has_object_kept_pack(to_pack.repo, &obj->oid, KEPT_PACK_IN_CORE);
4017+
return !odb_has_object_kept(to_pack.repo->objects, &obj->oid);
40694018
}
40704019

40714020
static int cruft_include_check(struct commit *commit, void *data)
@@ -4365,17 +4314,15 @@ static void add_objects_in_unpacked_packs(void)
43654314

43664315
odb_prepare_alternates(to_pack.repo->objects);
43674316
for (source = to_pack.repo->objects->sources; source; source = source->next) {
4368-
struct odb_source_files *files = odb_source_files_downcast(source);
4369-
43704317
if (!source->local)
43714318
continue;
43724319

4373-
if (packfile_store_for_each_object(files->packed, &oi,
4374-
add_object_in_unpacked_pack, NULL,
4375-
ODB_FOR_EACH_OBJECT_PACK_ORDER |
4376-
ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
4377-
ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
4378-
ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
4320+
if (odb_source_for_each_object(source, &oi,
4321+
add_object_in_unpacked_pack, NULL,
4322+
ODB_FOR_EACH_OBJECT_PACK_ORDER |
4323+
ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
4324+
ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
4325+
ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
43794326
die(_("cannot open pack index"));
43804327
}
43814328
}

0 commit comments

Comments
 (0)