|
4 | 4 | #include "hex.h" |
5 | 5 | #include "lockfile.h" |
6 | 6 | #include "packfile.h" |
| 7 | +#include "pack.h" |
7 | 8 | #include "object-file.h" |
8 | 9 | #include "hash-lookup.h" |
9 | 10 | #include "midx.h" |
@@ -1251,6 +1252,136 @@ struct write_midx_opts { |
1251 | 1252 | unsigned flags; |
1252 | 1253 | }; |
1253 | 1254 |
|
/*
 * One pack that is a candidate for inclusion in a batched MIDX write.
 */
struct batched_pack {
	/* Heap-allocated file name of the pack's ".idx"; owned by this entry. */
	char *idx_name;
	/* Object count read from that index. */
	uint32_t num_objects;
};
| 1259 | + |
/*
 * State handed to the per-file callback while candidate packs are being
 * gathered from the pack directory.
 */
struct batched_pack_collection {
	/* When non-NULL, packs this MIDX reports as contained are skipped. */
	struct multi_pack_index *base_midx;
	/* When non-NULL, only index names present in this list are kept. */
	struct string_list *packs_to_include;
	/* Growable array of accepted packs: `nr` in use, `alloc` allocated. */
	struct batched_pack *packs;
	size_t nr;
	size_t alloc;
	/* Becomes -1 once reading an index fails; later files are ignored. */
	int ret;
};
| 1268 | + |
| 1269 | +static int read_pack_idx_num_objects(const char *idx_path, |
| 1270 | + uint32_t *num_objects) |
| 1271 | +{ |
| 1272 | + unsigned char header[8]; |
| 1273 | + unsigned char fanout[4]; |
| 1274 | + off_t fanout_offset = 255 * sizeof(uint32_t); |
| 1275 | + int fd = git_open(idx_path); |
| 1276 | + int ret = 0; |
| 1277 | + |
| 1278 | + if (fd < 0) |
| 1279 | + return error_errno(_("could not open pack-index '%s'"), idx_path); |
| 1280 | + |
| 1281 | + if (read_in_full(fd, header, sizeof(header)) != sizeof(header)) { |
| 1282 | + ret = error_errno(_("could not read pack-index header '%s'"), |
| 1283 | + idx_path); |
| 1284 | + goto cleanup; |
| 1285 | + } |
| 1286 | + |
| 1287 | + if (get_be32(header) == PACK_IDX_SIGNATURE) { |
| 1288 | + uint32_t version = get_be32(header + sizeof(uint32_t)); |
| 1289 | + if (version != 2) { |
| 1290 | + ret = error(_("pack-index '%s' is version %"PRIu32 |
| 1291 | + " and is not supported"), |
| 1292 | + idx_path, version); |
| 1293 | + goto cleanup; |
| 1294 | + } |
| 1295 | + fanout_offset += sizeof(header); |
| 1296 | + } |
| 1297 | + |
| 1298 | + if (pread_in_full(fd, fanout, sizeof(fanout), fanout_offset) != |
| 1299 | + sizeof(fanout)) { |
| 1300 | + ret = error_errno(_("could not read pack-index fanout '%s'"), |
| 1301 | + idx_path); |
| 1302 | + goto cleanup; |
| 1303 | + } |
| 1304 | + |
| 1305 | + *num_objects = get_be32(fanout); |
| 1306 | + |
| 1307 | +cleanup: |
| 1308 | + close(fd); |
| 1309 | + return ret; |
| 1310 | +} |
| 1311 | + |
| 1312 | +static void collect_candidate_pack(const char *full_path, |
| 1313 | + size_t full_path_len UNUSED, |
| 1314 | + const char *file_name, |
| 1315 | + void *data) |
| 1316 | +{ |
| 1317 | + struct batched_pack_collection *collection = data; |
| 1318 | + uint32_t num_objects; |
| 1319 | + |
| 1320 | + if (collection->ret) |
| 1321 | + return; |
| 1322 | + if (!ends_with(file_name, ".idx")) |
| 1323 | + return; |
| 1324 | + if (collection->base_midx && |
| 1325 | + midx_contains_pack(collection->base_midx, file_name)) |
| 1326 | + return; |
| 1327 | + if (collection->packs_to_include && |
| 1328 | + !string_list_has_string(collection->packs_to_include, file_name)) |
| 1329 | + return; |
| 1330 | + |
| 1331 | + if (read_pack_idx_num_objects(full_path, &num_objects) < 0) { |
| 1332 | + collection->ret = -1; |
| 1333 | + return; |
| 1334 | + } |
| 1335 | + |
| 1336 | + ALLOC_GROW(collection->packs, collection->nr + 1, collection->alloc); |
| 1337 | + collection->packs[collection->nr].idx_name = xstrdup(file_name); |
| 1338 | + collection->packs[collection->nr].num_objects = num_objects; |
| 1339 | + collection->nr++; |
| 1340 | +} |
| 1341 | + |
| 1342 | +static void clear_batched_packs(struct batched_pack *packs, size_t packs_nr) |
| 1343 | +{ |
| 1344 | + for (size_t i = 0; i < packs_nr; i++) |
| 1345 | + free(packs[i].idx_name); |
| 1346 | + free(packs); |
| 1347 | +} |
| 1348 | + |
| 1349 | +static int collect_candidate_packs(struct odb_source *source, |
| 1350 | + struct multi_pack_index *base_midx, |
| 1351 | + struct string_list *packs_to_include, |
| 1352 | + struct batched_pack **packs, |
| 1353 | + size_t *packs_nr) |
| 1354 | +{ |
| 1355 | + struct batched_pack_collection collection = { |
| 1356 | + .base_midx = base_midx, |
| 1357 | + .packs_to_include = packs_to_include, |
| 1358 | + }; |
| 1359 | + |
| 1360 | + for_each_file_in_pack_dir(source->path, collect_candidate_pack, |
| 1361 | + &collection); |
| 1362 | + |
| 1363 | + if (collection.ret) { |
| 1364 | + clear_batched_packs(collection.packs, collection.nr); |
| 1365 | + return -1; |
| 1366 | + } |
| 1367 | + |
| 1368 | + *packs = collection.packs; |
| 1369 | + *packs_nr = collection.nr; |
| 1370 | + return 0; |
| 1371 | +} |
| 1372 | + |
| 1373 | +static int batched_pack_cmp_objects_desc(const void *va, const void *vb) |
| 1374 | +{ |
| 1375 | + const struct batched_pack *a = va; |
| 1376 | + const struct batched_pack *b = vb; |
| 1377 | + |
| 1378 | + if (a->num_objects > b->num_objects) |
| 1379 | + return -1; |
| 1380 | + if (a->num_objects < b->num_objects) |
| 1381 | + return 1; |
| 1382 | + return strcmp(a->idx_name, b->idx_name); |
| 1383 | +} |
| 1384 | + |
1254 | 1385 | static int write_midx_internal(struct write_midx_opts *opts) |
1255 | 1386 | { |
1256 | 1387 | struct repository *r = opts->source->odb->repo; |
@@ -1881,6 +2012,98 @@ int write_midx_file_only(struct odb_source *source, |
1881 | 2012 | return write_midx_internal(&opts); |
1882 | 2013 | } |
1883 | 2014 |
|
| 2015 | +int write_midx_file_batched(struct odb_source *source, |
| 2016 | + struct string_list *packs_to_include, |
| 2017 | + const char *preferred_pack_name, |
| 2018 | + const char *refs_snapshot, |
| 2019 | + uint32_t max_objects_per_layer, |
| 2020 | + unsigned flags) |
| 2021 | +{ |
| 2022 | + struct repository *r = source->odb->repo; |
| 2023 | + struct batched_pack *candidates = NULL; |
| 2024 | + size_t candidates_nr = 0; |
| 2025 | + size_t i = 0; |
| 2026 | + int result = 0; |
| 2027 | + |
| 2028 | + if (!max_objects_per_layer) |
| 2029 | + return error(_("--max-objects-per-layer must be greater than zero")); |
| 2030 | + if (flags & MIDX_WRITE_COMPACT) |
| 2031 | + return error(_("--max-objects-per-layer is incompatible with compaction")); |
| 2032 | + if (flags & MIDX_WRITE_NO_CHAIN) |
| 2033 | + return error(_("--max-objects-per-layer is incompatible with --no-write-chain-file")); |
| 2034 | + |
| 2035 | + flags |= MIDX_WRITE_INCREMENTAL; |
| 2036 | + |
| 2037 | + odb_reprepare(r->objects); |
| 2038 | + if (collect_candidate_packs(source, get_multi_pack_index(source), |
| 2039 | + packs_to_include, &candidates, |
| 2040 | + &candidates_nr) < 0) |
| 2041 | + return -1; |
| 2042 | + if (!candidates_nr) |
| 2043 | + goto cleanup; |
| 2044 | + |
| 2045 | + QSORT(candidates, candidates_nr, batched_pack_cmp_objects_desc); |
| 2046 | + |
| 2047 | + while (i < candidates_nr) { |
| 2048 | + struct string_list batch = STRING_LIST_INIT_DUP; |
| 2049 | + uint64_t batch_objects = 0; |
| 2050 | + int batch_has_preferred_pack = 0; |
| 2051 | + struct write_midx_opts opts; |
| 2052 | + |
| 2053 | + do { |
| 2054 | + struct batched_pack *candidate = &candidates[i]; |
| 2055 | + |
| 2056 | + string_list_append(&batch, candidate->idx_name); |
| 2057 | + if (preferred_pack_name && |
| 2058 | + !cmp_idx_or_pack_name(preferred_pack_name, |
| 2059 | + candidate->idx_name)) |
| 2060 | + batch_has_preferred_pack = 1; |
| 2061 | + batch_objects += candidate->num_objects; |
| 2062 | + i++; |
| 2063 | + } while (i < candidates_nr && |
| 2064 | + batch_objects + candidates[i].num_objects <= |
| 2065 | + max_objects_per_layer); |
| 2066 | + |
| 2067 | + string_list_sort(&batch); |
| 2068 | + |
| 2069 | + memset(&opts, 0, sizeof(opts)); |
| 2070 | + opts.source = source; |
| 2071 | + opts.packs_to_include = &batch; |
| 2072 | + opts.preferred_pack_name = batch_has_preferred_pack ? |
| 2073 | + preferred_pack_name : NULL; |
| 2074 | + opts.refs_snapshot = refs_snapshot; |
| 2075 | + opts.flags = flags; |
| 2076 | + |
| 2077 | + trace2_region_enter("midx", "write_midx_batched_step", r); |
| 2078 | + trace2_data_intmax("midx", r, "batch:packs", |
| 2079 | + (intmax_t)batch.nr); |
| 2080 | + trace2_data_intmax("midx", r, "batch:objects", |
| 2081 | + (intmax_t)batch_objects); |
| 2082 | + |
| 2083 | + if (write_midx_internal(&opts) < 0) |
| 2084 | + result = -1; |
| 2085 | + |
| 2086 | + string_list_clear(&batch, 0); |
| 2087 | + |
| 2088 | + /* |
| 2089 | + * Reload the object database so the next in-process write sees |
| 2090 | + * the MIDX layer that the previous iteration just linked into |
| 2091 | + * the chain file. |
| 2092 | + */ |
| 2093 | + odb_close(r->objects); |
| 2094 | + odb_reprepare(r->objects); |
| 2095 | + |
| 2096 | + trace2_region_leave("midx", "write_midx_batched_step", r); |
| 2097 | + |
| 2098 | + if (result) |
| 2099 | + break; |
| 2100 | + } |
| 2101 | + |
| 2102 | +cleanup: |
| 2103 | + clear_batched_packs(candidates, candidates_nr); |
| 2104 | + return result; |
| 2105 | +} |
| 2106 | + |
1884 | 2107 | int write_midx_file_compact(struct odb_source *source, |
1885 | 2108 | struct multi_pack_index *from, |
1886 | 2109 | struct multi_pack_index *to, |
|
0 commit comments