|
| 1 | +#include "test-tool.h" |
| 2 | +#include "git-compat-util.h" |
| 3 | +#include "hex.h" |
| 4 | +#include "object.h" |
| 5 | +#include "strbuf.h" |
| 6 | +#include "parse-options.h" |
| 7 | +#include "hash.h" |
| 8 | +#include "git-zlib.h" |
| 9 | + |
| 10 | +/* |
| 11 | + * Write a pack object header for the given type and size. |
| 12 | + * Returns the number of bytes written to the buffer. |
| 13 | + */ |
| 14 | +static size_t write_pack_object_header(unsigned char *buf, enum object_type type, size_t size) |
| 15 | +{ |
| 16 | + unsigned char *p = buf; |
| 17 | + *p = (type << 4) | (size & 0xf); |
| 18 | + size >>= 4; |
| 19 | + while (size) { |
| 20 | + *p++ |= 0x80; |
| 21 | + *p = size & 0x7f; |
| 22 | + size >>= 7; |
| 23 | + } |
| 24 | + p++; |
| 25 | + return p - buf; |
| 26 | +} |
| 27 | + |
| 28 | +/* |
| 29 | + * Write data as an uncompressed zlib stream. |
| 30 | + * For data larger than 64KB, writes multiple uncompressed blocks. |
| 31 | + * If data is NULL, writes zeros. |
| 32 | + * Updates the pack checksum context. |
| 33 | + */ |
| 34 | +static void write_uncompressed_zlib(FILE *f, struct git_hash_ctx *pack_ctx, |
| 35 | + const void *data, size_t len, |
| 36 | + const struct git_hash_algo *algo) |
| 37 | +{ |
| 38 | + unsigned char zlib_header[2] = { 0x78, 0x01 }; /* CMF, FLG */ |
| 39 | + unsigned char block_header[5]; |
| 40 | + static unsigned char zeros[0xffff]; |
| 41 | + const unsigned char *p = data; |
| 42 | + size_t remaining = len; |
| 43 | + uint32_t adler = 1L; /* adler32 initial value */ |
| 44 | + unsigned char adler_buf[4]; |
| 45 | + |
| 46 | + /* Write zlib header */ |
| 47 | + fwrite(zlib_header, 1, 2, f); |
| 48 | + algo->update_fn(pack_ctx, zlib_header, 2); |
| 49 | + |
| 50 | + /* Write uncompressed blocks (max 64KB each) */ |
| 51 | + do { |
| 52 | + size_t block_len = remaining > 0xffff ? 0xffff : remaining; |
| 53 | + int is_final = (block_len == remaining); |
| 54 | + const unsigned char *block_data = data ? p : zeros; |
| 55 | + |
| 56 | + block_header[0] = is_final ? 0x01 : 0x00; |
| 57 | + block_header[1] = block_len & 0xff; |
| 58 | + block_header[2] = (block_len >> 8) & 0xff; |
| 59 | + block_header[3] = block_header[1] ^ 0xff; |
| 60 | + block_header[4] = block_header[2] ^ 0xff; |
| 61 | + |
| 62 | + fwrite(block_header, 1, 5, f); |
| 63 | + algo->update_fn(pack_ctx, block_header, 5); |
| 64 | + |
| 65 | + if (block_len) { |
| 66 | + fwrite(block_data, 1, block_len, f); |
| 67 | + algo->update_fn(pack_ctx, block_data, block_len); |
| 68 | + adler = adler32(adler, block_data, block_len); |
| 69 | + } |
| 70 | + |
| 71 | + if (data) |
| 72 | + p += block_len; |
| 73 | + remaining -= block_len; |
| 74 | + } while (remaining > 0); |
| 75 | + |
| 76 | + /* Write adler32 checksum */ |
| 77 | + put_be32(adler_buf, adler); |
| 78 | + fwrite(adler_buf, 1, 4, f); |
| 79 | + algo->update_fn(pack_ctx, adler_buf, 4); |
| 80 | +} |
| 81 | + |
| 82 | +/* |
| 83 | + * Write an uncompressed object to the pack file. |
| 84 | + * Updates the pack checksum context. |
| 85 | + */ |
| 86 | +static void write_pack_object(FILE *f, struct git_hash_ctx *pack_ctx, |
| 87 | + enum object_type type, const void *data, size_t len, |
| 88 | + const struct git_hash_algo *algo) |
| 89 | +{ |
| 90 | + unsigned char header[32]; |
| 91 | + size_t header_len; |
| 92 | + |
| 93 | + /* Write pack object header */ |
| 94 | + header_len = write_pack_object_header(header, type, len); |
| 95 | + fwrite(header, 1, header_len, f); |
| 96 | + algo->update_fn(pack_ctx, header, header_len); |
| 97 | + |
| 98 | + /* Write the data as uncompressed zlib */ |
| 99 | + write_uncompressed_zlib(f, pack_ctx, data, len, algo); |
| 100 | +} |
| 101 | + |
| 102 | +/* |
| 103 | + * Compute the object ID for a given object. |
| 104 | + */ |
| 105 | +static void hash_object(struct object_id *oid, enum object_type type, |
| 106 | + const void *data, size_t len, |
| 107 | + const struct git_hash_algo *algo) |
| 108 | +{ |
| 109 | + struct git_hash_ctx ctx; |
| 110 | + char header[32]; |
| 111 | + int header_len; |
| 112 | + |
| 113 | + header_len = xsnprintf(header, sizeof(header), "%s %"PRIuMAX, |
| 114 | + type_name(type), (uintmax_t)len) + 1; |
| 115 | + |
| 116 | + algo->init_fn(&ctx); |
| 117 | + algo->update_fn(&ctx, header, header_len); |
| 118 | + algo->update_fn(&ctx, data, len); |
| 119 | + algo->final_oid_fn(oid, &ctx); |
| 120 | +} |
| 121 | + |
| 122 | +/* |
| 123 | + * Compute the object ID for a large object filled with zeros. |
| 124 | + */ |
| 125 | +static void hash_large_zero_object(struct object_id *oid, enum object_type type, |
| 126 | + size_t len, const struct git_hash_algo *algo) |
| 127 | +{ |
| 128 | + struct git_hash_ctx ctx; |
| 129 | + char header[32]; |
| 130 | + int header_len; |
| 131 | + static unsigned char zeros[0xffff]; |
| 132 | + size_t remaining; |
| 133 | + |
| 134 | + header_len = xsnprintf(header, sizeof(header), "%s %"PRIuMAX, |
| 135 | + type_name(type), (uintmax_t)len) + 1; |
| 136 | + |
| 137 | + algo->init_fn(&ctx); |
| 138 | + algo->update_fn(&ctx, header, header_len); |
| 139 | + for (remaining = len; remaining; ) { |
| 140 | + size_t chunk = remaining > sizeof(zeros) ? sizeof(zeros) : remaining; |
| 141 | + algo->update_fn(&ctx, zeros, chunk); |
| 142 | + remaining -= chunk; |
| 143 | + } |
| 144 | + algo->final_oid_fn(oid, &ctx); |
| 145 | +} |
| 146 | + |
| 147 | +/* |
| 148 | + * Write a large object (zeros) to the pack file, streaming to avoid |
| 149 | + * allocating the entire object in memory. |
| 150 | + */ |
| 151 | +static void write_large_pack_object(FILE *f, struct git_hash_ctx *pack_ctx, |
| 152 | + enum object_type type, size_t len, |
| 153 | + struct object_id *oid, |
| 154 | + const struct git_hash_algo *algo) |
| 155 | +{ |
| 156 | + unsigned char header[32]; |
| 157 | + size_t header_len; |
| 158 | + |
| 159 | + /* Compute the object ID */ |
| 160 | + hash_large_zero_object(oid, type, len, algo); |
| 161 | + |
| 162 | + /* Write pack object header */ |
| 163 | + header_len = write_pack_object_header(header, type, len); |
| 164 | + fwrite(header, 1, header_len, f); |
| 165 | + algo->update_fn(pack_ctx, header, header_len); |
| 166 | + |
| 167 | + /* Write the zeros as uncompressed zlib (NULL means zeros) */ |
| 168 | + write_uncompressed_zlib(f, pack_ctx, NULL, len, algo); |
| 169 | +} |
| 170 | + |
| 171 | +/* |
| 172 | + * Generate a pack file with a single large (>4GB) reachable object. |
| 173 | + * |
| 174 | + * Creates: |
| 175 | + * 1. A large blob (all NUL bytes) |
| 176 | + * 2. A tree containing that blob as "file" |
| 177 | + * 3. A commit using that tree |
| 178 | + * 4. The empty tree |
| 179 | + * 5. A child commit using the empty tree |
| 180 | + * |
| 181 | + * This is useful for testing that Git can handle objects larger than 4GB. |
| 182 | + */ |
| 183 | +static int generate_pack_with_large_object(const char *path, size_t blob_size, |
| 184 | + const struct git_hash_algo *algo) |
| 185 | +{ |
| 186 | + FILE *f = fopen_for_writing(path); |
| 187 | + struct git_hash_ctx pack_ctx; |
| 188 | + char header[1024]; |
| 189 | + struct object_id blob_oid, tree_oid, commit_oid, empty_tree_oid, final_commit_oid; |
| 190 | + struct strbuf buf = STRBUF_INIT; |
| 191 | + size_t object_count = 5; /* large blob, tree, commit, empty tree, final commit */ |
| 192 | + |
| 193 | + algo->init_fn(&pack_ctx); |
| 194 | + |
| 195 | + /* Write pack header */ |
| 196 | + memcpy(header, "PACK", 4); |
| 197 | + put_be32(header + 4, 2); |
| 198 | + put_be32(header + 8, object_count); |
| 199 | + fwrite(header, 1, 12, f); |
| 200 | + algo->update_fn(&pack_ctx, header, 12); |
| 201 | + |
| 202 | + /* 1. Write the large blob */ |
| 203 | + write_large_pack_object(f, &pack_ctx, OBJ_BLOB, blob_size, &blob_oid, algo); |
| 204 | + |
| 205 | + /* 2. Write tree containing the blob as "file" */ |
| 206 | + strbuf_addf(&buf, "100644 file%c", '\0'); |
| 207 | + strbuf_add(&buf, blob_oid.hash, algo->rawsz); |
| 208 | + hash_object(&tree_oid, OBJ_TREE, buf.buf, buf.len, algo); |
| 209 | + write_pack_object(f, &pack_ctx, OBJ_TREE, buf.buf, buf.len, algo); |
| 210 | + |
| 211 | + /* 3. Write commit using that tree */ |
| 212 | + strbuf_reset(&buf); |
| 213 | + strbuf_addf(&buf, |
| 214 | + "tree %s\n" |
| 215 | + "author A U Thor <author@example.com> 1234567890 +0000\n" |
| 216 | + "committer C O Mitter <committer@example.com> 1234567890 +0000\n" |
| 217 | + "\n" |
| 218 | + "Large blob commit\n", |
| 219 | + oid_to_hex(&tree_oid)); |
| 220 | + hash_object(&commit_oid, OBJ_COMMIT, buf.buf, buf.len, algo); |
| 221 | + write_pack_object(f, &pack_ctx, OBJ_COMMIT, buf.buf, buf.len, algo); |
| 222 | + |
| 223 | + /* 4. Write the empty tree */ |
| 224 | + hash_object(&empty_tree_oid, OBJ_TREE, "", 0, algo); |
| 225 | + write_pack_object(f, &pack_ctx, OBJ_TREE, "", 0, algo); |
| 226 | + |
| 227 | + /* 5. Write final commit using empty tree, with previous commit as parent */ |
| 228 | + strbuf_reset(&buf); |
| 229 | + strbuf_addf(&buf, |
| 230 | + "tree %s\n" |
| 231 | + "parent %s\n" |
| 232 | + "author A U Thor <author@example.com> 1234567890 +0000\n" |
| 233 | + "committer C O Mitter <committer@example.com> 1234567890 +0000\n" |
| 234 | + "\n" |
| 235 | + "Empty tree commit\n", |
| 236 | + oid_to_hex(&empty_tree_oid), |
| 237 | + oid_to_hex(&commit_oid)); |
| 238 | + hash_object(&final_commit_oid, OBJ_COMMIT, buf.buf, buf.len, algo); |
| 239 | + write_pack_object(f, &pack_ctx, OBJ_COMMIT, buf.buf, buf.len, algo); |
| 240 | + |
| 241 | + /* Write pack trailer (checksum) */ |
| 242 | + algo->final_fn((unsigned char *)header, &pack_ctx); |
| 243 | + fwrite(header, 1, algo->rawsz, f); |
| 244 | + |
| 245 | + fclose(f); |
| 246 | + |
| 247 | + strbuf_release(&buf); |
| 248 | + |
| 249 | + /* Print the final commit OID so caller can set up refs */ |
| 250 | + printf("%s\n", oid_to_hex(&final_commit_oid)); |
| 251 | + |
| 252 | + return 0; |
| 253 | +} |
| 254 | + |
| 255 | +static int cmd__synthesize__pack(int argc, const char **argv, |
| 256 | + const char *prefix UNUSED, |
| 257 | + struct repository *repo UNUSED) |
| 258 | +{ |
| 259 | + const struct git_hash_algo *algo = hash_algos + GIT_HASH_SHA1; |
| 260 | + size_t count; |
| 261 | + const char *path; |
| 262 | + const char * const usage[] = { |
| 263 | + "test-tool synthesize pack <count> <filename>", |
| 264 | + NULL |
| 265 | + }; |
| 266 | + struct option options[] = { |
| 267 | + OPT_END() |
| 268 | + }; |
| 269 | + |
| 270 | + argc = parse_options(argc, argv, NULL, options, usage, |
| 271 | + PARSE_OPT_KEEP_ARGV0); |
| 272 | + if (argc != 3) |
| 273 | + usage_with_options(usage, options); |
| 274 | + |
| 275 | + count = strtoumax(argv[1], NULL, 10); |
| 276 | + path = argv[2]; |
| 277 | + |
| 278 | + return !!generate_pack_with_large_object(path, count, algo); |
| 279 | +} |
| 280 | + |
| 281 | +int cmd__synthesize(int argc, const char **argv) |
| 282 | +{ |
| 283 | + const char *prefix = NULL; |
| 284 | + char const * const synthesize_usage[] = { |
| 285 | + "test-tool synthesize pack <options>", |
| 286 | + NULL, |
| 287 | + }; |
| 288 | + parse_opt_subcommand_fn *fn = NULL; |
| 289 | + struct option options[] = { |
| 290 | + OPT_SUBCOMMAND("pack", &fn, cmd__synthesize__pack), |
| 291 | + OPT_END() |
| 292 | + }; |
| 293 | + argc = parse_options(argc, argv, prefix, options, synthesize_usage, 0); |
| 294 | + return !!fn(argc, argv, prefix, NULL); |
| 295 | +} |
0 commit comments