From de41d03e1c7ab73174716c99b8eaf7ff5884d6bb Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:27 -0400 Subject: [PATCH 01/20] packfile.c: prevent overflow in `nth_packed_object_id()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 37fec86a83 (packfile: abstract away hash constant values, 2018-05-02), `nth_packed_object_id()` started using the variable `the_hash_algo->rawsz` instead of a fixed constant when trying to compute an offset into the ".idx" file for some object position. This can lead to surprising truncation when looking for an object towards the end of a large enough pack, like the following: (gdb) p hashsz $1 = 20 (gdb) p n $2 = 215043814 (gdb) p hashsz * n $3 = 5908984 , which is a debugger session broken on a known-bad call to the `nth_packed_object_id()` function. This behavior predates 37fec86a83, and is original to the v2 index format, via: 74e34e1fca (sha1_file.c: learn about index version 2, 2007-04-09). This is due to §6.4.4.1 of the C99 standard, which states that an untyped integer constant will take the first type in which the value can be accurately represented, among `int`, `long int`, and `long long int`. Since 20 can be represented as an `int`, and `n` is a 32-bit unsigned integer, the resulting computation is defined by §6.3.1.8, and the (signed) integer value representing `n` is converted to an unsigned type, meaning that `20 * n` (for `n` having type `uint32_t`) is equivalent to a multiplication between two unsigned 32-bit integers. When multiplying a sufficiently large `n`, the resulting value can exceed 2^32-1, wrapping around and producing an invalid result. Let's follow the example in f86f769550e (compute pack .idx byte offsets using size_t, 2020-11-13) and replace this computation with `st_mult()`, which will ensure that the computation is done using 64-bits. While here, guard the corresponding computation for packs with v1 indexes, too. Though the likelihood of seeing a bug there is much smaller, since (a) v1 indexes are generated far less frequently than v2 indexes, and (b) they all correspond to packs no larger than 2 GiB, so having enough objects to trigger this overflow is unlikely if not impossible. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- packfile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packfile.c b/packfile.c index fd083c86e0..5ee67de569 100644 --- a/packfile.c +++ b/packfile.c @@ -1920,10 +1920,10 @@ int nth_packed_object_id(struct object_id *oid, return -1; index += 4 * 256; if (p->index_version == 1) { - oidread(oid, index + (hashsz + 4) * n + 4); + oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4)); } else { index += 8; - oidread(oid, index + hashsz * n); + oidread(oid, index + st_mult(hashsz, n)); } return 0; } From 42be681b33ef73be056fb99e3c63c6e9b9c2e7ef Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 13 Jul 2023 20:54:54 -0400 Subject: [PATCH 02/20] packfile.c: prevent overflow in `load_idx()` Prevent an overflow when locating a pack's CRC offset when the number of packed items is greater than 2^32-1/hashsz by guarding the computation with an `st_mult()`. Note that to avoid truncating the result, the `crc_offset` member must itself become a `size_t`. The only usage of this variable (besides the assignment in `load_idx()`) is in `read_v2_anomalous_offsets()` in the index-pack code. There we use the `crc_offset` as a pointer offset, so we are already equipped to handle the type change. Helped-by: Phillip Wood Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- object-store.h | 2 +- packfile.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/object-store.h b/object-store.h index 12415e5ea7..cfe47c9c07 100644 --- a/object-store.h +++ b/object-store.h @@ -110,7 +110,7 @@ struct packed_git { const void *index_data; size_t index_size; uint32_t num_objects; - uint32_t crc_offset; + size_t crc_offset; struct oidset bad_objects; int index_version; time_t mtime; diff --git a/packfile.c b/packfile.c index 5ee67de569..efe4a22c63 100644 --- a/packfile.c +++ b/packfile.c @@ -186,7 +186,7 @@ int load_idx(const char *path, const unsigned int hashsz, void *idx_map, */ (sizeof(off_t) <= 4)) return error("pack too large for current definition of off_t in %s", path); - p->crc_offset = 8 + 4 * 256 + nr * hashsz; + p->crc_offset = st_add(8 + 4 * 256, st_mult(nr, hashsz)); } p->index_version = version; From a519abca02eeca7dce864717b9664c62a124e1c0 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:32 -0400 Subject: [PATCH 03/20] packfile.c: use checked arithmetic in `nth_packed_object_offset()` In a similar spirit as the previous commits, ensure that we use `st_add()` or `st_mult()` when computing values that may overflow the 32-bit unsigned limit. Note that in each of these instances, we prevent 32-bit overflow already since we have explicit casts to `size_t`. So this code is OK as-is, but let's clarify it by using the `st_xyz()` helpers to make it obvious that we are performing the relevant computations using 64 bits. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- packfile.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packfile.c b/packfile.c index efe4a22c63..70837b0d26 100644 --- a/packfile.c +++ b/packfile.c @@ -1948,14 +1948,15 @@ off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n) const unsigned int hashsz = the_hash_algo->rawsz; index += 4 * 256; if (p->index_version == 1) { - return ntohl(*((uint32_t *)(index + (hashsz + 4) * (size_t)n))); + return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n)))); } else { uint32_t off; - index += 8 + (size_t)p->num_objects * (hashsz + 4); - off = ntohl(*((uint32_t *)(index + 4 * n))); + index += st_add(8, st_mult(p->num_objects, hashsz + 4)); + off = ntohl(*((uint32_t *)(index + st_mult(4, n)))); if (!(off & 0x80000000)) return off; - index += (size_t)p->num_objects * 4 + (off & 0x7fffffff) * 8; + index += st_add(st_mult(p->num_objects, 4), + st_mult(off & 0x7fffffff, 8)); check_pack_index_ptr(p, index); return get_be64(index); } From e6c71f239d18af3b99af8fa2b68f16cee813d1e2 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:36 -0400 Subject: [PATCH 04/20] midx.c: use `size_t`'s for fanout nr and alloc The `midx_fanout` struct is used to keep track of a set of OIDs corresponding to each layer of the MIDX's fanout table. It stores an array of entries, along with the number of entries in the table, and the allocated size of the array. Both `nr` and `alloc` are stored as 32-bit unsigned integers. In practice, this should never cause any problems, since most packs have far fewer than 2^32-1 objects. But storing these as `size_t`'s is more appropriate, and prevents us from accidentally overflowing some result when multiplying or adding to either of these values. Update these struct members to be `size_t`'s as appropriate. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/midx.c b/midx.c index b500174d1f..0da2faac67 100644 --- a/midx.c +++ b/midx.c @@ -584,12 +584,14 @@ static void fill_pack_entry(uint32_t pack_int_id, struct midx_fanout { struct pack_midx_entry *entries; - uint32_t nr; - uint32_t alloc; + size_t nr, alloc; }; -static void midx_fanout_grow(struct midx_fanout *fanout, uint32_t nr) +static void midx_fanout_grow(struct midx_fanout *fanout, size_t nr) { + if (nr < fanout->nr) + BUG("negative growth in midx_fanout_grow() (%"PRIuMAX" < %"PRIuMAX")", + (uintmax_t)nr, (uintmax_t)fanout->nr); ALLOC_GROW(fanout->entries, nr, fanout->alloc); } From c2b24ede229dbc6686e37c8cae1e169fc356049e Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:38 -0400 Subject: [PATCH 05/20] midx.c: prevent overflow in `nth_midxed_object_oid()` In a similar spirit as previous commits, avoid overflow when looking up an object's OID in a MIDX when its position is greater than `2^32-1/m->hash_len`. As usual, it is perfectly OK for a MIDX to have as many as 2^32-1 objects (since we use 32-bit fields to count the number of objects at each fanout layer). But if we have more than `2^32-1/m->hash_len` number of objects, we will incorrectly perform the computation using 32-bit integers, overflowing the result. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/midx.c b/midx.c index 0da2faac67..c774cd69c7 100644 --- a/midx.c +++ b/midx.c @@ -254,7 +254,7 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid, if (n >= m->num_objects) return NULL; - oidread(oid, m->chunk_oid_lookup + m->hash_len * n); + oidread(oid, m->chunk_oid_lookup + st_mult(m->hash_len, n)); return oid; } From 5675150cc3bfc03c5721edcfc49fbe43b15b5209 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:41 -0400 Subject: [PATCH 06/20] midx.c: prevent overflow in `nth_midxed_offset()` In a similar spirit as previous patches, avoid an overflow when looking up object offsets in the MIDX's large offset table by guarding the computation via `st_mult()`. This instance is also OK as-is, since the left operand is the result of `sizeof(...)`, which is already a `size_t`. But use `st_mult()` instead here to make it explicit that this computation is to be performed using 64-bit unsigned integers. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/midx.c b/midx.c index c774cd69c7..cf7d06d78b 100644 --- a/midx.c +++ b/midx.c @@ -271,7 +271,8 @@ off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); offset32 ^= MIDX_LARGE_OFFSET_NEEDED; - return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32); + return get_be64(m->chunk_large_offsets + + st_mult(sizeof(uint64_t), offset32)); } return offset32; From cc38127439be50ade60fb2db18c7f5f0cc170a36 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:44 -0400 Subject: [PATCH 07/20] midx.c: store `nr`, `alloc` variables as `size_t`'s In the `write_midx_context` structure, we use two `uint32_t`'s to track the length and allocated size of the packs, and one `uint32_t` to track the number of objects in the MIDX. In practice, having these be 32-bit unsigned values shouldn't cause any problems since we are unlikely to have that many objects or packs in any real-world repository. But these values should be `size_t`'s, so change their type to reflect that. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/midx.c b/midx.c index cf7d06d78b..909639eea5 100644 --- a/midx.c +++ b/midx.c @@ -446,14 +446,14 @@ static int idx_or_pack_name_cmp(const void *_va, const void *_vb) struct write_midx_context { struct pack_info *info; - uint32_t nr; - uint32_t alloc; + size_t nr; + size_t alloc; struct multi_pack_index *m; struct progress *progress; unsigned pack_paths_checked; struct pack_midx_entry *entries; - uint32_t entries_nr; + size_t entries_nr; uint32_t *pack_perm; uint32_t *pack_order; @@ -671,17 +671,18 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout, static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, struct pack_info *info, uint32_t nr_packs, - uint32_t *nr_objects, + size_t *nr_objects, int preferred_pack) { uint32_t cur_fanout, cur_pack, cur_object; - uint32_t alloc_objects, total_objects = 0; + size_t alloc_objects, total_objects = 0; struct midx_fanout fanout = { 0 }; struct pack_midx_entry *deduplicated_entries = NULL; uint32_t start_pack = m ? m->num_packs : 0; for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) - total_objects += info[cur_pack].p->num_objects; + total_objects = st_add(total_objects, + info[cur_pack].p->num_objects); /* * As we de-duplicate by fanout value, we expect the fanout @@ -724,7 +725,8 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m, &fanout.entries[cur_object].oid)) continue; - ALLOC_GROW(deduplicated_entries, *nr_objects + 1, alloc_objects); + ALLOC_GROW(deduplicated_entries, st_add(*nr_objects, 1), + alloc_objects); memcpy(&deduplicated_entries[*nr_objects], &fanout.entries[cur_object], sizeof(struct pack_midx_entry)); From 2bc764c1d4d1bc12ba7d95ceebb0da54ba381be5 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:46 -0400 Subject: [PATCH 08/20] midx.c: prevent overflow in `write_midx_internal()` When writing a MIDX, we use the chunk-format API to write out each individual chunk of the MIDX. Each chunk of the MIDX is tracked via a call to `add_chunk()`, along with the expected size of that chunk. Guard against overflow when dealing with a MIDX with a large number of entries (and consequently, large chunks within the MIDX file itself) to avoid corrupting the contents of the MIDX itself. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/midx.c b/midx.c index 909639eea5..606e3ae79e 100644 --- a/midx.c +++ b/midx.c @@ -1501,21 +1501,22 @@ static int write_midx_internal(const char *object_dir, add_chunk(cf, MIDX_CHUNKID_OIDFANOUT, MIDX_CHUNK_FANOUT_SIZE, write_midx_oid_fanout); add_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, - (size_t)ctx.entries_nr * the_hash_algo->rawsz, + st_mult(ctx.entries_nr, the_hash_algo->rawsz), write_midx_oid_lookup); add_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, - (size_t)ctx.entries_nr * MIDX_CHUNK_OFFSET_WIDTH, + st_mult(ctx.entries_nr, MIDX_CHUNK_OFFSET_WIDTH), write_midx_object_offsets); if (ctx.large_offsets_needed) add_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, - (size_t)ctx.num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH, + st_mult(ctx.num_large_offsets, + MIDX_CHUNK_LARGE_OFFSET_WIDTH), write_midx_large_offsets); if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) { ctx.pack_order = midx_pack_order(&ctx); add_chunk(cf, MIDX_CHUNKID_REVINDEX, - ctx.entries_nr * sizeof(uint32_t), + st_mult(ctx.entries_nr, sizeof(uint32_t)), write_midx_revindex); } From d67609bdded151d78522911629c9e7da7fd06640 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:49 -0400 Subject: [PATCH 09/20] midx.c: prevent overflow in `fill_included_packs_batch()` In a similar spirit as in previous commits, avoid an integer overflow when computing the expected size of a MIDX. (Note that this is also OK as-is, since `p->pack_size` is an `off_t`, so this computation should already be done as 64-bit integers. But again, let's use `st_mult()` to make this fact clear). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- midx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/midx.c b/midx.c index 606e3ae79e..067482a98c 100644 --- a/midx.c +++ b/midx.c @@ -1994,8 +1994,8 @@ static int fill_included_packs_batch(struct repository *r, if (open_pack_index(p) || !p->num_objects) continue; - expected_size = (size_t)(p->pack_size - * pack_info[i].referenced_objects); + expected_size = st_mult(p->pack_size, + pack_info[i].referenced_objects); expected_size /= p->num_objects; if (expected_size >= batch_size) From 0948c501761585bdf3e4e1133700c2eb867d88e6 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:52 -0400 Subject: [PATCH 10/20] pack-bitmap.c: ensure that eindex lookups don't overflow When a bitmap is used to answer some reachability query, it creates a pseudo-bitmap called the "extended index" on top of any existing bitmaps to store objects that are relevant to the query, but not mentioned in the bitmap. When looking up the ith object in the extended index in a bitmap, it is common to write something like: bitmap_get(result, i + bitmap_num_objects(bitmap_git)) , indicating that we want the ith object following all other objects mentioned in the bitmap_git. Since the type of `i` and the return type of `bitmap_num_objects()` are both `uint32_t`s, But if there are either a large number of objects in the bitmap, or a large number of objects in the extended index (or both), this addition can overflow when the sum is greater than 2^32-1. Having that large of a bitmap position is entirely acceptable, but we need to ensure that the computed bitmap position for that object is performed using 64-bits and doesn't overflow. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index 999f962602..ed9294deb8 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1152,7 +1152,7 @@ static void show_extended_objects(struct bitmap_index *bitmap_git, for (i = 0; i < eindex->count; ++i) { struct object *obj; - if (!bitmap_get(objects, bitmap_num_objects(bitmap_git) + i)) + if (!bitmap_get(objects, st_add(bitmap_num_objects(bitmap_git), i))) continue; obj = eindex->objects[i]; @@ -1331,7 +1331,7 @@ static void filter_bitmap_exclude_type(struct bitmap_index *bitmap_git, * them individually. */ for (i = 0; i < eindex->count; i++) { - uint32_t pos = i + bitmap_num_objects(bitmap_git); + size_t pos = st_add(i, bitmap_num_objects(bitmap_git)); if (eindex->objects[i]->type == type && bitmap_get(to_filter, pos) && !bitmap_get(tips, pos)) @@ -1422,7 +1422,7 @@ static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git, } for (i = 0; i < eindex->count; i++) { - uint32_t pos = i + bitmap_num_objects(bitmap_git); + size_t pos = st_add(i, bitmap_num_objects(bitmap_git)); if (eindex->objects[i]->type == OBJ_BLOB && bitmap_get(to_filter, pos) && !bitmap_get(tips, pos) && @@ -1873,7 +1873,8 @@ static uint32_t count_object_type(struct bitmap_index *bitmap_git, for (i = 0; i < eindex->count; ++i) { if (eindex->objects[i]->type == type && - bitmap_get(objects, bitmap_num_objects(bitmap_git) + i)) + bitmap_get(objects, + st_add(bitmap_num_objects(bitmap_git), i))) count++; } @@ -2287,7 +2288,8 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git) for (i = 0; i < eindex->count; i++) { struct object *obj = eindex->objects[i]; - if (!bitmap_get(result, bitmap_num_objects(bitmap_git) + i)) + if (!bitmap_get(result, + st_add(bitmap_num_objects(bitmap_git), i))) continue; if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0) From 48f3f8cf37043f69e9834d38e2439abfde280964 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:54 -0400 Subject: [PATCH 11/20] commit-graph.c: prevent overflow in `write_commit_graph_file()` When writing a commit-graph, we use the chunk-format API to write out each individual chunk of the commit-graph. Each chunk of the commit-graph is tracked via a call to `add_chunk()`, along with the expected size of that chunk. Similar to an earlier commit which handled the identical issue in the MIDX machinery, guard against overflow when dealing with a commit-graph with a large number of entries to avoid corrupting the contents of the commit-graph itself. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 843bdb458d..86c76bd2f8 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1953,35 +1953,35 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) add_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, GRAPH_FANOUT_SIZE, write_graph_chunk_fanout); - add_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, hashsz * ctx->commits.nr, + add_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, st_mult(hashsz, ctx->commits.nr), write_graph_chunk_oids); - add_chunk(cf, GRAPH_CHUNKID_DATA, (hashsz + 16) * ctx->commits.nr, + add_chunk(cf, GRAPH_CHUNKID_DATA, st_mult(hashsz + 16, ctx->commits.nr), write_graph_chunk_data); if (ctx->write_generation_data) add_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA, - sizeof(uint32_t) * ctx->commits.nr, + st_mult(sizeof(uint32_t), ctx->commits.nr), write_graph_chunk_generation_data); if (ctx->num_generation_data_overflows) add_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW, - sizeof(timestamp_t) * ctx->num_generation_data_overflows, + st_mult(sizeof(timestamp_t), ctx->num_generation_data_overflows), write_graph_chunk_generation_data_overflow); if (ctx->num_extra_edges) add_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, - 4 * ctx->num_extra_edges, + st_mult(4, ctx->num_extra_edges), write_graph_chunk_extra_edges); if (ctx->changed_paths) { add_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES, - sizeof(uint32_t) * ctx->commits.nr, + st_mult(sizeof(uint32_t), ctx->commits.nr), write_graph_chunk_bloom_indexes); add_chunk(cf, GRAPH_CHUNKID_BLOOMDATA, - sizeof(uint32_t) * 3 - + ctx->total_bloom_filter_data_size, + st_add(sizeof(uint32_t) * 3, + ctx->total_bloom_filter_data_size), write_graph_chunk_bloom_data); } if (ctx->num_commit_graphs_after > 1) add_chunk(cf, GRAPH_CHUNKID_BASE, - hashsz * (ctx->num_commit_graphs_after - 1), + st_mult(hashsz, ctx->num_commit_graphs_after - 1), write_graph_chunk_base); hashwrite_be32(f, GRAPH_SIGNATURE); @@ -1999,7 +1999,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) get_num_chunks(cf)); ctx->progress = start_delayed_progress( progress_title.buf, - get_num_chunks(cf) * ctx->commits.nr); + st_mult(get_num_chunks(cf), ctx->commits.nr)); } write_chunkfile(cf, ctx); From 209250ef38f353f174ee9e90e55f5a4605449f70 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:37:57 -0400 Subject: [PATCH 12/20] commit-graph.c: prevent overflow in add_graph_to_chain() The commit-graph uses a fanout table with 4-byte entries to store the number of commits at each shard of the commit-graph. So it is OK to have a commit graph with as many as 2^32-1 stored commits. But we risk overflowing any computation which may exceed the 32-bit (unsigned) maximum when those computations are (incorrectly) performed using 32-bit operands. There are a couple of spots in `add_graph_to_chain()` where we could potentially overflow the result: - First, when comparing the list of existing entries in the commit-graph chain. It is unlikely that this should ever overflow, since it would require having roughly 2^32-1/g->hash_len commit-graphs in the chain. But let's guard that computation with a `st_mult()` just to be safe. - Second, when computing the number of commits in the graph added to the front of the chain. This value is also a 32-bit unsigned, but we should make sure that it does not grow beyond the maximum value. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 86c76bd2f8..17ab3e8029 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -482,7 +482,7 @@ static int add_graph_to_chain(struct commit_graph *g, if (!cur_g || !oideq(&oids[n], &cur_g->oid) || - !hasheq(oids[n].hash, g->chunk_base_graphs + g->hash_len * n)) { + !hasheq(oids[n].hash, g->chunk_base_graphs + st_mult(g->hash_len, n))) { warning(_("commit-graph chain does not match")); return 0; } @@ -492,8 +492,15 @@ static int add_graph_to_chain(struct commit_graph *g, g->base_graph = chain; - if (chain) + if (chain) { + if (unsigned_add_overflows(chain->num_commits, + chain->num_commits_in_base)) { + warning(_("commit count in base graph too high: %"PRIuMAX), + (uintmax_t)chain->num_commits_in_base); + return 0; + } g->num_commits_in_base = chain->num_commits + chain->num_commits_in_base; + } return 1; } From 0bd8f30a0e4e474163eb2d8dc3020d51ec3c8a35 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:38:00 -0400 Subject: [PATCH 13/20] commit-graph.c: prevent overflow in `load_oid_from_graph()` In a similar spirit as previous commits, ensure that we don't overflow when trying to compute an offset into the `chunk_oid_lookup` table when the `lex_index` of the item we're trying to look up exceeds `2^32-1/g->hash_len`. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit-graph.c b/commit-graph.c index 17ab3e8029..517c816a94 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -754,7 +754,7 @@ static void load_oid_from_graph(struct commit_graph *g, lex_index = pos - g->num_commits_in_base; - oidread(oid, g->chunk_oid_lookup + g->hash_len * lex_index); + oidread(oid, g->chunk_oid_lookup + st_mult(g->hash_len, lex_index)); } static struct commit_list **insert_parent_or_die(struct repository *r, From 2740ed1c76df769aa1c6e75020ace72e2cc2e47f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:38:03 -0400 Subject: [PATCH 14/20] commit-graph.c: prevent overflow in `fill_commit_graph_info()` In a similar spirit as previous commits, ensure that we don't overflow in a few spots within `fill_commit_graph_info()`: - First, when computing an offset into the commit data chunk, which can occur when the `lex_index` of the item we're looking up exceeds 2^32-1/GRAPH_DATA_WIDTH. - A similar issue when computing the generation date offset for commits with `lex_index` greater than 2^32-1/4. Note that in practice this will never overflow, since the left-hand operand is from calling `sizeof(...)` and is thus already a `size_t`. But wrap that in an `st_mult()` to make it clear that we intend to perform this computation using 64-bit operands. - Finally, a nearly identical issue as above when computing an offset into the `generation_data_overflow` chunk. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 517c816a94..69110e4dbb 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -790,7 +790,7 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, die(_("invalid commit position. commit-graph is likely corrupt")); lex_index = pos - g->num_commits_in_base; - commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index; + commit_data = g->chunk_commit_data + st_mult(GRAPH_DATA_WIDTH, lex_index); graph_data = commit_graph_data_at(item); graph_data->graph_pos = pos; @@ -800,14 +800,14 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, item->date = (timestamp_t)((date_high << 32) | date_low); if (g->read_generation_data) { - offset = (timestamp_t)get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index); + offset = (timestamp_t)get_be32(g->chunk_generation_data + st_mult(sizeof(uint32_t), lex_index)); if (offset & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW) { if (!g->chunk_generation_data_overflow) die(_("commit-graph requires overflow generation data but has none")); offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW; - graph_data->generation = item->date + get_be64(g->chunk_generation_data_overflow + 8 * offset_pos); + graph_data->generation = item->date + get_be64(g->chunk_generation_data_overflow + st_mult(8, offset_pos)); } else graph_data->generation = item->date + offset; } else From 50a71c2942167654f95d00b450a961cf387547ec Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:38:05 -0400 Subject: [PATCH 15/20] commit-graph.c: prevent overflow in `fill_commit_in_graph()` In a similar spirit as previous commits, ensure that we don't overflow when the lex_index of the commit we are trying to fill out exceeds 2^32-1/(g->hash_len+16). The other hunk touched in this patch is not susceptible to overflow, since an explicit cast is made to a 64-bit unsigned value. For clarity and consistency with the rest of the commits in this series, avoid a tricky to reason about cast, and use `st_mult()` directly. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 69110e4dbb..5e32063d3d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -838,7 +838,7 @@ static int fill_commit_in_graph(struct repository *r, fill_commit_graph_info(item, g, pos); lex_index = pos - g->num_commits_in_base; - commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index; + commit_data = g->chunk_commit_data + st_mult(g->hash_len + 16, lex_index); item->object.parsed = 1; @@ -860,7 +860,7 @@ static int fill_commit_in_graph(struct repository *r, } parent_data_ptr = (uint32_t*)(g->chunk_extra_edges + - 4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK)); + st_mult(4, edge_value & GRAPH_EDGE_LAST_MASK)); do { edge_value = get_be32(parent_data_ptr); pptr = insert_parent_or_die(r, g, From 51c31a6408c1eae3ad6c2f78ec136c1b415cad72 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:38:08 -0400 Subject: [PATCH 16/20] commit-graph.c: prevent overflow in `load_tree_for_commit()` In a similar spirit as previous commits, ensure that we don't overflow when computing an offset into the commit_data chunk when the (relative) graph position exceeds 2^32-1/GRAPH_DATA_WIDTH. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit-graph.c b/commit-graph.c index 5e32063d3d..08d773567f 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -980,7 +980,7 @@ static struct tree *load_tree_for_commit(struct repository *r, g = g->base_graph; commit_data = g->chunk_commit_data + - GRAPH_DATA_WIDTH * (graph_pos - g->num_commits_in_base); + st_mult(GRAPH_DATA_WIDTH, graph_pos - g->num_commits_in_base); oidread(&oid, commit_data); set_commit_tree(c, lookup_tree(r, &oid)); From 19565d093d248ba4c2330d96314a547feed41112 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:38:11 -0400 Subject: [PATCH 17/20] commit-graph.c: prevent overflow in `split_graph_merge_strategy()` In a similar spirit as previous commits, ensure that we don't overflow when choosing how to split and merge different layers of the commit-graph. In particular, avoid a potential overflow between `size_mult` and `num_commits`, as well as a potential overflow between the number of commits currently in the merged graph, and the number of commits in the graph about to be merged. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/commit-graph.c b/commit-graph.c index 08d773567f..d9795e3aa4 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2112,11 +2112,16 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED && flags != COMMIT_GRAPH_SPLIT_REPLACE) { - while (g && (g->num_commits <= size_mult * num_commits || + while (g && (g->num_commits <= st_mult(size_mult, num_commits) || (max_commits && num_commits > max_commits))) { if (g->odb != ctx->odb) break; + if (unsigned_add_overflows(num_commits, g->num_commits)) + die(_("cannot merge graphs with %"PRIuMAX", " + "%"PRIuMAX" commits"), + (uintmax_t)num_commits, + (uintmax_t)g->num_commits); num_commits += g->num_commits; g = g->base_graph; From d76e0a744d3a8c1713f0e913325cab7da92f01ef Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:38:13 -0400 Subject: [PATCH 18/20] commit-graph.c: prevent overflow in `merge_commit_graph()` When merging two commit graphs, ensure that we don't attempt to merge two graphs which, when combined, have more total commits than the 32-bit unsigned maximum. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index d9795e3aa4..8333ce8e04 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2179,6 +2179,11 @@ static void merge_commit_graph(struct write_commit_graph_context *ctx, uint32_t i; uint32_t offset = g->num_commits_in_base; + if (unsigned_add_overflows(ctx->commits.nr, g->num_commits)) + die(_("cannot merge graph %s, too many commits: %"PRIuMAX), + oid_to_hex(&g->oid), + (uintmax_t)st_add(ctx->commits.nr, g->num_commits)); + ALLOC_GROW(ctx->commits.list, ctx->commits.nr + g->num_commits, ctx->commits.alloc); for (i = 0; i < g->num_commits; i++) { From 588af1bfd3c810e02df1d8adc37e9c43a7f97920 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:38:16 -0400 Subject: [PATCH 19/20] commit-graph.c: prevent overflow in `write_commit_graph()` In a similar spirit as previous commits, ensure that we don't overflow when trying to read an existing OID while writing a new commit-graph. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit-graph.c b/commit-graph.c index 8333ce8e04..54697e7a4d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2454,7 +2454,7 @@ int write_commit_graph(struct object_directory *odb, struct commit_graph *g = ctx->r->objects->commit_graph; for (i = 0; i < g->num_commits; i++) { struct object_id oid; - oidread(&oid, g->chunk_oid_lookup + g->hash_len * i); + oidread(&oid, g->chunk_oid_lookup + st_mult(g->hash_len, i)); oid_array_append(&ctx->oids, &oid); } } From 9a25cad7e0228bfd16f2c41b34e9d71a4217085c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 12 Jul 2023 19:38:19 -0400 Subject: [PATCH 20/20] commit-graph.c: prevent overflow in `verify_commit_graph()` In a similar spirit as previous commits, ensure that we don't overflow when trying to read an OID out of an existing commit-graph during verification. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 54697e7a4d..dc5bcfe05b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2585,7 +2585,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) for (i = 0; i < g->num_commits; i++) { struct commit *graph_commit; - oidread(&cur_oid, g->chunk_oid_lookup + g->hash_len * i); + oidread(&cur_oid, g->chunk_oid_lookup + st_mult(g->hash_len, i)); if (i && oidcmp(&prev_oid, &cur_oid) >= 0) graph_report(_("commit-graph has incorrect OID order: %s then %s"), @@ -2633,7 +2633,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) timestamp_t generation; display_progress(progress, i + 1); - oidread(&cur_oid, g->chunk_oid_lookup + g->hash_len * i); + oidread(&cur_oid, g->chunk_oid_lookup + st_mult(g->hash_len, i)); graph_commit = lookup_commit(r, &cur_oid); odb_commit = (struct commit *)create_object(r, &cur_oid, alloc_commit_node(r));