From 570b8b883617df2acfedab88b9f5af0b50c821cd Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 16:58:23 -0400 Subject: chunk-format: note that pair_chunk() is unsafe The pair_chunk() function is provided as an easy helper for parsing chunks that just want a pointer to a set of bytes. But every caller has a hidden bug: because we return only the pointer without the matching chunk size, the callers have no clue how many bytes they are allowed to look at. And as a result, they may read off the end of the mmap'd data when the on-disk file does not match their expectations. Since chunk files are typically used for local-repository data like commit-graph files and midx's, the security implications here are pretty mild. The worst that can happen is that you hand somebody a corrupted repository tarball, and running Git on it does an out-of-bounds read and crashes. So it's worth being more defensive, but we don't need to drop everything and fix every caller immediately. I noticed the problem because the pair_chunk_fn() callback does not look at its chunk_size argument, and wanted to annotate it to silence -Wunused-parameter. We could do that now, but we'd lose the hint that this code should be audited and fixed. So instead, let's set ourselves up for going down that path: 1. Provide a pair_chunk() function that does return the size, which prepares us for fixing these cases. 2. Rename the existing function to pair_chunk_unsafe(). That gives us an easy way to grep for cases which still need to be fixed, and the name should cause anybody adding new calls to think twice before using it. There are no callers of the "safe" version yet, but we'll add some in subsequent patches. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- midx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index 931f557350..3165218ab5 100644 --- a/midx.c +++ b/midx.c @@ -143,19 +143,19 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local MIDX_HEADER_SIZE, m->num_chunks)) goto cleanup_fail; - if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names) == CHUNK_NOT_FOUND) + if (pair_chunk_unsafe(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names) == CHUNK_NOT_FOUND) die(_("multi-pack-index missing required pack-name chunk")); if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m) == CHUNK_NOT_FOUND) die(_("multi-pack-index missing required OID fanout chunk")); - if (pair_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, &m->chunk_oid_lookup) == CHUNK_NOT_FOUND) + if (pair_chunk_unsafe(cf, MIDX_CHUNKID_OIDLOOKUP, &m->chunk_oid_lookup) == CHUNK_NOT_FOUND) die(_("multi-pack-index missing required OID lookup chunk")); - if (pair_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, &m->chunk_object_offsets) == CHUNK_NOT_FOUND) + if (pair_chunk_unsafe(cf, MIDX_CHUNKID_OBJECTOFFSETS, &m->chunk_object_offsets) == CHUNK_NOT_FOUND) die(_("multi-pack-index missing required object offsets chunk")); - pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets); + pair_chunk_unsafe(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets); if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) - pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex); + pair_chunk_unsafe(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex); m->num_objects = ntohl(m->chunk_oid_fanout[255]); -- cgit v1.3 From e3c9600397bde3dcd2f628ff1eec098f79f79b67 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 16:59:19 -0400 Subject: midx: stop ignoring malformed oid fanout chunk When we load the oid-fanout chunk, our callback checks that its size is reasonable and returns an error if not. However, the caller only checks our return value against CHUNK_NOT_FOUND, so we end up ignoring the error completely! Using a too-small fanout table means we end up accessing random memory for the fanout and segfault. We can fix this by checking for any non-zero return value, rather than just CHUNK_NOT_FOUND, and adjusting our error message to cover both cases. We could handle each error code individually, but there's not much point for such a rare case. The extra message produced in the callback makes it clear what is going on. The same pattern is used in the adjacent code. Those cases are actually OK for now because they do not use a custom callback, so the only error they can get is CHUNK_NOT_FOUND. But let's convert them, as this is an accident waiting to happen (especially as we convert some of them away from pair_chunk). The error messages are more verbose, but it should be rare for a user to see these anyway. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- midx.c | 16 ++++++++-------- t/t5319-multi-pack-index.sh | 20 +++++++++++++++++++- 2 files changed, 27 insertions(+), 9 deletions(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index 3165218ab5..21d7dd15ef 100644 --- a/midx.c +++ b/midx.c @@ -143,14 +143,14 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local MIDX_HEADER_SIZE, m->num_chunks)) goto cleanup_fail; - if (pair_chunk_unsafe(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names) == CHUNK_NOT_FOUND) - die(_("multi-pack-index missing required pack-name chunk")); - if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m) == CHUNK_NOT_FOUND) - die(_("multi-pack-index missing required OID fanout chunk")); - if (pair_chunk_unsafe(cf, MIDX_CHUNKID_OIDLOOKUP, &m->chunk_oid_lookup) == CHUNK_NOT_FOUND) - die(_("multi-pack-index missing required OID lookup chunk")); - if (pair_chunk_unsafe(cf, MIDX_CHUNKID_OBJECTOFFSETS, &m->chunk_object_offsets) == CHUNK_NOT_FOUND) - die(_("multi-pack-index missing required object offsets chunk")); + if (pair_chunk_unsafe(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names)) + die(_("multi-pack-index required pack-name chunk missing or corrupted")); + if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m)) + die(_("multi-pack-index required OID fanout chunk missing or corrupted")); + if (pair_chunk_unsafe(cf, MIDX_CHUNKID_OIDLOOKUP, &m->chunk_oid_lookup)) + die(_("multi-pack-index required OID lookup chunk missing or corrupted")); + if (pair_chunk_unsafe(cf, MIDX_CHUNKID_OBJECTOFFSETS, &m->chunk_object_offsets)) + die(_("multi-pack-index required object offsets chunk missing or corrupted")); pair_chunk_unsafe(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 1bcc02004d..b8fe85aeba 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -2,6 +2,7 @@ test_description='multi-pack-indexes' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-chunk.sh GIT_TEST_MULTI_PACK_INDEX=0 objdir=.git/objects @@ -438,7 +439,7 @@ test_expect_success 'verify extended chunk count' ' test_expect_success 'verify missing required chunk' ' corrupt_midx_and_verify $MIDX_BYTE_CHUNK_ID "\01" $objdir \ - "missing required" + "required pack-name chunk missing" ' test_expect_success 'verify invalid chunk offset' ' @@ -1055,4 +1056,21 @@ test_expect_success 'repack with delta islands' ' ) ' +corrupt_chunk () { + midx=.git/objects/pack/multi-pack-index && + test_when_finished "rm -rf $midx" && + git repack -ad --write-midx && + corrupt_chunk_file $midx "$@" +} + +test_expect_success 'reader notices too-small oid fanout chunk' ' + corrupt_chunk OIDF clear 00000000 && + test_must_fail git log 2>err && + cat >expect <<-\EOF && + error: multi-pack-index OID fanout is of the wrong size + fatal: multi-pack-index required OID fanout chunk missing or corrupted + EOF + test_cmp expect err +' + test_done -- cgit v1.3 From fc926567ede1f82799ef49dd54aa37b4497a5453 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:02:03 -0400 Subject: midx: check size of oid lookup chunk When reading an on-disk multi-pack-index, we take the number of objects in the midx from the final value of the fanout table. But we just blindly assume that the chunk containing the actual oid entries is the correct size. This can lead to us reading out-of-bounds memory if the lookup chunk is too small (or if the fanout is corrupted; when they don't agree we cannot tell which one is wrong). Note that we bump the assignment of m->num_objects into the fanout parser callback, so that it's set when we parse the lookup table (otherwise we'd have to manually record the lookup table size and check it later). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- midx.c | 18 +++++++++++++++--- t/t5319-multi-pack-index.sh | 10 ++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index 21d7dd15ef..62e4c03e79 100644 --- a/midx.c +++ b/midx.c @@ -71,6 +71,20 @@ static int midx_read_oid_fanout(const unsigned char *chunk_start, error(_("multi-pack-index OID fanout is of the wrong size")); return 1; } + m->num_objects = ntohl(m->chunk_oid_fanout[255]); + return 0; +} + +static int midx_read_oid_lookup(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct multi_pack_index *m = data; + m->chunk_oid_lookup = chunk_start; + + if (chunk_size != st_mult(m->hash_len, m->num_objects)) { + error(_("multi-pack-index OID lookup chunk is the wrong size")); + return 1; + } return 0; } @@ -147,7 +161,7 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local die(_("multi-pack-index required pack-name chunk missing or corrupted")); if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m)) die(_("multi-pack-index required OID fanout chunk missing or corrupted")); - if (pair_chunk_unsafe(cf, MIDX_CHUNKID_OIDLOOKUP, &m->chunk_oid_lookup)) + if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m)) die(_("multi-pack-index required OID lookup chunk missing or corrupted")); if (pair_chunk_unsafe(cf, MIDX_CHUNKID_OBJECTOFFSETS, &m->chunk_object_offsets)) die(_("multi-pack-index required object offsets chunk missing or corrupted")); @@ -157,8 +171,6 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) pair_chunk_unsafe(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex); - m->num_objects = ntohl(m->chunk_oid_fanout[255]); - CALLOC_ARRAY(m->pack_names, m->num_packs); CALLOC_ARRAY(m->packs, m->num_packs); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index b8fe85aeba..2722e495b2 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1073,4 +1073,14 @@ test_expect_success 'reader notices too-small oid fanout chunk' ' test_cmp expect err ' +test_expect_success 'reader notices too-small oid lookup chunk' ' + corrupt_chunk OIDL clear 00000000 && + test_must_fail git log 2>err && + cat >expect <<-\EOF && + error: multi-pack-index OID lookup chunk is the wrong size + fatal: multi-pack-index required OID lookup chunk missing or corrupted + EOF + test_cmp expect err +' + test_done -- cgit v1.3 From 72a9a08283f1b56598d4af5efb8cd178d4150323 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:14 -0400 Subject: midx: check size of pack names chunk We parse the pack-name chunk as a series of NUL-terminated strings. But since we don't look at the chunk size, there's nothing to guarantee that we don't parse off the end of the chunk (or even off the end of the mapped file). We can record the length, and then as we parse make sure that we never walk past it. The new test exercises the case, though note that it does not actually segfault before this patch. It hits a NUL byte somewhere in one of the other chunks, and comes up with a garbage pack name. You could construct one that reads out-of-bounds (e.g., a PNAM chunk at the end of file), but this case is simple and sufficient to check that we detect the problem. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- midx.c | 11 +++++++++-- midx.h | 1 + t/t5319-multi-pack-index.sh | 11 +++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index 62e4c03e79..ec585cae1b 100644 --- a/midx.c +++ b/midx.c @@ -157,7 +157,7 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local MIDX_HEADER_SIZE, m->num_chunks)) goto cleanup_fail; - if (pair_chunk_unsafe(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names)) + if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len)) die(_("multi-pack-index required pack-name chunk missing or corrupted")); if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m)) die(_("multi-pack-index required OID fanout chunk missing or corrupted")); @@ -176,9 +176,16 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local cur_pack_name = (const char *)m->chunk_pack_names; for (i = 0; i < m->num_packs; i++) { + const char *end; + size_t avail = m->chunk_pack_names_len - + (cur_pack_name - (const char *)m->chunk_pack_names); + m->pack_names[i] = cur_pack_name; - cur_pack_name += strlen(cur_pack_name) + 1; + end = memchr(cur_pack_name, '\0', avail); + if (!end) + die(_("multi-pack-index pack-name chunk is too short")); + cur_pack_name = end + 1; if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) die(_("multi-pack-index pack names out of order: '%s' before '%s'"), diff --git a/midx.h b/midx.h index 5578cd7b83..5b2a7da043 100644 --- a/midx.h +++ b/midx.h @@ -32,6 +32,7 @@ struct multi_pack_index { int local; const unsigned char *chunk_pack_names; + size_t chunk_pack_names_len; const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; const unsigned char *chunk_object_offsets; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 2722e495b2..0a0ccec8a4 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1083,4 +1083,15 @@ test_expect_success 'reader notices too-small oid lookup chunk' ' test_cmp expect err ' +test_expect_success 'reader notices too-small pack names chunk' ' + # There is no NUL to terminate the name here, so the + # chunk is too short. + corrupt_chunk PNAM clear 70656666 && + test_must_fail git log 2>err && + cat >expect <<-\EOF && + fatal: multi-pack-index pack-name chunk is too short + EOF + test_cmp expect err +' + test_done -- cgit v1.3 From c9b9fefc13ccce7ed248488c982d1da38b0905c7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:23 -0400 Subject: midx: enforce chunk alignment on reading The midx reader assumes chunks are aligned to a 4-byte boundary: we treat the fanout chunk as an array of uint32_t, indexing it to feed the results to ntohl(). Without aligning the chunks, we may violate the CPU's alignment constraints. Though many platforms allow this, some do not. And certanily UBSan will complain, since it is undefined behavior. Even though most chunks are naturally 4-byte-aligned (because they are storing uint32_t or larger types), PNAM is not. It stores NUL-terminated pack names, so you can have a valid chunk with any length. The writing side handles this by 4-byte-aligning the chunk, introducing a few extra NULs as necessary. But since we don't check this on the reading side, we may end up with a misaligned fanout and trigger the undefined behavior. We have two options here: 1. Swap out ntohl(fanout[i]) for get_be32(fanout+i) everywhere. The latter handles alignment itself. It's possible that it's slightly slower (though in practice I'm not sure how true that is, especially for these code paths which then go on to do a binary search). 2. Enforce the alignment when reading the chunks. This is easy to do, since the table-of-contents reader can check it in one spot. I went with the second option here, just because it places less burden on maintenance going forward (it is OK to continue using ntohl), and we know it can't have any performance impact on the actual reads. The commit-graph code uses the same chunk API. It's usually also 4-byte aligned, but some chunks are not (like Bloom filter BDAT chunks). So we'll pass "1" here to allow any alignment. It doesn't suffer from the same problem as midx with its fanout because the fanout chunk is always the first (and the rest of the format dictates that the first chunk will start aligned). The new test shows the effect on a midx with a misaligned PNAM chunk. Note that the midx-reading code treats chunk-toc errors as soft, falling back to the non-midx path rather than calling die(), as we do for other parsing errors. Arguably we should make all of these behave the same, but that's out of scope for this patch. For now the test just expects the fallback behavior. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- chunk-format.c | 8 +++++++- chunk-format.h | 3 ++- commit-graph.c | 2 +- midx.c | 3 ++- t/t5319-multi-pack-index.sh | 14 ++++++++++++++ 5 files changed, 26 insertions(+), 4 deletions(-) (limited to 'midx.c') diff --git a/chunk-format.c b/chunk-format.c index 8d910e23f6..09ef86afa6 100644 --- a/chunk-format.c +++ b/chunk-format.c @@ -102,7 +102,8 @@ int read_table_of_contents(struct chunkfile *cf, const unsigned char *mfile, size_t mfile_size, uint64_t toc_offset, - int toc_length) + int toc_length, + unsigned expected_alignment) { int i; uint32_t chunk_id; @@ -120,6 +121,11 @@ int read_table_of_contents(struct chunkfile *cf, error(_("terminating chunk id appears earlier than expected")); return 1; } + if (chunk_offset % expected_alignment != 0) { + error(_("chunk id %"PRIx32" not %d-byte aligned"), + chunk_id, expected_alignment); + return 1; + } table_of_contents += CHUNK_TOC_ENTRY_SIZE; next_chunk_offset = get_be64(table_of_contents + 4); diff --git a/chunk-format.h b/chunk-format.h index 8dce7967f4..d608b8135c 100644 --- a/chunk-format.h +++ b/chunk-format.h @@ -36,7 +36,8 @@ int read_table_of_contents(struct chunkfile *cf, const unsigned char *mfile, size_t mfile_size, uint64_t toc_offset, - int toc_length); + int toc_length, + unsigned expected_alignment); #define CHUNK_NOT_FOUND (-2) diff --git a/commit-graph.c b/commit-graph.c index b217e19194..472332f603 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -417,7 +417,7 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s, cf = init_chunkfile(NULL); if (read_table_of_contents(cf, graph->data, graph_size, - GRAPH_HEADER_SIZE, graph->num_chunks)) + GRAPH_HEADER_SIZE, graph->num_chunks, 1)) goto free_and_return; read_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, graph_read_oid_fanout, graph); diff --git a/midx.c b/midx.c index ec585cae1b..98f84fbe61 100644 --- a/midx.c +++ b/midx.c @@ -154,7 +154,8 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local cf = init_chunkfile(NULL); if (read_table_of_contents(cf, m->data, midx_size, - MIDX_HEADER_SIZE, m->num_chunks)) + MIDX_HEADER_SIZE, m->num_chunks, + MIDX_CHUNK_ALIGNMENT)) goto cleanup_fail; if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len)) diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 0a0ccec8a4..34f5944142 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1094,4 +1094,18 @@ test_expect_success 'reader notices too-small pack names chunk' ' test_cmp expect err ' +test_expect_success 'reader handles unaligned chunks' ' + # A 9-byte PNAM means all of the subsequent chunks + # will no longer be 4-byte aligned, but it is still + # a valid one-pack chunk on its own (it is "foo.pack\0"). + corrupt_chunk PNAM clear 666f6f2e7061636b00 && + git -c core.multipackindex=false log >expect.out && + git -c core.multipackindex=true log >out 2>err && + test_cmp expect.out out && + cat >expect.err <<-\EOF && + error: chunk id 4f494446 not 4-byte aligned + EOF + test_cmp expect.err err +' + test_done -- cgit v1.3 From 0924869b4e27ff9db63e2d85b892244e058fecc3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:27 -0400 Subject: midx: check size of object offset chunk The object offset chunk has one fixed-size entry for each object in the midx. But since we don't check its size, we may access out-of-bounds memory if we see a corrupt or malicious midx file. Sine the entries are fixed-size, the total length can be known up-front, and we can just check it while parsing the chunk (this is similar to what we do when opening pack idx files, which contain a similar offset table). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- midx.c | 15 ++++++++++++++- t/t5319-multi-pack-index.sh | 10 ++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index 98f84fbe61..7b1b45f381 100644 --- a/midx.c +++ b/midx.c @@ -88,6 +88,19 @@ static int midx_read_oid_lookup(const unsigned char *chunk_start, return 0; } +static int midx_read_object_offsets(const unsigned char *chunk_start, + size_t chunk_size, void *data) +{ + struct multi_pack_index *m = data; + m->chunk_object_offsets = chunk_start; + + if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) { + error(_("multi-pack-index object offset chunk is the wrong size")); + return 1; + } + return 0; +} + struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local) { struct multi_pack_index *m = NULL; @@ -164,7 +177,7 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local die(_("multi-pack-index required OID fanout chunk missing or corrupted")); if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m)) die(_("multi-pack-index required OID lookup chunk missing or corrupted")); - if (pair_chunk_unsafe(cf, MIDX_CHUNKID_OBJECTOFFSETS, &m->chunk_object_offsets)) + if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m)) die(_("multi-pack-index required object offsets chunk missing or corrupted")); pair_chunk_unsafe(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 34f5944142..30687d5452 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1108,4 +1108,14 @@ test_expect_success 'reader handles unaligned chunks' ' test_cmp expect.err err ' +test_expect_success 'reader notices too-small object offset chunk' ' + corrupt_chunk OOFF clear 00000000 && + test_must_fail git log 2>err && + cat >expect <<-\EOF && + error: multi-pack-index object offset chunk is the wrong size + fatal: multi-pack-index required object offsets chunk missing or corrupted + EOF + test_cmp expect err +' + test_done -- cgit v1.3 From 2abd56e9b2195c8111ff5d16efafabc5bccba92b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:30 -0400 Subject: midx: bounds-check large offset chunk When we see a large offset bit in the regular midx offset table, we use the entry as an index into a separate large offset table (just like a pack idx does). But we don't bounds-check the access to that large offset table (nor even record its size when we parse the chunk!). The equivalent code for a regular pack idx is in check_pack_index_ptr(). But things are a bit simpler here because of the chunked format: we can just check our array index directly. As a bonus, we can get rid of the st_mult() here. If our array bounds-check is successful, then we know that the result will fit in a size_t (and the bounds check uses a division to avoid overflow entirely). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- midx.c | 8 +++++--- midx.h | 1 + t/t5319-multi-pack-index.sh | 20 ++++++++++++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index 7b1b45f381..3e768d0df0 100644 --- a/midx.c +++ b/midx.c @@ -180,7 +180,8 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m)) die(_("multi-pack-index required object offsets chunk missing or corrupted")); - pair_chunk_unsafe(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets); + pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets, + &m->chunk_large_offsets_len); if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) pair_chunk_unsafe(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex); @@ -303,8 +304,9 @@ off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); offset32 ^= MIDX_LARGE_OFFSET_NEEDED; - return get_be64(m->chunk_large_offsets + - st_mult(sizeof(uint64_t), offset32)); + if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t)) + die(_("multi-pack-index large offset out of bounds")); + return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32); } return offset32; diff --git a/midx.h b/midx.h index 5b2a7da043..e8e8884d16 100644 --- a/midx.h +++ b/midx.h @@ -37,6 +37,7 @@ struct multi_pack_index { const unsigned char *chunk_oid_lookup; const unsigned char *chunk_object_offsets; const unsigned char *chunk_large_offsets; + size_t chunk_large_offsets_len; const unsigned char *chunk_revindex; const char **pack_names; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 30687d5452..16050f39d9 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1118,4 +1118,24 @@ test_expect_success 'reader notices too-small object offset chunk' ' test_cmp expect err ' +test_expect_success 'reader bounds-checks large offset table' ' + # re-use the objects64 dir here to cheaply get access to a midx + # with large offsets. + git init repo && + test_when_finished "rm -rf repo" && + ( + cd repo && + (cd ../objects64 && pwd) >.git/objects/info/alternates && + git multi-pack-index --object-dir=../objects64 write && + midx=../objects64/pack/multi-pack-index && + corrupt_chunk_file $midx LOFF clear && + test_must_fail git cat-file \ + --batch-check --batch-all-objects 2>err && + cat >expect <<-\EOF && + fatal: multi-pack-index large offset out of bounds + EOF + test_cmp expect err + ) +' + test_done -- cgit v1.3 From c0fe9b2da5610bd4ff62d7871dfbbfa0247c7949 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:33 -0400 Subject: midx: check size of revindex chunk When we load a revindex from disk, we check the size of the file compared to the number of objects we expect it to have. But when we use a RIDX chunk stored directly in the midx, we just access the memory directly. This can lead to out-of-bounds memory access for a corrupted or malicious multi-pack-index file. We can catch this by recording the RIDX chunk size, and then checking it against the expected size when we "load" the revindex. Note that this check is much simpler than the one that load_revindex_from_disk() does, because we just have the data array with no header (so we do not need to account for the header size, and nor do we need to bother validating the header values). The test confirms both that we catch this case, and that we continue the process (the revindex is required to use the midx bitmaps, but we fallback to a non-bitmap traversal). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- midx.c | 3 ++- midx.h | 1 + pack-revindex.c | 13 ++++++++++++- t/t5319-multi-pack-index.sh | 17 +++++++++++++++++ 4 files changed, 32 insertions(+), 2 deletions(-) (limited to 'midx.c') diff --git a/midx.c b/midx.c index 3e768d0df0..2f3863c936 100644 --- a/midx.c +++ b/midx.c @@ -184,7 +184,8 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local &m->chunk_large_offsets_len); if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) - pair_chunk_unsafe(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex); + pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex, + &m->chunk_revindex_len); CALLOC_ARRAY(m->pack_names, m->num_packs); CALLOC_ARRAY(m->packs, m->num_packs); diff --git a/midx.h b/midx.h index e8e8884d16..a5d98919c8 100644 --- a/midx.h +++ b/midx.h @@ -39,6 +39,7 @@ struct multi_pack_index { const unsigned char *chunk_large_offsets; size_t chunk_large_offsets_len; const unsigned char *chunk_revindex; + size_t chunk_revindex_len; const char **pack_names; struct packed_git **packs; diff --git a/pack-revindex.c b/pack-revindex.c index 7fffcad912..6d8fd3645a 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -343,6 +343,17 @@ int verify_pack_revindex(struct packed_git *p) return res; } +static int can_use_midx_ridx_chunk(struct multi_pack_index *m) +{ + if (!m->chunk_revindex) + return 0; + if (m->chunk_revindex_len != st_mult(sizeof(uint32_t), m->num_objects)) { + error(_("multi-pack-index reverse-index chunk is the wrong size")); + return 0; + } + return 1; +} + int load_midx_revindex(struct multi_pack_index *m) { struct strbuf revindex_name = STRBUF_INIT; @@ -351,7 +362,7 @@ int load_midx_revindex(struct multi_pack_index *m) if (m->revindex_data) return 0; - if (m->chunk_revindex) { + if (can_use_midx_ridx_chunk(m)) { /* * If the MIDX `m` has a `RIDX` chunk, then use its contents for * the reverse index instead of trying to load a separate `.rev` diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 16050f39d9..2a11dd1af6 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1138,4 +1138,21 @@ test_expect_success 'reader bounds-checks large offset table' ' ) ' +test_expect_success 'reader notices too-small revindex chunk' ' + # We only get a revindex with bitmaps (and likewise only + # load it when they are asked for). + test_config repack.writeBitmaps true && + corrupt_chunk RIDX clear 00000000 && + git -c core.multipackIndex=false rev-list \ + --all --use-bitmap-index >expect.out && + git -c core.multipackIndex=true rev-list \ + --all --use-bitmap-index >out 2>err && + test_cmp expect.out out && + cat >expect.err <<-\EOF && + error: multi-pack-index reverse-index chunk is the wrong size + warning: multi-pack bitmap is missing required reverse index + EOF + test_cmp expect.err err +' + test_done -- cgit v1.3