From 72a9a08283f1b56598d4af5efb8cd178d4150323 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:14 -0400 Subject: midx: check size of pack names chunk We parse the pack-name chunk as a series of NUL-terminated strings. But since we don't look at the chunk size, there's nothing to guarantee that we don't parse off the end of the chunk (or even off the end of the mapped file). We can record the length, and then as we parse make sure that we never walk past it. The new test exercises the case, though note that it does not actually segfault before this patch. It hits a NUL byte somewhere in one of the other chunks, and comes up with a garbage pack name. You could construct one that reads out-of-bounds (e.g., a PNAM chunk at the end of file), but this case is simple and sufficient to check that we detect the problem. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- midx.h | 1 + 1 file changed, 1 insertion(+) (limited to 'midx.h') diff --git a/midx.h b/midx.h index 5578cd7b83..5b2a7da043 100644 --- a/midx.h +++ b/midx.h @@ -32,6 +32,7 @@ struct multi_pack_index { int local; const unsigned char *chunk_pack_names; + size_t chunk_pack_names_len; const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; const unsigned char *chunk_object_offsets; -- cgit v1.3 From 2abd56e9b2195c8111ff5d16efafabc5bccba92b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:30 -0400 Subject: midx: bounds-check large offset chunk When we see a large offset bit in the regular midx offset table, we use the entry as an index into a separate large offset table (just like a pack idx does). But we don't bounds-check the access to that large offset table (nor even record its size when we parse the chunk!). The equivalent code for a regular pack idx is in check_pack_index_ptr(). But things are a bit simpler here because of the chunked format: we can just check our array index directly. As a bonus, we can get rid of the st_mult() here. If our array bounds-check is successful, then we know that the result will fit in a size_t (and the bounds check uses a division to avoid overflow entirely). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- midx.c | 8 +++++--- midx.h | 1 + t/t5319-multi-pack-index.sh | 20 ++++++++++++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) (limited to 'midx.h') diff --git a/midx.c b/midx.c index 7b1b45f381..3e768d0df0 100644 --- a/midx.c +++ b/midx.c @@ -180,7 +180,8 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m)) die(_("multi-pack-index required object offsets chunk missing or corrupted")); - pair_chunk_unsafe(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets); + pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets, + &m->chunk_large_offsets_len); if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) pair_chunk_unsafe(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex); @@ -303,8 +304,9 @@ off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos) die(_("multi-pack-index stores a 64-bit offset, but off_t is too small")); offset32 ^= MIDX_LARGE_OFFSET_NEEDED; - return get_be64(m->chunk_large_offsets + - st_mult(sizeof(uint64_t), offset32)); + if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t)) + die(_("multi-pack-index large offset out of bounds")); + return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32); } return offset32; diff --git a/midx.h b/midx.h index 5b2a7da043..e8e8884d16 100644 --- a/midx.h +++ b/midx.h @@ -37,6 +37,7 @@ struct multi_pack_index { const unsigned char *chunk_oid_lookup; const unsigned char *chunk_object_offsets; const unsigned char *chunk_large_offsets; + size_t chunk_large_offsets_len; const unsigned char *chunk_revindex; const char **pack_names; diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 30687d5452..16050f39d9 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1118,4 +1118,24 @@ test_expect_success 'reader notices too-small object offset chunk' ' test_cmp expect err ' +test_expect_success 'reader bounds-checks large offset table' ' + # re-use the objects64 dir here to cheaply get access to a midx + # with large offsets. + git init repo && + test_when_finished "rm -rf repo" && + ( + cd repo && + (cd ../objects64 && pwd) >.git/objects/info/alternates && + git multi-pack-index --object-dir=../objects64 write && + midx=../objects64/pack/multi-pack-index && + corrupt_chunk_file $midx LOFF clear && + test_must_fail git cat-file \ + --batch-check --batch-all-objects 2>err && + cat >expect <<-\EOF && + fatal: multi-pack-index large offset out of bounds + EOF + test_cmp expect err + ) +' + test_done -- cgit v1.3 From c0fe9b2da5610bd4ff62d7871dfbbfa0247c7949 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:33 -0400 Subject: midx: check size of revindex chunk When we load a revindex from disk, we check the size of the file compared to the number of objects we expect it to have. But when we use a RIDX chunk stored directly in the midx, we just access the memory directly. This can lead to out-of-bounds memory access for a corrupted or malicious multi-pack-index file. We can catch this by recording the RIDX chunk size, and then checking it against the expected size when we "load" the revindex. Note that this check is much simpler than the one that load_revindex_from_disk() does, because we just have the data array with no header (so we do not need to account for the header size, and nor do we need to bother validating the header values). The test confirms both that we catch this case, and that we continue the process (the revindex is required to use the midx bitmaps, but we fallback to a non-bitmap traversal). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- midx.c | 3 ++- midx.h | 1 + pack-revindex.c | 13 ++++++++++++- t/t5319-multi-pack-index.sh | 17 +++++++++++++++++ 4 files changed, 32 insertions(+), 2 deletions(-) (limited to 'midx.h') diff --git a/midx.c b/midx.c index 3e768d0df0..2f3863c936 100644 --- a/midx.c +++ b/midx.c @@ -184,7 +184,8 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local &m->chunk_large_offsets_len); if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) - pair_chunk_unsafe(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex); + pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex, + &m->chunk_revindex_len); CALLOC_ARRAY(m->pack_names, m->num_packs); CALLOC_ARRAY(m->packs, m->num_packs); diff --git a/midx.h b/midx.h index e8e8884d16..a5d98919c8 100644 --- a/midx.h +++ b/midx.h @@ -39,6 +39,7 @@ struct multi_pack_index { const unsigned char *chunk_large_offsets; size_t chunk_large_offsets_len; const unsigned char *chunk_revindex; + size_t chunk_revindex_len; const char **pack_names; struct packed_git **packs; diff --git a/pack-revindex.c b/pack-revindex.c index 7fffcad912..6d8fd3645a 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -343,6 +343,17 @@ int verify_pack_revindex(struct packed_git *p) return res; } +static int can_use_midx_ridx_chunk(struct multi_pack_index *m) +{ + if (!m->chunk_revindex) + return 0; + if (m->chunk_revindex_len != st_mult(sizeof(uint32_t), m->num_objects)) { + error(_("multi-pack-index reverse-index chunk is the wrong size")); + return 0; + } + return 1; +} + int load_midx_revindex(struct multi_pack_index *m) { struct strbuf revindex_name = STRBUF_INIT; @@ -351,7 +362,7 @@ int load_midx_revindex(struct multi_pack_index *m) if (m->revindex_data) return 0; - if (m->chunk_revindex) { + if (can_use_midx_ridx_chunk(m)) { /* * If the MIDX `m` has a `RIDX` chunk, then use its contents for * the reverse index instead of trying to load a separate `.rev` diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 16050f39d9..2a11dd1af6 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1138,4 +1138,21 @@ test_expect_success 'reader bounds-checks large offset table' ' ) ' +test_expect_success 'reader notices too-small revindex chunk' ' + # We only get a revindex with bitmaps (and likewise only + # load it when they are asked for). + test_config repack.writeBitmaps true && + corrupt_chunk RIDX clear 00000000 && + git -c core.multipackIndex=false rev-list \ + --all --use-bitmap-index >expect.out && + git -c core.multipackIndex=true rev-list \ + --all --use-bitmap-index >out 2>err && + test_cmp expect.out out && + cat >expect.err <<-\EOF && + error: multi-pack-index reverse-index chunk is the wrong size + warning: multi-pack bitmap is missing required reverse index + EOF + test_cmp expect.err err +' + test_done -- cgit v1.3