From 920f400e919c7c51f81adc6989cdd52630220783 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:50 -0400 Subject: commit-graph: check bounds when accessing BDAT chunk When loading Bloom filters from a commit-graph file, we use the offset values in the BIDX chunk to index into the memory mapped for the BDAT chunk. But since we don't record how big the BDAT chunk is, we just trust that the BIDX offsets won't cause us to read outside of the chunk memory. A corrupted or malicious commit-graph file will cause us to segfault (in practice this isn't a very interesting attack, since commit-graph files are local-only, and the worst case is an out-of-bounds read). We can't fix this by checking the chunk size during parsing, since the data in the BDAT chunk doesn't have a fixed size (that's why we need the BIDX in the first place). So we'll fix it in two parts: 1. Record the BDAT chunk size during parsing, and then later check that the BIDX offsets we look up are within bounds. 2. Because the offsets are relative to the end of the BDAT header, we must also make sure that the BDAT chunk is at least as large as the expected header size. Otherwise, we overflow when trying to move past the header, even for an offset of "0". We can check this early, during the parsing stage. The error messages are rather verbose, but since this is not something you'd expect to see outside of severe bugs or corruption, it makes sense to err on the side of too many details. Sadly we can't mention the filename during the chunk-parsing stage, as we haven't set g->filename at this point, nor passed it down through the stack. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- bloom.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'bloom.c') diff --git a/bloom.c b/bloom.c index aef6b5fea2..61abad7f8c 100644 --- a/bloom.c +++ b/bloom.c @@ -29,6 +29,26 @@ static inline unsigned char get_bitmask(uint32_t pos) return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1)); } +static int check_bloom_offset(struct commit_graph *g, uint32_t pos, + uint32_t offset) +{ + /* + * Note that we allow offsets equal to the data size, which would set + * our pointers at one past the end of the chunk memory. This is + * necessary because the on-disk index points to the end of the + * entries (so we can compute size by comparing adjacent ones). And + * naturally the final entry's end is one-past-the-end of the chunk. + */ + if (offset <= g->chunk_bloom_data_size - BLOOMDATA_CHUNK_HEADER_SIZE) + return 0; + + warning("ignoring out-of-range offset (%"PRIuMAX") for changed-path" + " filter at pos %"PRIuMAX" of %s (chunk size: %"PRIuMAX")", + (uintmax_t)offset, (uintmax_t)pos, + g->filename, (uintmax_t)g->chunk_bloom_data_size); + return -1; +} + static int load_bloom_filter_from_graph(struct commit_graph *g, struct bloom_filter *filter, uint32_t graph_pos) @@ -51,6 +71,10 @@ static int load_bloom_filter_from_graph(struct commit_graph *g, else start_index = 0; + if (check_bloom_offset(g, lex_pos, end_index) < 0 || + check_bloom_offset(g, lex_pos - 1, start_index) < 0) + return 0; + filter->len = end_index - start_index; filter->data = (unsigned char *)(g->chunk_bloom_data + sizeof(unsigned char) * start_index + -- cgit v1.3 From 12192a9db9beb3c45dd5064f34d1fcdc71f6a062 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 9 Oct 2023 17:05:56 -0400 Subject: commit-graph: detect out-of-order BIDX offsets The BIDX chunk tells us the offsets at which each commit's Bloom filters can be found in the BDAT chunk. We compute the length of each filter by checking the offsets of neighbors and subtracting them. If the offsets are out of order, then we'll get a negative length, which we then store as a very large unsigned value. This can cause us to read out-of-bounds memory, as we access the hash data modulo "filter->len * BITS_PER_WORD". We can easily detect this case when loading the individual filters. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- bloom.c | 10 ++++++++++ t/t4216-log-bloom.sh | 13 +++++++++++++ 2 files changed, 23 insertions(+) (limited to 'bloom.c') diff --git a/bloom.c b/bloom.c index 61abad7f8c..1474aa19fa 100644 --- a/bloom.c +++ b/bloom.c @@ -75,6 +75,16 @@ static int load_bloom_filter_from_graph(struct commit_graph *g, check_bloom_offset(g, lex_pos - 1, start_index) < 0) return 0; + if (end_index < start_index) { + warning("ignoring decreasing changed-path index offsets" + " (%"PRIuMAX" > %"PRIuMAX") for positions" + " %"PRIuMAX" and %"PRIuMAX" of %s", + (uintmax_t)start_index, (uintmax_t)end_index, + (uintmax_t)(lex_pos-1), (uintmax_t)lex_pos, + g->filename); + return 0; + } + filter->len = end_index - start_index; filter->data = (unsigned char *)(g->chunk_bloom_data + sizeof(unsigned char) * start_index + diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index f6054cbb27..2ba0324a69 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -441,4 +441,17 @@ test_expect_success 'Bloom reader notices too-small index chunk' ' test_cmp expect.err err ' +test_expect_success 'Bloom reader notices out-of-order index offsets' ' + # we do not know any real offsets, but we can pick + # something plausible; we should not get to the point of + # actually reading from the bogus offsets anyway. + corrupt_graph BIDX 4 0000000c00000005 && + echo "warning: ignoring decreasing changed-path index offsets" \ + "(12 > 5) for positions 1 and 2 of .git/objects/info/commit-graph" >expect.err && + git -c core.commitGraph=false log -- A/B/file2 >expect.out && + git -c core.commitGraph=true log -- A/B/file2 >out 2>err && + test_cmp expect.out out && + test_cmp expect.err err +' + test_done -- cgit v1.3