From 9d78fb0eb6fa67cee069d3a403acb2fadd46f058 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 9 Nov 2023 02:12:07 -0500 Subject: midx: check consistency of fanout table The commit-graph, midx, and pack idx on-disk formats all have oid fanout tables which are fed to bsearch_hash(). If these tables do not increase monotonically, then the binary search may not only produce bogus values, it may cause out of bounds reads. We fixed this for commit graphs in 4169d89645 (commit-graph: check consistency of fanout table, 2023-10-09). That commit argued that we did not need to do the same for midx and pack idx files, because they already did this check. However, that is wrong. We _do_ check the fanout table for pack idx files when we load them, but we only do so for midx files when running "git multi-pack-index verify". So it is possible to get an out-of-bounds read by running a normal command with a specially crafted midx file. Let's fix this using the same solution (and roughly the same test) we did for the commit-graph in 4169d89645. This replaces the same check from "multi-pack-index verify", because verify uses the same read routines, we'd bail on reading the midx much sooner now. So let's make sure to copy its verbose error message. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t5319-multi-pack-index.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 't') diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index d3c9e97feb..313496c0cf 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -1157,4 +1157,18 @@ test_expect_success 'reader notices too-small revindex chunk' ' test_cmp expect.err err ' +test_expect_success 'reader notices out-of-bounds fanout' ' + # This is similar to the out-of-bounds fanout test in t5318. The values + # in adjacent entries should be large but not identical (they + # are used as hi/lo starts for a binary search, which would then abort + # immediately). + corrupt_chunk OIDF 0 $(printf "%02x000000" $(test_seq 0 254)) && + test_must_fail git log 2>err && + cat >expect <<-\EOF && + error: oid fanout out of order: fanout[254] = fe000000 > 5c = fanout[255] + fatal: multi-pack-index required OID fanout chunk missing or corrupted + EOF + test_cmp expect err +' + test_done -- cgit v1.3-5-g9baa From 93d29247298e9ae3fbc6dd8e022a6260b568191a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 9 Nov 2023 02:14:34 -0500 Subject: commit-graph: clarify missing-chunk error messages When a required commit-graph chunk cannot be loaded, we leave its entry in the struct NULL, and then later complain that it is missing. But that's just one reason we might not have loaded it, as we also do some data quality checks. Let's switch these messages to say "missing or corrupted", which is exactly what the midx code says for the same cases. Likewise, we'll use the same phrasing and capitalization as those for consistency. And while we're here, we can mark them for translation (just like the midx ones). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 6 +++--- t/t5318-commit-graph.sh | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 't') diff --git a/commit-graph.c b/commit-graph.c index 87e594c42e..85450a93de 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -292,15 +292,15 @@ static int verify_commit_graph_lite(struct commit_graph *g) * itself. */ if (!g->chunk_oid_fanout) { - error("commit-graph is missing the OID Fanout chunk"); + error(_("commit-graph required OID fanout chunk missing or corrupted")); return 1; } if (!g->chunk_oid_lookup) { - error("commit-graph is missing the OID Lookup chunk"); + error(_("commit-graph required OID lookup chunk missing or corrupted")); return 1; } if (!g->chunk_commit_data) { - error("commit-graph is missing the Commit Data chunk"); + error(_("commit-graph required commit data chunk missing or corrupted")); return 1; } diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 6505ff595a..4c4a4ae13e 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -540,17 +540,17 @@ test_expect_success 'detect low chunk count' ' test_expect_success 'detect missing OID fanout chunk' ' corrupt_graph_and_verify $GRAPH_BYTE_OID_FANOUT_ID "\0" \ - "missing the OID Fanout chunk" + "commit-graph required OID fanout chunk missing or corrupted" ' test_expect_success 'detect missing OID lookup chunk' ' corrupt_graph_and_verify $GRAPH_BYTE_OID_LOOKUP_ID "\0" \ - "missing the OID Lookup chunk" + "commit-graph required OID lookup chunk missing or corrupted" ' test_expect_success 'detect missing commit data chunk' ' corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_DATA_ID "\0" \ - "missing the Commit Data chunk" + "commit-graph required commit data chunk missing or corrupted" ' test_expect_success 'detect incorrect fanout' ' @@ -842,7 +842,7 @@ test_expect_success 'reader notices too-small oid fanout chunk' ' check_corrupt_chunk OIDF clear $(printf "000000%02x" $(test_seq 250)) && cat >expect.err <<-\EOF && error: commit-graph oid fanout chunk is wrong size - error: commit-graph is missing the OID Fanout chunk + error: commit-graph required OID fanout chunk missing or corrupted EOF test_cmp expect.err err ' @@ -874,7 +874,7 @@ test_expect_success 'reader notices too-small commit data chunk' ' check_corrupt_chunk CDAT clear 00000000 && cat >expect.err <<-\EOF && error: commit-graph commit data chunk is wrong size - error: commit-graph is missing the Commit Data chunk + error: commit-graph required commit data chunk missing or corrupted EOF test_cmp expect.err err ' -- cgit v1.3-5-g9baa From d3b6f6c63137b72df5055b71721825e786bcbd6e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 9 Nov 2023 02:24:35 -0500 Subject: commit-graph: use fanout value for graph size Commit-graph, midx, and pack idx files all have both a lookup table of oids and an oid fanout table. In midx and pack idx files, we take the final entry of the fanout table as the source of truth for the number of entries, and then verify that the size of the lookup table matches that. But for commit-graph files, we do the opposite: we use the size of the lookup table as the source of truth, and then check the final fanout entry against it. As noted in 4169d89645 (commit-graph: check consistency of fanout table, 2023-10-09), either is correct. But there are a few reasons to prefer the fanout table as the source of truth: 1. The fanout entries are 32-bits on disk, and that defines the maximum number of entries we can store. But since the size of the lookup table is only bounded by the filesystem, it can be much larger. And hence computing it as the commit-graph does means that we may truncate the result when storing it in a uint32_t. 2. We read the fanout first, then the lookup table. If we're verifying the chunks as we read them, then we'd want to take the fanout as truth (we have nothing yet to check it against) and then we can check that the lookup table matches what we already know. 3. It is pointlessly inconsistent with the midx and pack idx code. Since the three have to do similar size and bounds checks, it is easier to reason about all three if they use the same approach. So this patch moves the assignment of g->num_commits to the fanout parser, and then we can check the size of the lookup chunk as soon as we try to load it. There's already a test covering this situation, which munges the final fanout entry to 2^32-1. In the current code we complain that it does not agree with the table size. But now that we treat the munged value as the source of truth, we'll complain that the lookup table is the wrong size (again, either is correct). So we'll have to update the message we expect (and likewise for an earlier test which does similar munging). There's a similar test for this situation on the midx side, but rather than making a very-large fanout value, it just truncates the lookup table. We could do that here, too, but the very-large fanout value actually shows an interesting corner case. On a 32-bit system, multiplying to find the expected table size would cause an integer overflow. Using st_mult() would detect that, but cause us to die() rather than falling back to the non-graph code path. Checking the size using division (as we do with existing chunk-size checks) avoids the overflow entirely, and the test demonstrates this when run on a 32-bit system. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 8 +++----- t/t5318-commit-graph.sh | 5 +++-- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 't') diff --git a/commit-graph.c b/commit-graph.c index 6abb857323..a7d2fe883f 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -300,10 +300,6 @@ static int verify_commit_graph_lite(struct commit_graph *g) return 1; } } - if (ntohl(g->chunk_oid_fanout[255]) != g->num_commits) { - error("commit-graph oid table and fanout disagree on size"); - return 1; - } return 0; } @@ -315,6 +311,7 @@ static int graph_read_oid_fanout(const unsigned char *chunk_start, if (chunk_size != 256 * sizeof(uint32_t)) return error("commit-graph oid fanout chunk is wrong size"); g->chunk_oid_fanout = (const uint32_t *)chunk_start; + g->num_commits = ntohl(g->chunk_oid_fanout[255]); return 0; } @@ -323,7 +320,8 @@ static int graph_read_oid_lookup(const unsigned char *chunk_start, { struct commit_graph *g = data; g->chunk_oid_lookup = chunk_start; - g->num_commits = chunk_size / g->hash_len; + if (chunk_size / g->hash_len != g->num_commits) + return error(_("commit-graph OID lookup chunk is the wrong size")); return 0; } diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 4c4a4ae13e..9d186e7b13 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -560,7 +560,7 @@ test_expect_success 'detect incorrect fanout' ' test_expect_success 'detect incorrect fanout final value' ' corrupt_graph_and_verify $GRAPH_BYTE_FANOUT2 "\01" \ - "oid table and fanout disagree on size" + "OID lookup chunk is the wrong size" ' test_expect_success 'detect incorrect OID order' ' @@ -850,7 +850,8 @@ test_expect_success 'reader notices too-small oid fanout chunk' ' test_expect_success 'reader notices fanout/lookup table mismatch' ' check_corrupt_chunk OIDF 1020 "FFFFFFFF" && cat >expect.err <<-\EOF && - error: commit-graph oid table and fanout disagree on size + error: commit-graph OID lookup chunk is the wrong size + error: commit-graph required OID lookup chunk missing or corrupted EOF test_cmp expect.err err ' -- cgit v1.3-5-g9baa From 06fb135f8eddc64071a719fe309c771883c07775 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 9 Nov 2023 02:25:07 -0500 Subject: commit-graph: check order while reading fanout chunk We read the fanout chunk, storing a pointer to it, but only confirm that the entries are monotonic in a final "lite" verification step. Let's move that into the actual OIDF chunk callback, so that we can report problems immediately (for all the reasons given in the previous "commit-graph: abort as soon as we see a bogus chunk" commit). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit-graph.c | 25 +++++++++++++------------ t/t5318-commit-graph.sh | 1 + 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 't') diff --git a/commit-graph.c b/commit-graph.c index a7d2fe883f..e5f9e75e18 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -277,8 +277,6 @@ struct commit_graph *load_commit_graph_one_fd_st(struct repository *r, static int verify_commit_graph_lite(struct commit_graph *g) { - int i; - /* * Basic validation shared between parse_commit_graph() * which'll be called every time the graph is used, and the @@ -291,16 +289,6 @@ static int verify_commit_graph_lite(struct commit_graph *g) * over g->num_commits, or runs a checksum on the commit-graph * itself. */ - for (i = 0; i < 255; i++) { - uint32_t oid_fanout1 = ntohl(g->chunk_oid_fanout[i]); - uint32_t oid_fanout2 = ntohl(g->chunk_oid_fanout[i + 1]); - - if (oid_fanout1 > oid_fanout2) { - error("commit-graph fanout values out of order"); - return 1; - } - } - return 0; } @@ -308,10 +296,23 @@ static int graph_read_oid_fanout(const unsigned char *chunk_start, size_t chunk_size, void *data) { struct commit_graph *g = data; + int i; + if (chunk_size != 256 * sizeof(uint32_t)) return error("commit-graph oid fanout chunk is wrong size"); g->chunk_oid_fanout = (const uint32_t *)chunk_start; g->num_commits = ntohl(g->chunk_oid_fanout[255]); + + for (i = 0; i < 255; i++) { + uint32_t oid_fanout1 = ntohl(g->chunk_oid_fanout[i]); + uint32_t oid_fanout2 = ntohl(g->chunk_oid_fanout[i + 1]); + + if (oid_fanout1 > oid_fanout2) { + error("commit-graph fanout values out of order"); + return 1; + } + } + return 0; } diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 9d186e7b13..b0d436a6f0 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -867,6 +867,7 @@ test_expect_success 'reader notices out-of-bounds fanout' ' check_corrupt_chunk OIDF 0 $(printf "%02x000000" $(test_seq 0 254)) && cat >expect.err <<-\EOF && error: commit-graph fanout values out of order + error: commit-graph required OID fanout chunk missing or corrupted EOF test_cmp expect.err err ' -- cgit v1.3-5-g9baa