From 791170fa2b23cfc49ae0e5949b1f301431a6058b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 25 Jan 2022 17:41:13 -0500 Subject: t5326: move tests to t/lib-bitmap.sh In t5326, we have a handful of tests that we would like to run twice: once using the MIDX's new `RIDX` chunk as the source of the reverse-index cache, and once using the separate `.rev` file. But because these tests mutate the state of the underlying repository, and then make assumptions about those mutations occurring in a certain sequence, simply running the tests twice in the same repository is awkward. Instead, extract the core of interesting tests into t/lib-bitmap.sh to prepare for them to be run twice, each in a separate test script. This means that they can each operate on a separate repository, removing any concerns about mutating state. For now, this patch is a strict cut-and-paste of some tests from t5326. The tests which did not move are not interesting with respect to the source of their reverse index data. Signed-off-by: Taylor Blau Reviewed-by: Derrick Stolee Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- t/lib-bitmap.sh | 177 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) (limited to 't/lib-bitmap.sh') diff --git a/t/lib-bitmap.sh b/t/lib-bitmap.sh index 21d0392dda..48a8730a13 100644 --- a/t/lib-bitmap.sh +++ b/t/lib-bitmap.sh @@ -1,6 +1,9 @@ # Helpers for scripts testing bitmap functionality; see t5310 for # example usage. +objdir=.git/objects +midx=$objdir/pack/multi-pack-index + # Compare a file containing rev-list bitmap traversal output to its non-bitmap # counterpart. You can't just use test_cmp for this, because the two produce # subtly different output: @@ -264,3 +267,177 @@ have_delta () { midx_checksum () { test-tool read-midx --checksum "$1" } + +# midx_pack_source +midx_pack_source () { + test-tool read-midx --show-objects .git/objects | grep "^$1 " | cut -f2 +} + +test_rev_exists () { + commit="$1" + + test_expect_success 'reverse index exists' ' + GIT_TRACE2_EVENT=$(pwd)/event.trace \ + git rev-list --test-bitmap "$commit" && + + test_path_is_file $midx-$(midx_checksum $objdir).rev && + grep "\"category\":\"load_midx_revindex\",\"key\":\"source\",\"value\":\"rev\"" event.trace + ' +} + +midx_bitmap_core () { + setup_bitmap_history + + test_expect_success 'create single-pack midx with bitmaps' ' + git repack -ad && + git multi-pack-index write --bitmap && + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap + ' + + test_rev_exists HEAD + + basic_bitmap_tests + + test_expect_success 'create new additional packs' ' + for i in $(test_seq 1 16) + do + test_commit "$i" && + git repack -d || return 1 + done && + + git checkout -b other2 HEAD~8 && + for i in $(test_seq 1 8) + do + test_commit "side-$i" && + git repack -d || return 1 + done && + git checkout second + ' + + test_expect_success 'create multi-pack midx with bitmaps' ' + git multi-pack-index write --bitmap && + + ls $objdir/pack/pack-*.pack >packs && + test_line_count = 25 packs && + + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap + ' + + test_rev_exists HEAD + + basic_bitmap_tests + + test_expect_success '--no-bitmap is respected when bitmaps exist' ' + git multi-pack-index write --bitmap && + + test_commit respect--no-bitmap && + git repack -d && + + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap && + + git multi-pack-index write --no-bitmap && + + test_path_is_file $midx && + test_path_is_missing $midx-$(midx_checksum $objdir).bitmap && + test_path_is_missing $midx-$(midx_checksum $objdir).rev + ' + + test_expect_success 'setup midx with base from later pack' ' + # Write a and b so that "a" is a delta on top of base "b", since Git + # prefers to delete contents out of a base rather than add to a shorter + # object. + test_seq 1 128 >a && + test_seq 1 130 >b && + + git add a b && + git commit -m "initial commit" && + + a=$(git rev-parse HEAD:a) && + b=$(git rev-parse HEAD:b) && + + # In the first pack, "a" is stored as a delta to "b". + p1=$(git pack-objects .git/objects/pack/pack <<-EOF + $a + $b + EOF + ) && + + # In the second pack, "a" is missing, and "b" is not a delta nor base to + # any other object. + p2=$(git pack-objects .git/objects/pack/pack <<-EOF + $b + $(git rev-parse HEAD) + $(git rev-parse HEAD^{tree}) + EOF + ) && + + git prune-packed && + # Use the second pack as the preferred source, so that "b" occurs + # earlier in the MIDX object order, rendering "a" unusable for pack + # reuse. + git multi-pack-index write --bitmap --preferred-pack=pack-$p2.idx && + + have_delta $a $b && + test $(midx_pack_source $a) != $(midx_pack_source $b) + ' + + rev_list_tests 'full bitmap with backwards delta' + + test_expect_success 'clone with bitmaps enabled' ' + git clone --no-local --bare . clone-reverse-delta.git && + test_when_finished "rm -fr clone-reverse-delta.git" && + + git rev-parse HEAD >expect && + git --git-dir=clone-reverse-delta.git rev-parse HEAD >actual && + test_cmp expect actual + ' + + test_expect_success 'changing the preferred pack does not corrupt bitmaps' ' + rm -fr repo && + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit A && + test_commit B && + + git rev-list --objects --no-object-names HEAD^ >A.objects && + git rev-list --objects --no-object-names HEAD^.. >B.objects && + + A=$(git pack-objects $objdir/pack/pack indexes <<-EOF && + pack-$A.idx + pack-$B.idx + EOF + + git multi-pack-index write --bitmap --stdin-packs \ + --preferred-pack=pack-$A.pack err && + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap + ' + + test_rev_exists HEAD~ + + basic_bitmap_tests HEAD~ +} -- cgit v1.3 From a80f0f91b1d6586abe0b37d19c0bd60ce1843571 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 25 Jan 2022 17:41:15 -0500 Subject: t/lib-bitmap.sh: parameterize tests over reverse index source To prepare for reading the reverse index data out of the MIDX itself, teach the `test_rev_exists` function to take an expected "source" for the reverse index data. When given "rev", it asserts that the MIDX's `.rev` file exists, and is loaded when verifying the integrity of its bitmaps. Otherwise, it ensures that trace2 reports the source of the reverse index data as the same string which was given to test_rev_exists(). The following patch will implement reading the reverse index data from the MIDX itself. Signed-off-by: Taylor Blau Reviewed-by: Derrick Stolee Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- t/lib-bitmap.sh | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 't/lib-bitmap.sh') diff --git a/t/lib-bitmap.sh b/t/lib-bitmap.sh index 48a8730a13..253895c04e 100644 --- a/t/lib-bitmap.sh +++ b/t/lib-bitmap.sh @@ -275,17 +275,23 @@ midx_pack_source () { test_rev_exists () { commit="$1" + kind="$2" - test_expect_success 'reverse index exists' ' + test_expect_success "reverse index exists ($kind)" ' GIT_TRACE2_EVENT=$(pwd)/event.trace \ git rev-list --test-bitmap "$commit" && - test_path_is_file $midx-$(midx_checksum $objdir).rev && - grep "\"category\":\"load_midx_revindex\",\"key\":\"source\",\"value\":\"rev\"" event.trace + if test "rev" = "$kind" + then + test_path_is_file $midx-$(midx_checksum $objdir).rev + fi && + grep "\"category\":\"load_midx_revindex\",\"key\":\"source\",\"value\":\"$kind\"" event.trace ' } midx_bitmap_core () { + rev_kind="${1:-rev}" + setup_bitmap_history test_expect_success 'create single-pack midx with bitmaps' ' @@ -295,7 +301,7 @@ midx_bitmap_core () { test_path_is_file $midx-$(midx_checksum $objdir).bitmap ' - test_rev_exists HEAD + test_rev_exists HEAD "$rev_kind" basic_bitmap_tests @@ -325,7 +331,7 @@ midx_bitmap_core () { test_path_is_file $midx-$(midx_checksum $objdir).bitmap ' - test_rev_exists HEAD + test_rev_exists HEAD "$rev_kind" basic_bitmap_tests @@ -428,6 +434,8 @@ midx_bitmap_core () { } midx_bitmap_partial_tests () { + rev_kind="${1:-rev}" + test_expect_success 'setup partial bitmaps' ' test_commit packed && git repack && @@ -437,7 +445,7 @@ midx_bitmap_partial_tests () { test_path_is_file $midx-$(midx_checksum $objdir).bitmap ' - test_rev_exists HEAD~ + test_rev_exists HEAD~ "$rev_kind" basic_bitmap_tests HEAD~ } -- cgit v1.3 From 7f514b7a5e775cd0b7a9543b02bf9dd38b164d02 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 25 Jan 2022 17:41:17 -0500 Subject: midx: read `RIDX` chunk when present When a MIDX contains the new `RIDX` chunk, ensure that the reverse index is read from it instead of the on-disk .rev file. Since we need to encode the object order in the MIDX itself for correctness reasons, there is no point in storing the same data again outside of the MIDX. So, this patch stops writing separate .rev files, and reads it out of the MIDX itself. This is possible to do with relatively little new code, since the format of the RIDX chunk is identical to the data in the .rev file. In other words, we can implement this by pointing the `revindex_data` field at the reverse index chunk of the MIDX instead of the .rev file without any other changes. Note that we have two knobs that are adjusted for the new tests: GIT_TEST_MIDX_WRITE_REV and GIT_TEST_MIDX_READ_RIDX. The former controls whether the MIDX .rev is written at all, and the latter controls whether we read the MIDX's RIDX chunk. Both are necessary to ensure that the test added at the beginning of this series continues to work. This is because we always need to write the RIDX chunk in the MIDX in order to change its checksum, but we want to make sure reading the existing .rev file still works (since the RIDX chunk takes precedence by default). Arguably this isn't a very interesting mode to test, because the precedence rules mean that we'll always read the RIDX chunk over the .rev file. But it makes it impossible for a user to induce corruption in their repository by adjusting the test knobs (since if we had an either/or knob they could stop writing the RIDX chunk, allowing them to tweak the MIDX's object order without changing its checksum). Signed-off-by: Taylor Blau Reviewed-by: Derrick Stolee Reviewed-by: Jonathan Tan Signed-off-by: Junio C Hamano --- midx.c | 6 +++++- midx.h | 1 + pack-revindex.c | 17 +++++++++++++++++ t/lib-bitmap.sh | 4 ++-- t/t5326-multi-pack-bitmaps.sh | 6 ++++++ t/t5327-multi-pack-bitmaps-rev.sh | 23 +++++++++++++++++++++++ t/t7700-repack.sh | 4 ---- 7 files changed, 54 insertions(+), 7 deletions(-) create mode 100755 t/t5327-multi-pack-bitmaps-rev.sh (limited to 't/lib-bitmap.sh') diff --git a/midx.c b/midx.c index 0248c4577c..6e6cb0eb04 100644 --- a/midx.c +++ b/midx.c @@ -162,6 +162,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets); + if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) + pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex); + m->num_objects = ntohl(m->chunk_oid_fanout[255]); CALLOC_ARRAY(m->pack_names, m->num_packs); @@ -1429,7 +1432,8 @@ static int write_midx_internal(const char *object_dir, finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM); free_chunkfile(cf); - if (flags & MIDX_WRITE_REV_INDEX) + if (flags & MIDX_WRITE_REV_INDEX && + git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0)) write_midx_reverse_index(midx_name.buf, midx_hash, &ctx); if (flags & MIDX_WRITE_BITMAP) { if (write_midx_bitmap(midx_name.buf, midx_hash, &ctx, diff --git a/midx.h b/midx.h index b7d79a515c..22e8e53288 100644 --- a/midx.h +++ b/midx.h @@ -36,6 +36,7 @@ struct multi_pack_index { const unsigned char *chunk_oid_lookup; const unsigned char *chunk_object_offsets; const unsigned char *chunk_large_offsets; + const unsigned char *chunk_revindex; const char **pack_names; struct packed_git **packs; diff --git a/pack-revindex.c b/pack-revindex.c index bd15ebad03..08dc160167 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -298,9 +298,26 @@ int load_midx_revindex(struct multi_pack_index *m) { struct strbuf revindex_name = STRBUF_INIT; int ret; + if (m->revindex_data) return 0; + if (m->chunk_revindex) { + /* + * If the MIDX `m` has a `RIDX` chunk, then use its contents for + * the reverse index instead of trying to load a separate `.rev` + * file. + * + * Note that we do *not* set `m->revindex_map` here, since we do + * not want to accidentally call munmap() in the middle of the + * MIDX. + */ + trace2_data_string("load_midx_revindex", the_repository, + "source", "midx"); + m->revindex_data = (const uint32_t *)m->chunk_revindex; + return 0; + } + trace2_data_string("load_midx_revindex", the_repository, "source", "rev"); diff --git a/t/lib-bitmap.sh b/t/lib-bitmap.sh index 253895c04e..a95537e759 100644 --- a/t/lib-bitmap.sh +++ b/t/lib-bitmap.sh @@ -290,7 +290,7 @@ test_rev_exists () { } midx_bitmap_core () { - rev_kind="${1:-rev}" + rev_kind="${1:-midx}" setup_bitmap_history @@ -434,7 +434,7 @@ midx_bitmap_core () { } midx_bitmap_partial_tests () { - rev_kind="${1:-rev}" + rev_kind="${1:-midx}" test_expect_success 'setup partial bitmaps' ' test_commit packed && diff --git a/t/t5326-multi-pack-bitmaps.sh b/t/t5326-multi-pack-bitmaps.sh index 100ac90d15..c0924074c4 100755 --- a/t/t5326-multi-pack-bitmaps.sh +++ b/t/t5326-multi-pack-bitmaps.sh @@ -9,6 +9,12 @@ test_description='exercise basic multi-pack bitmap functionality' GIT_TEST_MULTI_PACK_INDEX=0 GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 +# This test exercise multi-pack bitmap functionality where the object order is +# stored and read from a special chunk within the MIDX, so use the default +# behavior here. +sane_unset GIT_TEST_MIDX_WRITE_REV +sane_unset GIT_TEST_MIDX_READ_RIDX + midx_bitmap_core bitmap_reuse_tests() { diff --git a/t/t5327-multi-pack-bitmaps-rev.sh b/t/t5327-multi-pack-bitmaps-rev.sh new file mode 100755 index 0000000000..d30ba632c8 --- /dev/null +++ b/t/t5327-multi-pack-bitmaps-rev.sh @@ -0,0 +1,23 @@ +#!/bin/sh + +test_description='exercise basic multi-pack bitmap functionality (.rev files)' + +. ./test-lib.sh +. "${TEST_DIRECTORY}/lib-bitmap.sh" + +# We'll be writing our own midx and bitmaps, so avoid getting confused by the +# automatic ones. +GIT_TEST_MULTI_PACK_INDEX=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 + +# Unlike t5326, this test exercise multi-pack bitmap functionality where the +# object order is stored in a separate .rev file. +GIT_TEST_MIDX_WRITE_REV=1 +GIT_TEST_MIDX_READ_RIDX=0 +export GIT_TEST_MIDX_WRITE_REV +export GIT_TEST_MIDX_READ_RIDX + +midx_bitmap_core rev +midx_bitmap_partial_tests rev + +test_done diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh index 4693f8dc2b..02a6633a16 100755 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@ -311,16 +311,13 @@ test_expect_success 'cleans up MIDX when appropriate' ' checksum=$(midx_checksum $objdir) && test_path_is_file $midx && test_path_is_file $midx-$checksum.bitmap && - test_path_is_file $midx-$checksum.rev && test_commit repack-3 && GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb --write-midx && test_path_is_file $midx && test_path_is_missing $midx-$checksum.bitmap && - test_path_is_missing $midx-$checksum.rev && test_path_is_file $midx-$(midx_checksum $objdir).bitmap && - test_path_is_file $midx-$(midx_checksum $objdir).rev && test_commit repack-4 && GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb && @@ -353,7 +350,6 @@ test_expect_success '--write-midx with preferred bitmap tips' ' test_line_count = 1 before && rm -fr $midx-$(midx_checksum $objdir).bitmap && - rm -fr $midx-$(midx_checksum $objdir).rev && rm -fr $midx && # instead of constructing the snapshot ourselves (c.f., the test -- cgit v1.3