From 3d74a2337c679839265efa16b2bca2a9b7795a00 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:36 +0200 Subject: repack: fix trying to use preferred pack in alternates When doing a geometric repack with multi-pack-indices, then we ask git-multi-pack-index(1) to use the largest packfile as the preferred pack. It can happen though that the largest packfile is not part of the main object database, but instead part of an alternate object database. The result is that git-multi-pack-index(1) will not be able to find the preferred pack and print a warning. It then falls back to use the first packfile that the multi-pack-index shall reference. Fix this bug by only considering packfiles as preferred pack that are local. This is the right thing to do given that a multi-pack-index should never reference packfiles borrowed from an alternate. While at it, rename the function `get_largest_active_packfile()` to `get_preferred_pack()` to better document its intent. Helped-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/repack.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index f649379531..63585ad046 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -444,8 +444,10 @@ static void split_pack_geometry(struct pack_geometry *geometry, int factor) geometry->split = split; } -static struct packed_git *get_largest_active_pack(struct pack_geometry *geometry) +static struct packed_git *get_preferred_pack(struct pack_geometry *geometry) { + uint32_t i; + if (!geometry) { /* * No geometry means either an all-into-one repack (in which @@ -460,7 +462,21 @@ static struct packed_git *get_largest_active_pack(struct pack_geometry *geometry } if (geometry->split == geometry->pack_nr) return NULL; - return geometry->pack[geometry->pack_nr - 1]; + + /* + * The preferred pack is the largest pack above the split line. In + * other words, it is the largest pack that does not get rolled up in + * the geometric repack. + */ + for (i = geometry->pack_nr; i > geometry->split; i--) + /* + * A pack that is not local would never be included in a + * multi-pack index. We thus skip over any non-local packs. + */ + if (geometry->pack[i - 1]->pack_local) + return geometry->pack[i - 1]; + + return NULL; } static void clear_pack_geometry(struct pack_geometry *geometry) @@ -587,7 +603,7 @@ static int write_midx_included_packs(struct string_list *include, { struct child_process cmd = CHILD_PROCESS_INIT; struct string_list_item *item; - struct packed_git *largest = get_largest_active_pack(geometry); + struct packed_git *preferred = get_preferred_pack(geometry); FILE *in; int ret; @@ -608,9 +624,9 @@ static int write_midx_included_packs(struct string_list *include, if (write_bitmaps) strvec_push(&cmd.args, "--bitmap"); - if (largest) + if (preferred) strvec_pushf(&cmd.args, "--preferred-pack=%s", - pack_basename(largest)); + pack_basename(preferred)); if (refs_snapshot) strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot); -- cgit v1.3-5-g45d5 From 51861340f8d7f76a99e0d7265f4417b0a9a6871c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:40 +0200 Subject: repack: fix generating multi-pack-index with only non-local packs When writing the multi-pack-index with geometric repacking we will add all packfiles to the index that are part of the geometric sequence. This can potentially also include packfiles borrowed from an alternate object directory. But given that a multi-pack-index can only ever include packs that are part of the main object database this does not make much sense whatsoever. In the edge case where all packfiles are contained in the alternate object database and the local repository has none itself this bug can cause us to invoke git-multi-pack-index(1) with only non-local packfiles that it ultimately cannot find. This causes it to return an error and thus causes the geometric repack to fail. Fix the code to skip non-local packfiles. Co-authored-by: Taylor Blau Signed-off-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/repack.c | 11 +++++++++++ t/t7703-repack-geometric.sh | 23 +++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 63585ad046..a591a4ddd6 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -570,6 +570,17 @@ static void midx_included_packs(struct string_list *include, for (i = geometry->split; i < geometry->pack_nr; i++) { struct packed_git *p = geometry->pack[i]; + /* + * The multi-pack index never refers to packfiles part + * of an alternate object database, so we skip these. + * While git-multi-pack-index(1) would silently ignore + * them anyway, this allows us to skip executing the + * command completely when we have only non-local + * packfiles. + */ + if (!p->pack_local) + continue; + strbuf_addstr(&buf, pack_basename(p)); strbuf_strip_suffix(&buf, ".pack"); strbuf_addstr(&buf, ".idx"); diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh index 4abc7d4c55..9dd002437f 100755 --- a/t/t7703-repack-geometric.sh +++ b/t/t7703-repack-geometric.sh @@ -313,4 +313,27 @@ test_expect_success '--geometric --write-midx with packfiles in main and alterna test_cmp expect actual ' +test_expect_success '--geometric --with-midx with no local objects' ' + test_when_finished "rm -fr shared member" && + + # Create a repository with a single packfile that acts as alternate + # object database. + git init shared && + test_commit -C shared "shared-objects" && + git -C shared repack -ad && + + # Create a second repository linked to the first one and perform a + # geometric repack on it. + git clone --shared shared member && + git -C member repack --geometric 2 --write-midx 2>err && + test_must_be_empty err && + + # Assert that we wrote neither a new packfile nor a multi-pack-index. + # We should not have a packfile because the single packfile in the + # alternate object database does not invalidate the geometric sequence. + # And we should not have a multi-pack-index because these only index + # local packfiles, and there are none. + test_dir_is_empty member/$packdir +' + test_done -- cgit v1.3-5-g45d5 From 732194b5f2ff50fc536b28c3d5a3e43e0110419b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:49 +0200 Subject: pack-objects: fix error when packing same pack twice When passed the same packfile twice via `--stdin-packs` we return an error that the packfile supposedly was not found. This is because when reading packs into the list of included or excluded packfiles, we will happily re-add packfiles even if they are part of the lists already. And while the list can now contain duplicates, we will only set the `util` pointer of the first list entry to the `packed_git` structure. We notice that at a later point when checking that all list entries have their `util` pointer set and die with an error. While this is kind of a nonsensical request, this scenario can be hit when doing geometric repacks. When a repository is connected to an alternate object directory and both have the exact same packfile then both would get added to the geometric sequence. And when we then decide to perform the repack, we will invoke git-pack-objects(1) with the same packfile twice. Fix this bug by removing any duplicates from both the included and excluded packs. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 ++ t/t5331-pack-objects-stdin.sh | 27 +++++++++++++++++++++++++++ t/t7703-repack-geometric.sh | 25 +++++++++++++++++++++++++ 3 files changed, 54 insertions(+) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 74a167a180..c97ae1b6d0 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3344,7 +3344,9 @@ static void read_packs_list_from_stdin(void) } string_list_sort(&include_packs); + string_list_remove_duplicates(&include_packs, 0); string_list_sort(&exclude_packs); + string_list_remove_duplicates(&exclude_packs, 0); for (p = get_all_packs(the_repository); p; p = p->next) { const char *pack_name = pack_basename(p); diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index d5eece5899..71c8a4a635 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -7,6 +7,12 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh +packed_objects () { + git show-index <"$1" >tmp-object-list && + cut -d' ' -f2 tmp-object-list | sort && + rm tmp-object-list + } + test_expect_success 'setup for --stdin-packs tests' ' git init stdin-packs && ( @@ -142,4 +148,25 @@ test_expect_success '--stdin-packs with broken links' ' ) ' +test_expect_success 'pack-objects --stdin with duplicate packfile' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + cd repo && + test_commit "commit" && + git repack -ad && + + { + basename .git/objects/pack/pack-*.pack && + basename .git/objects/pack/pack-*.pack + } >packfiles && + + git pack-objects --stdin-packs generated-pack expect && + packed_objects generated-pack-*.idx >actual && + test_cmp expect actual + ) +' + test_done diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh index 9dd002437f..a358dfb7bd 100755 --- a/t/t7703-repack-geometric.sh +++ b/t/t7703-repack-geometric.sh @@ -336,4 +336,29 @@ test_expect_success '--geometric --with-midx with no local objects' ' test_dir_is_empty member/$packdir ' +test_expect_success '--geometric with same pack in main and alternate ODB' ' + test_when_finished "rm -fr shared member" && + + # Create a repository with a single packfile that acts as alternate + # object database. + git init shared && + test_commit -C shared "shared-objects" && + git -C shared repack -ad && + + # We create the member repository as an exact copy so that it has the + # same packfile. + cp -r shared member && + test-tool path-utils real_path shared/.git/objects >member/.git/objects/info/alternates && + find shared/.git/objects -type f >expected-files && + + # Verify that we can repack objects as expected without observing any + # error. Having the same packfile in both ODBs used to cause an error + # in git-pack-objects(1). + git -C member repack --geometric 2 2>err && + test_must_be_empty err && + # Nothing should have changed. + find shared/.git/objects -type f >actual-files && + test_cmp expected-files actual-files +' + test_done -- cgit v1.3-5-g45d5 From 752b465c3c0fd7f503b50c326017b8b13af83c3b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:01:54 +0200 Subject: pack-objects: fix error when same packfile is included and excluded When passing the same packfile both as included and excluded via the `--stdin-packs` option, then we will return an error because the excluded packfile cannot be found. This is because we will only set the `util` pointer for the included packfile list if it was found, so that we later die when we notice that it's in fact not set for the excluded packfile list. Fix this bug by always setting the `util` pointer for both the included and excluded list entries. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 8 +++----- t/t5331-pack-objects-stdin.sh | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'builtin') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index c97ae1b6d0..884b84a2fd 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3351,11 +3351,9 @@ static void read_packs_list_from_stdin(void) for (p = get_all_packs(the_repository); p; p = p->next) { const char *pack_name = pack_basename(p); - item = string_list_lookup(&include_packs, pack_name); - if (!item) - item = string_list_lookup(&exclude_packs, pack_name); - - if (item) + if ((item = string_list_lookup(&include_packs, pack_name))) + item->util = p; + if ((item = string_list_lookup(&exclude_packs, pack_name))) item->util = p; } diff --git a/t/t5331-pack-objects-stdin.sh b/t/t5331-pack-objects-stdin.sh index 71c8a4a635..3ef736ec05 100755 --- a/t/t5331-pack-objects-stdin.sh +++ b/t/t5331-pack-objects-stdin.sh @@ -169,4 +169,24 @@ test_expect_success 'pack-objects --stdin with duplicate packfile' ' ) ' +test_expect_success 'pack-objects --stdin with same packfile excluded and included' ' + test_when_finished "rm -fr repo" && + + git init repo && + ( + cd repo && + test_commit "commit" && + git repack -ad && + + { + basename .git/objects/pack/pack-*.pack && + printf "^%s\n" "$(basename .git/objects/pack/pack-*.pack)" + } >packfiles && + + git pack-objects --stdin-packs generated-pack packed-objects && + test_must_be_empty packed-objects + ) +' + test_done -- cgit v1.3-5-g45d5 From 932c16c04b5e41ee1c76d5640ec3ae67e1900c07 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:02:08 +0200 Subject: repack: honor `-l` when calculating pack geometry When the user passes `-l` to git-repack(1), then they essentially ask us to only repack objects part of the local object database while ignoring any packfiles part of an alternate object database. And we in fact honor this bit when doing a geometric repack as the resulting packfile will only ever contain local objects. What we're missing though is that we don't take locality of packfiles into account when computing whether the geometric sequence is intact or not. So even though we would only ever roll up local packfiles anyway, we could end up trying to repack because of non-local packfiles. This does not make much sense, and in the worst case it can cause us to try and do the geometric repack over and over again because we're never able to restore the geometric sequence. Fix this bug by honoring whether the user has passed `-l`. If so, we skip adding any non-local packfiles to the pack geometry. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/repack.c | 13 +++++++++-- t/t7703-repack-geometric.sh | 57 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index a591a4ddd6..165fe1b628 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -321,7 +321,8 @@ static int geometry_cmp(const void *va, const void *vb) } static void init_pack_geometry(struct pack_geometry **geometry_p, - struct string_list *existing_kept_packs) + struct string_list *existing_kept_packs, + const struct pack_objects_args *args) { struct packed_git *p; struct pack_geometry *geometry; @@ -331,6 +332,14 @@ static void init_pack_geometry(struct pack_geometry **geometry_p, geometry = *geometry_p; for (p = get_all_packs(the_repository); p; p = p->next) { + if (args->local && !p->pack_local) + /* + * When asked to only repack local packfiles we skip + * over any packfiles that are borrowed from alternate + * object directories. + */ + continue; + if (!pack_kept_objects) { /* * Any pack that has its pack_keep bit set will appear @@ -898,7 +907,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (geometric_factor) { if (pack_everything) die(_("options '%s' and '%s' cannot be used together"), "--geometric", "-A/-a"); - init_pack_geometry(&geometry, &existing_kept_packs); + init_pack_geometry(&geometry, &existing_kept_packs, &po_args); split_pack_geometry(geometry, geometric_factor); } diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh index a358dfb7bd..be8fe78448 100755 --- a/t/t7703-repack-geometric.sh +++ b/t/t7703-repack-geometric.sh @@ -10,6 +10,12 @@ objdir=.git/objects packdir=$objdir/pack midx=$objdir/pack/multi-pack-index +packed_objects () { + git show-index <"$1" >tmp-object-list && + cut -d' ' -f2 tmp-object-list | sort && + rm tmp-object-list + } + test_expect_success '--geometric with no packs' ' git init geometric && test_when_finished "rm -fr geometric" && @@ -361,4 +367,55 @@ test_expect_success '--geometric with same pack in main and alternate ODB' ' test_cmp expected-files actual-files ' +test_expect_success '--geometric -l with non-intact geometric sequence across ODBs' ' + test_when_finished "rm -fr shared member" && + + git init shared && + test_commit_bulk -C shared --start=1 1 && + + git clone --shared shared member && + test_commit_bulk -C member --start=2 1 && + + # Verify that our assumptions actually hold: both generated packfiles + # should have three objects and should be non-equal. + packed_objects shared/.git/objects/pack/pack-*.idx >shared-objects && + packed_objects member/.git/objects/pack/pack-*.idx >member-objects && + test_line_count = 3 shared-objects && + test_line_count = 3 member-objects && + ! test_cmp shared-objects member-objects && + + # Perform the geometric repack. With `-l`, we should only see the local + # packfile and thus arrive at the conclusion that the geometric + # sequence is intact. We thus expect no changes. + # + # Note that we are tweaking mtimes of the packfiles so that we can + # verify they did not change. This is done in order to detect the case + # where we do repack objects, but the resulting packfile is the same. + test-tool chmtime --verbose =0 member/.git/objects/pack/* >expected-member-packs && + git -C member repack --geometric=2 -l -d && + test-tool chmtime --verbose member/.git/objects/pack/* >actual-member-packs && + test_cmp expected-member-packs actual-member-packs && + + { + packed_objects shared/.git/objects/pack/pack-*.idx && + packed_objects member/.git/objects/pack/pack-*.idx + } | sort >expected-objects && + + # On the other hand, when doing a non-local geometric repack we should + # see both packfiles and thus repack them. We expect that the shared + # object database was not changed. + test-tool chmtime --verbose =0 shared/.git/objects/pack/* >expected-shared-packs && + git -C member repack --geometric=2 -d && + test-tool chmtime --verbose shared/.git/objects/pack/* >actual-shared-packs && + test_cmp expected-shared-packs actual-shared-packs && + + # Furthermore, we expect that the member repository now has a single + # packfile that contains the combined shared and non-shared objects. + ls member/.git/objects/pack/pack-*.idx >actual && + test_line_count = 1 actual && + packed_objects member/.git/objects/pack/pack-*.idx >actual-objects && + test_line_count = 6 actual-objects && + test_cmp expected-objects actual-objects +' + test_done -- cgit v1.3-5-g45d5 From d85cd1877777aa92c73868b9e86516d4be04b4a0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 14 Apr 2023 08:02:12 +0200 Subject: repack: disable writing bitmaps when doing a local repack In order to write a bitmap, we need to have full coverage of all objects that are about to be packed. In the traditional non-multi-pack-index world this meant we need to do a full repack of all objects into a single packfile. But in the new multi-pack-index world we can get away with writing bitmaps when we have multiple packfiles as long as the multi-pack-index covers all objects. This is not always the case though. When asked to perform a repack of local objects, only, then we cannot guarantee to have full coverage of all objects regardless of whether we do a full repack or a repack with a multi-pack-index. The end result is that writing the bitmap will fail in both worlds: $ git multi-pack-index write --stdin-packs --bitmap Signed-off-by: Junio C Hamano --- builtin/repack.c | 12 ++++++++++++ object-file.c | 6 ++++++ object-store.h | 1 + t/t7700-repack.sh | 17 +++++++++++++++++ t/t7703-repack-geometric.sh | 27 +++++++++++++++++++++++++++ 5 files changed, 63 insertions(+) (limited to 'builtin') diff --git a/builtin/repack.c b/builtin/repack.c index 165fe1b628..c26f06769b 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -885,6 +885,18 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (write_bitmaps && !(pack_everything & ALL_INTO_ONE) && !write_midx) die(_(incremental_bitmap_conflict_error)); + if (write_bitmaps && po_args.local && has_alt_odb(the_repository)) { + /* + * When asked to do a local repack, but we have + * packfiles that are inherited from an alternate, then + * we cannot guarantee that the multi-pack-index would + * have full coverage of all objects. We thus disable + * writing bitmaps in that case. + */ + warning(_("disabling bitmap writing, as some objects are not being packed")); + write_bitmaps = 0; + } + if (write_midx && write_bitmaps) { struct strbuf path = STRBUF_INIT; diff --git a/object-file.c b/object-file.c index 939865c1ae..a6574b265d 100644 --- a/object-file.c +++ b/object-file.c @@ -944,6 +944,12 @@ void prepare_alt_odb(struct repository *r) r->objects->loaded_alternates = 1; } +int has_alt_odb(struct repository *r) +{ + prepare_alt_odb(r); + return !!r->objects->odb->next; +} + /* Returns 1 if we have successfully freshened the file, 0 otherwise. */ static int freshen_file(const char *fn) { diff --git a/object-store.h b/object-store.h index 1a713d89d7..8ba010a9d6 100644 --- a/object-store.h +++ b/object-store.h @@ -56,6 +56,7 @@ KHASH_INIT(odb_path_map, const char * /* key: odb_path */, struct object_directory *, 1, fspathhash, fspatheq) void prepare_alt_odb(struct repository *r); +int has_alt_odb(struct repository *r); char *compute_alternate_path(const char *path, struct strbuf *err); struct object_directory *find_odb(struct repository *r, const char *obj_dir); typedef int alt_odb_fn(struct object_directory *, void *); diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh index 4aabe98139..faa739eeb9 100755 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@ -106,6 +106,23 @@ test_expect_success SYMLINKS '--local keeps packs when alternate is objectdir ' test_cmp expect actual ' +test_expect_success '--local disables writing bitmaps when connected to alternate ODB' ' + test_when_finished "rm -rf shared member" && + + git init shared && + git clone --shared shared member && + ( + cd member && + test_commit "object" && + GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adl --write-bitmap-index 2>err && + cat >expect <<-EOF && + warning: disabling bitmap writing, as some objects are not being packed + EOF + test_cmp expect err && + test_path_is_missing .git/objects/pack-*.bitmap + ) +' + test_expect_success 'packed obs in alt ODB are repacked even when local repo is packless' ' mkdir alt_objects/pack && mv .git/objects/pack/* alt_objects/pack && diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh index be8fe78448..00f28fb558 100755 --- a/t/t7703-repack-geometric.sh +++ b/t/t7703-repack-geometric.sh @@ -418,4 +418,31 @@ test_expect_success '--geometric -l with non-intact geometric sequence across OD test_cmp expected-objects actual-objects ' +test_expect_success '--geometric -l disables writing bitmaps with non-local packfiles' ' + test_when_finished "rm -fr shared member" && + + git init shared && + test_commit_bulk -C shared --start=1 1 && + + git clone --shared shared member && + test_commit_bulk -C member --start=2 1 && + + # When performing a geometric repack with `-l` while connected to an + # alternate object database that has a packfile we do not have full + # coverage of objects. As a result, we expect that writing the bitmap + # will be disabled. + git -C member repack -l --geometric=2 --write-midx --write-bitmap-index 2>err && + cat >expect <<-EOF && + warning: disabling bitmap writing, as some objects are not being packed + EOF + test_cmp expect err && + test_path_is_missing member/.git/objects/pack/multi-pack-index-*.bitmap && + + # On the other hand, when we repack without `-l`, we should see that + # the bitmap gets created. + git -C member repack --geometric=2 --write-midx --write-bitmap-index 2>err && + test_must_be_empty err && + test_path_is_file member/.git/objects/pack/multi-pack-index-*.bitmap +' + test_done -- cgit v1.3-5-g45d5