From 41f43b8243f42b9df2e98be8460646d4c0100ad3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 6 Dec 2024 11:27:19 +0100 Subject: global: mark code units that generate warnings with `-Wsign-compare` Mark code units that generate warnings with `-Wsign-compare`. This allows for a structured approach to get rid of all such warnings over time in a way that can be easily measured. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index 891eaa2b4b..5b792b3dd4 100644 --- a/object-file.c +++ b/object-file.c @@ -8,6 +8,7 @@ */ #define USE_THE_REPOSITORY_VARIABLE +#define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" #include "abspath.h" -- cgit v1.3 From 0ad3d656521aa16a6496aa855bbde97160a2b2bc Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 30 Dec 2024 11:32:23 +0100 Subject: object-file: fix race in object collision check One of the tests in t5616 asserts that git-fetch(1) with `--refetch` triggers repository maintenance with the correct set of arguments. This test is flaky and causes us to fail sometimes: ++ git -c protocol.version=0 -c gc.autoPackLimit=0 -c maintenance.incremental-repack.auto=1234 -C pc1 fetch --refetch origin error: unable to open .git/objects/pack/pack-029d08823bd8a8eab510ad6ac75c823cfd3ed31e.pack: No such file or directory fatal: unable to rename temporary file to '.git/objects/pack/pack-029d08823bd8a8eab510ad6ac75c823cfd3ed31e.pack' fatal: could not finish pack-objects to repack local links fatal: index-pack failed error: last command exited with $?=128 The error message is quite confusing as it talks about trying to rename a temporary packfile. A first hunch would thus be that this packfile gets written by git-fetch(1), but removed by git-maintenance(1) while it hasn't yet been finalized, which shouldn't ever happen. 
And indeed, when looking closer one notices that the file that is supposedly of temporary nature does not have the typical `tmp_pack_` prefix. As it turns out, the "unable to rename temporary file" fatal error is a red herring and the real error is "unable to open". That error is raised by `check_collision()`, which is called by `finalize_object_file()` when moving the new packfile into place. Because t5616 re-fetches objects, we end up with the exact same pack as we already have in the repository. So when the concurrent git-maintenance(1) process rewrites the preexisting pack and unlinks it exactly at the point in time where git-fetch(1) wants to check the old and new packfiles for equality we will see ENOENT and thus `check_collision()` returns an error, which gets bubbled up by `finalize_object_file()` and is then handled by `rename_tmp_packfile()`. That function does not know about the exact root cause of the error and instead just claims that the rename has failed. This race is thus caused by b1b8dfde69 (finalize_object_file(): implement collision check, 2024-09-26), where we have newly introduced the collision check. By definition, two files cannot collide with each other when one of them has been removed. We can thus trivially fix the issue by ignoring ENOENT when opening either of the files we're about to check for collision. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index b1a3463852..0293b93bbc 100644 --- a/object-file.c +++ b/object-file.c @@ -1982,13 +1982,15 @@ static int check_collision(const char *filename_a, const char *filename_b) fd_a = open(filename_a, O_RDONLY); if (fd_a < 0) { - ret = error_errno(_("unable to open %s"), filename_a); + if (errno != ENOENT) + ret = error_errno(_("unable to open %s"), filename_a); goto out; } fd_b = open(filename_b, O_RDONLY); if (fd_b < 0) { - ret = error_errno(_("unable to open %s"), filename_b); + if (errno != ENOENT) + ret = error_errno(_("unable to open %s"), filename_b); goto out; } -- cgit v1.3 From c1acf1a31761d0cfddc3ea6d39c92a6528cd9c5c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 6 Jan 2025 10:24:25 +0100 Subject: object-file: rename variables in `check_collision()` Rename variables used in `check_collision()` to clearly identify which file is the source and which is the destination. This will make the next step easier to reason about when we start to treat those files differently from one another. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index 0293b93bbc..e2fa1be303 100644 --- a/object-file.c +++ b/object-file.c @@ -1974,56 +1974,56 @@ static void write_object_file_prepare_literally(const struct git_hash_algo *algo hash_object_body(algo, &c, buf, len, oid, hdr, hdrlen); } -static int check_collision(const char *filename_a, const char *filename_b) +static int check_collision(const char *source, const char *dest) { - char buf_a[4096], buf_b[4096]; - int fd_a = -1, fd_b = -1; + char buf_source[4096], buf_dest[4096]; + int fd_source = -1, fd_dest = -1; int ret = 0; - fd_a = open(filename_a, O_RDONLY); - if (fd_a < 0) { + fd_source = open(source, O_RDONLY); + if (fd_source < 0) { if (errno != ENOENT) - ret = error_errno(_("unable to open %s"), filename_a); + ret = error_errno(_("unable to open %s"), source); goto out; } - fd_b = open(filename_b, O_RDONLY); - if (fd_b < 0) { + fd_dest = open(dest, O_RDONLY); + if (fd_dest < 0) { if (errno != ENOENT) - ret = error_errno(_("unable to open %s"), filename_b); + ret = error_errno(_("unable to open %s"), dest); goto out; } while (1) { ssize_t sz_a, sz_b; - sz_a = read_in_full(fd_a, buf_a, sizeof(buf_a)); + sz_a = read_in_full(fd_source, buf_source, sizeof(buf_source)); if (sz_a < 0) { - ret = error_errno(_("unable to read %s"), filename_a); + ret = error_errno(_("unable to read %s"), source); goto out; } - sz_b = read_in_full(fd_b, buf_b, sizeof(buf_b)); + sz_b = read_in_full(fd_dest, buf_dest, sizeof(buf_dest)); if (sz_b < 0) { - ret = error_errno(_("unable to read %s"), filename_b); + ret = error_errno(_("unable to read %s"), dest); goto out; } - if (sz_a != sz_b || memcmp(buf_a, buf_b, sz_a)) { + if (sz_a != sz_b || memcmp(buf_source, buf_dest, sz_a)) { ret = error(_("files '%s' and '%s' differ in contents"), - 
filename_a, filename_b); + source, dest); goto out; } - if (sz_a < sizeof(buf_a)) + if (sz_a < sizeof(buf_source)) break; } out: - if (fd_a > -1) - close(fd_a); - if (fd_b > -1) - close(fd_b); + if (fd_source > -1) + close(fd_source); + if (fd_dest > -1) + close(fd_dest); return ret; } -- cgit v1.3 From cfae50e40eb72d6116ad56c616b3322474df4a75 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 6 Jan 2025 10:24:26 +0100 Subject: object-file: don't special-case missing source file in collision check In 0ad3d65652 (object-file: fix race in object collision check, 2024-12-30) we have started to ignore ENOENT when opening either the source or destination file of the collision check. This was done to handle races more gracefully in case either of the potentially-colliding files disappears. The fix is overly broad though: while the destination file may indeed vanish racily, this shouldn't ever happen for the source file, which is a temporary object file (either loose or in packfile format) that we have just created. So if any concurrent process had removed that temporary file, it would indicate an actual issue. Stop treating ENOENT specially for the source file so that we always bubble up this error. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index e2fa1be303..c1bd746d9e 100644 --- a/object-file.c +++ b/object-file.c @@ -1982,8 +1982,7 @@ static int check_collision(const char *source, const char *dest) fd_source = open(source, O_RDONLY); if (fd_source < 0) { - if (errno != ENOENT) - ret = error_errno(_("unable to open %s"), source); + ret = error_errno(_("unable to open %s"), source); goto out; } -- cgit v1.3 From d7fcbe2c56468ac780c689b02c6a9e056ce39c12 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 6 Jan 2025 10:24:27 +0100 Subject: object-file: retry linking file into place when occluding file vanishes Prior to 0ad3d65652 (object-file: fix race in object collision check, 2024-12-30), callers could expect that a successful return from `finalize_object_file()` means that either the file was moved into place, or the identical bytes were already present. If neither of those happens, we'd return an error. Since that commit, if the destination file disappears between our link(3p) call and the collision check, we'd return success without actually checking the contents, and without retrying the link. This solves the common case that the files were indeed the same, but it means that we may corrupt the repository if they weren't (this implies a hash collision, but the whole point of this function is protecting against hash collisions). We can't be pessimistic and assume they're different; that hurts the common case that the mentioned commit was trying to fix. But after seeing that the destination file went away, we can retry linking again. Adapt the code to do so when we see that the destination file has racily vanished. 
This should generally succeed as we have just observed that the destination file does not exist anymore, except in the very unlikely event that it gets recreated by another concurrent process again. Helped-by: Jeff King Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index c1bd746d9e..008ddf59a5 100644 --- a/object-file.c +++ b/object-file.c @@ -1974,6 +1974,8 @@ static void write_object_file_prepare_literally(const struct git_hash_algo *algo hash_object_body(algo, &c, buf, len, oid, hdr, hdrlen); } +#define CHECK_COLLISION_DEST_VANISHED -2 + static int check_collision(const char *source, const char *dest) { char buf_source[4096], buf_dest[4096]; @@ -1990,6 +1992,8 @@ static int check_collision(const char *source, const char *dest) if (fd_dest < 0) { if (errno != ENOENT) ret = error_errno(_("unable to open %s"), dest); + else + ret = CHECK_COLLISION_DEST_VANISHED; goto out; } @@ -2037,8 +2041,11 @@ int finalize_object_file(const char *tmpfile, const char *filename) int finalize_object_file_flags(const char *tmpfile, const char *filename, enum finalize_object_file_flags flags) { - struct stat st; - int ret = 0; + unsigned retries = 0; + int ret; + +retry: + ret = 0; if (object_creation_mode == OBJECT_CREATION_USES_RENAMES) goto try_rename; @@ -2059,6 +2066,8 @@ int finalize_object_file_flags(const char *tmpfile, const char *filename, * left to unlink. 
*/ if (ret && ret != EEXIST) { + struct stat st; + try_rename: if (!stat(filename, &st)) ret = EEXIST; @@ -2074,9 +2083,17 @@ int finalize_object_file_flags(const char *tmpfile, const char *filename, errno = saved_errno; return error_errno(_("unable to write file %s"), filename); } - if (!(flags & FOF_SKIP_COLLISION_CHECK) && - check_collision(tmpfile, filename)) + if (!(flags & FOF_SKIP_COLLISION_CHECK)) { + ret = check_collision(tmpfile, filename); + if (ret == CHECK_COLLISION_DEST_VANISHED) { + if (retries++ > 5) + return error(_("unable to write repeatedly vanishing file %s"), + filename); + goto retry; + } + else if (ret) return -1; + } unlink_or_warn(tmpfile); } -- cgit v1.3 From 7b081d2f70feb7eadd1e93f52146e5d68371451d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:29 -0500 Subject: hash.h: introduce `unsafe_hash_algo()` In 253ed9ecff (hash.h: scaffolding for _unsafe hashing variants, 2024-09-26), we introduced "unsafe" variants of the SHA-1 hashing functions by introducing new functions like "unsafe_init_fn()" and so on. This approach has a major shortcoming that callers must remember to consistently use one variant or the other. Failing to consistently use (or not use) the unsafe variants can lead to crashes at best, or subtle memory corruption issues at worst. In the hashfile API, this isn't difficult to achieve, but verifying that all callers consistently use the unsafe variants is somewhat of a chore given how spread out all of the callers are. In the sha1 and sha1-unsafe test helpers, all of the calls to various hash functions are guarded by an "if (unsafe)" conditional, which is repetitive and cumbersome. Address these issues by introducing a new pattern whereby one 'git_hash_algo' can return a pointer to another 'git_hash_algo' that represents the unsafe version of itself. 
So instead of having something like: if (unsafe) the_hash_algo->unsafe_init_fn(...); the_hash_algo->unsafe_update_fn(...); the_hash_algo->unsafe_final_fn(...); else the_hash_algo->init_fn(...); the_hash_algo->update_fn(...); the_hash_algo->final_fn(...); we can instead write: struct git_hash_algo *algop = the_hash_algo; if (unsafe) algop = unsafe_hash_algo(algop); algop->init_fn(...); algop->update_fn(...); algop->final_fn(...); This removes the existing shortcoming by no longer forcing the caller to "remember" which variant of the hash functions it wants to call, only to hold onto a 'struct git_hash_algo' pointer that is initialized once. Similarly, while there currently is still a way to "mix" safe and unsafe functions, this too will go away after subsequent commits remove all direct calls to the unsafe_ variants. Note that hash_algo_by_ptr() needs an adjustment to allow passing in the unsafe variant of a hash function. All other query functions on the hash_algos array will continue to return the safe variants of any function. Suggested-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- hash.h | 13 ++++++++++++- object-file.c | 26 ++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) (limited to 'object-file.c') diff --git a/hash.h b/hash.h index 756166ce5e..0bf63cedfa 100644 --- a/hash.h +++ b/hash.h @@ -305,6 +305,9 @@ struct git_hash_algo { /* The all-zeros OID. */ const struct object_id *null_oid; + + /* The unsafe variant of this hash function, if one exists. */ + const struct git_hash_algo *unsafe; }; extern const struct git_hash_algo hash_algos[GIT_HASH_NALGOS]; @@ -320,9 +323,17 @@ int hash_algo_by_length(int len); /* Identical, except for a pointer to struct git_hash_algo. 
*/ static inline int hash_algo_by_ptr(const struct git_hash_algo *p) { - return p - hash_algos; + size_t i; + for (i = 0; i < GIT_HASH_NALGOS; i++) { + const struct git_hash_algo *algop = &hash_algos[i]; + if (p == algop) + return i; + } + return GIT_HASH_UNKNOWN; } +const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop); + const struct object_id *null_oid(void); static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop) diff --git a/object-file.c b/object-file.c index 5b792b3dd4..43efa4ca36 100644 --- a/object-file.c +++ b/object-file.c @@ -202,6 +202,22 @@ static void git_hash_unknown_final_oid(struct object_id *oid UNUSED, BUG("trying to finalize unknown hash"); } +static const struct git_hash_algo sha1_unsafe_algo = { + .name = "sha1", + .format_id = GIT_SHA1_FORMAT_ID, + .rawsz = GIT_SHA1_RAWSZ, + .hexsz = GIT_SHA1_HEXSZ, + .blksz = GIT_SHA1_BLKSZ, + .init_fn = git_hash_sha1_init_unsafe, + .clone_fn = git_hash_sha1_clone_unsafe, + .update_fn = git_hash_sha1_update_unsafe, + .final_fn = git_hash_sha1_final_unsafe, + .final_oid_fn = git_hash_sha1_final_oid_unsafe, + .empty_tree = &empty_tree_oid, + .empty_blob = &empty_blob_oid, + .null_oid = &null_oid_sha1, +}; + const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { { .name = NULL, @@ -239,6 +255,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .unsafe_update_fn = git_hash_sha1_update_unsafe, .unsafe_final_fn = git_hash_sha1_final_unsafe, .unsafe_final_oid_fn = git_hash_sha1_final_oid_unsafe, + .unsafe = &sha1_unsafe_algo, .empty_tree = &empty_tree_oid, .empty_blob = &empty_blob_oid, .null_oid = &null_oid_sha1, @@ -305,6 +322,15 @@ int hash_algo_by_length(int len) return GIT_HASH_UNKNOWN; } +const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop) +{ + /* If we have a faster "unsafe" implementation, use that. 
*/ + if (algop->unsafe) + return algop->unsafe; + /* Otherwise use the default one. */ + return algop; +} + /* * This is meant to hold a *small* number of objects that you would * want repo_read_object_file() to be able to return, but yet you do not want -- cgit v1.3 From 04292c3796bb92664f6111326215d9c060ef71c8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:42 -0500 Subject: hash.h: drop unsafe_ function variants Now that all callers have been converted from: the_hash_algo->unsafe_init_fn(); to unsafe_hash_algo(the_hash_algo)->init_fn(); and similar, we can remove the scaffolding for the unsafe_ function variants and force callers to use the new unsafe_hash_algo() mechanic instead. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- hash.h | 15 --------------- object-file.c | 15 --------------- 2 files changed, 30 deletions(-) (limited to 'object-file.c') diff --git a/hash.h b/hash.h index 0bf63cedfa..ad2c919991 100644 --- a/hash.h +++ b/hash.h @@ -282,21 +282,6 @@ struct git_hash_algo { /* The hash finalization function for object IDs. */ git_hash_final_oid_fn final_oid_fn; - /* The non-cryptographic hash initialization function. */ - git_hash_init_fn unsafe_init_fn; - - /* The non-cryptographic hash context cloning function. */ - git_hash_clone_fn unsafe_clone_fn; - - /* The non-cryptographic hash update function. */ - git_hash_update_fn unsafe_update_fn; - - /* The non-cryptographic hash finalization function. */ - git_hash_final_fn unsafe_final_fn; - - /* The non-cryptographic hash finalization function. */ - git_hash_final_oid_fn unsafe_final_oid_fn; - /* The OID of the empty tree. 
*/ const struct object_id *empty_tree; diff --git a/object-file.c b/object-file.c index 43efa4ca36..c4b42dd4be 100644 --- a/object-file.c +++ b/object-file.c @@ -230,11 +230,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .update_fn = git_hash_unknown_update, .final_fn = git_hash_unknown_final, .final_oid_fn = git_hash_unknown_final_oid, - .unsafe_init_fn = git_hash_unknown_init, - .unsafe_clone_fn = git_hash_unknown_clone, - .unsafe_update_fn = git_hash_unknown_update, - .unsafe_final_fn = git_hash_unknown_final, - .unsafe_final_oid_fn = git_hash_unknown_final_oid, .empty_tree = NULL, .empty_blob = NULL, .null_oid = NULL, @@ -250,11 +245,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .update_fn = git_hash_sha1_update, .final_fn = git_hash_sha1_final, .final_oid_fn = git_hash_sha1_final_oid, - .unsafe_init_fn = git_hash_sha1_init_unsafe, - .unsafe_clone_fn = git_hash_sha1_clone_unsafe, - .unsafe_update_fn = git_hash_sha1_update_unsafe, - .unsafe_final_fn = git_hash_sha1_final_unsafe, - .unsafe_final_oid_fn = git_hash_sha1_final_oid_unsafe, .unsafe = &sha1_unsafe_algo, .empty_tree = &empty_tree_oid, .empty_blob = &empty_blob_oid, @@ -271,11 +261,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { .update_fn = git_hash_sha256_update, .final_fn = git_hash_sha256_final, .final_oid_fn = git_hash_sha256_final_oid, - .unsafe_init_fn = git_hash_sha256_init, - .unsafe_clone_fn = git_hash_sha256_clone, - .unsafe_update_fn = git_hash_sha256_update, - .unsafe_final_fn = git_hash_sha256_final, - .unsafe_final_oid_fn = git_hash_sha256_final_oid, .empty_tree = &empty_tree_oid_sha256, .empty_blob = &empty_blob_oid_sha256, .null_oid = &null_oid_sha256, -- cgit v1.3