From 4a1c95931f5f0dc541925703b7d9ad9409cd4067 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 May 2024 16:14:32 -0700 Subject: builtin/patch-id: fix uninitialized hash function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In c8aed5e8da (repository: stop setting SHA1 as the default object hash, 2024-05-07), we have adapted `initialize_repository()` to no longer set up a default hash function. As this function is also used to set up `the_repository`, the consequence is that `the_hash_algo` will now by default be a `NULL` pointer unless the hash algorithm was configured properly. This is done as a mechanism to detect cases where we may be using the wrong hash function by accident. This change now causes git-patch-id(1) to segfault when it's run outside of a repository. As this command can read diffs from stdin, it does not necessarily need a repository, but then relies on `the_hash_algo` to compute the patch ID itself. It is somewhat dubious that git-patch-id(1) relies on `the_hash_algo` in the first place. Quoting its manpage: A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a patch, with line numbers ignored. As such, it’s "reasonably stable", but at the same time also reasonably unique, i.e., two patches that have the same "patch ID" are almost guaranteed to be the same thing. We explicitly document patch IDs to be using SHA-1. Furthermore, patch IDs are supposed to be stable for most of the part. But even with the same input, the patch IDs will now be different depending on the repo's configured object hash. Work around the issue by setting up SHA-1 when there was no startup repository for now. This is arguably not the correct fix, but for now we rather want to focus on getting the segfault fixed. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/patch-id.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'builtin') diff --git a/builtin/patch-id.c b/builtin/patch-id.c index 3894d2b970..583099cacf 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -5,6 +5,7 @@ #include "hash.h" #include "hex.h" #include "parse-options.h" +#include "setup.h" static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result) { @@ -237,6 +238,18 @@ int cmd_patch_id(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, prefix, builtin_patch_id_options, patch_id_usage, 0); + /* + * We rely on `the_hash_algo` to compute patch IDs. This is dubious as + * it means that the hash algorithm now depends on the object hash of + * the repository, even though git-patch-id(1) clearly defines that + * patch IDs always use SHA1. + * + * NEEDSWORK: This hack should be removed in favor of converting + * the code that computes patch IDs to always use SHA1. + */ + if (!the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + generate_id_list(opts ? opts > 1 : config.stable, opts ? opts == 3 : config.verbatim); return 0; -- cgit v1.3 From 8d058b8024ff57b2c6653a0667b01cfedb1bce44 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 20 May 2024 16:14:33 -0700 Subject: builtin/hash-object: fix uninitialized hash function The git-hash-object(1) command allows users to hash an object even without a repository. Starting with c8aed5e8da (repository: stop setting SHA1 as the default object hash, 2024-05-07), this will make us hit an uninitialized hash function, which subsequently leads to a segfault. Fix this by falling back to SHA-1 explicitly when running outside of a Git repository. Users can use GIT_DEFAULT_HASH environment to specify what hash algorithm they want, so arguably this code should not be needed. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/hash-object.c | 3 +++ t/t1007-hash-object.sh | 6 ++++++ t/t1517-outside-repo.sh | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/hash-object.c b/builtin/hash-object.c index 82ca6d2bfd..c767414a0c 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -123,6 +123,9 @@ int cmd_hash_object(int argc, const char **argv, const char *prefix) else prefix = setup_git_directory_gently(&nongit); + if (nongit && !the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + if (vpath && prefix) { vpath_free = prefix_filename(prefix, vpath); vpath = vpath_free; diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh index 64aea38486..d73a5cc237 100755 --- a/t/t1007-hash-object.sh +++ b/t/t1007-hash-object.sh @@ -260,4 +260,10 @@ test_expect_success '--literally with extra-long type' ' echo example | git hash-object -t $t --literally --stdin ' +test_expect_success '--stdin outside of repository (uses SHA-1)' ' + nongit git hash-object --stdin actual && + echo "$(test_oid --hash=sha1 hello)" >expect && + test_cmp expect actual +' + test_done diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh index 278ef57b3a..2d8982d61a 100755 --- a/t/t1517-outside-repo.sh +++ b/t/t1517-outside-repo.sh @@ -29,7 +29,7 @@ test_expect_success 'compute a patch-id outside repository (uses SHA-1)' ' test_cmp patch-id.expect patch-id.actual ' -test_expect_failure 'hash-object outside repository (uses SHA-1)' ' +test_expect_success 'hash-object outside repository (uses SHA-1)' ' nongit env GIT_DEFAULT_HASH=sha1 \ git hash-object --stdin hash.expect && nongit \ -- cgit v1.3 From 4674ab682dc1a875fd29de8f4e9568196a88b97b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 20 May 2024 16:14:34 -0700 Subject: apply: fix uninitialized hash function "git apply" can work outside a repository as a better "GNU patch", but when it does so, it still assumed that it can access the_hash_algo, which is no longer true in the new world order. Make sure we explicitly fall back to SHA-1 algorithm for backward compatibility. It is of dubious value to make this configurable to other hash algorithms, as the code does not use the_hash_algo for hashing purposes when working outside a repository (which is how the_hash_algo is left to NULL)---it is only used to learn the max length of the hash when parsing the object names on the "index" line, but failing to parse the "index" line is not a hard failure, and the program does not support operations like applying binary patches and --3way fallback that requires object access outside a repository. Signed-off-by: Junio C Hamano --- builtin/apply.c | 10 ++++++++++ t/t1517-outside-repo.sh | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/apply.c b/builtin/apply.c index 861a01910c..d623c52f78 100644 --- a/builtin/apply.c +++ b/builtin/apply.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "gettext.h" #include "repository.h" +#include "hash.h" #include "apply.h" static const char * const apply_usage[] = { @@ -18,6 +19,15 @@ int cmd_apply(int argc, const char **argv, const char *prefix) if (init_apply_state(&state, the_repository, prefix)) exit(128); + /* + * We could to redo the "apply.c" machinery to make this + * arbitrary fallback unnecessary, but it is dubious that it + * is worth the effort. + * cf. https://lore.kernel.org/git/xmqqcypfcmn4.fsf@gitster.g/ + */ + if (!the_hash_algo) + repo_set_hash_algo(the_repository, GIT_HASH_SHA1); + argc = apply_parse_options(argc, argv, &state, &force_apply, &options, apply_usage); diff --git a/t/t1517-outside-repo.sh b/t/t1517-outside-repo.sh index 2d8982d61a..557808ffa7 100755 --- a/t/t1517-outside-repo.sh +++ b/t/t1517-outside-repo.sh @@ -37,7 +37,7 @@ test_expect_success 'hash-object outside repository (uses SHA-1)' ' test_cmp hash.expect hash.actual ' -test_expect_failure 'apply a patch outside repository' ' +test_expect_success 'apply a patch outside repository' ' ( cd non-repo && cp ../nums.old nums && -- cgit v1.3