From c67cf4c434039f9b5c796f7e34345b75e0c14450 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 28 Aug 2023 14:37:35 -0400 Subject: test-lib: ignore uninteresting LSan output When I run the tests in leak-checking mode the same way our CI job does, like: make SANITIZE=leak \ GIT_TEST_PASSING_SANITIZE_LEAK=true \ GIT_TEST_SANITIZE_LEAK_LOG=true \ test then LSan can racily produce useless entries in the log files that look like this: ==git==3034393==Unable to get registers from thread 3034307. I think they're mostly harmless based on the source here: https://github.com/llvm/llvm-project/blob/7e0a52e8e9ef6394bb62e0b56e17fa23e7262411/compiler-rt/lib/lsan/lsan_common.cpp#L414 which reads: PtraceRegistersStatus have_registers = suspended_threads.GetRegistersAndSP(i, &registers, &sp); if (have_registers != REGISTERS_AVAILABLE) { Report("Unable to get registers from thread %llu.\n", os_id); // If unable to get SP, consider the entire stack to be reachable unless // GetRegistersAndSP failed with ESRCH. if (have_registers == REGISTERS_UNAVAILABLE_FATAL) continue; sp = stack_begin; } The program itself still runs fine and LSan doesn't cause us to abort. But test-lib.sh looks for any non-empty LSan logs and marks the test as a failure anyway, under the assumption that we simply missed the failing exit code somehow. I don't think I've ever seen this happen in the CI job, but running locally using clang-14 on an 8-core machine, I can't seem to make it through a full run of the test suite without having at least one failure. And it's a different one every time (though they do seem to often be related to packing tests, which makes sense, since that is one of our biggest users of threaded code). We can hack around this by only counting LSan log files that contain a line that doesn't match our known-uninteresting pattern. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano Signed-off-by: Johannes Schindelin --- t/test-lib.sh | 1 + 1 file changed, 1 insertion(+) (limited to 't') diff --git a/t/test-lib.sh b/t/test-lib.sh index 6db377f68b..251b22ba5a 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -334,6 +334,7 @@ nr_san_dir_leaks_ () { find "$TEST_RESULTS_SAN_DIR" \ -type f \ -name "$TEST_RESULTS_SAN_FILE_PFX.*" 2>/dev/null | + xargs grep -lv "Unable to get registers from thread" | wc -l } -- cgit v1.3 From 1204e1a824c34071019fe106348eaa6d88f9528d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 15 Apr 2024 13:30:41 +0200 Subject: builtin/clone: refuse local clones of unsafe repositories When performing a local clone of a repository we end up either copying or hardlinking the source repository into the target repository. This is significantly more performant than if we were to use git-upload-pack(1) and git-fetch-pack(1) to create the new repository and preserves both disk space and compute time. Unfortunately though, performing such a local clone of a repository that is not owned by the current user is inherently unsafe: - It is possible that source files get swapped out underneath us while we are copying or hardlinking them. While we do perform some checks here to assert that we hardlinked the expected file, they cannot reliably thwart time-of-check-time-of-use (TOCTOU) style races. It is thus possible for an adversary to make us copy or hardlink unexpected files into the target directory. Ideally, we would address this by starting to use openat(3P), fstatat(3P) and friends. Due to platform compatibility with Windows we cannot easily do that though. Furthermore, the scope of these fixes would likely be quite broad and thus not fit for an embargoed security release. - Even if we handled TOCTOU-style races perfectly, hardlinking files owned by a different user into the target repository is not a good idea in general. 
It is possible for an adversary to rewrite those files to contain whatever data they want even after the clone has completed. Address these issues by completely refusing local clones of a repository that is not owned by the current user. This reuses our existing infra we have in place via `ensure_valid_ownership()` and thus allows a user to override the safety guard by adding the source repository path to the "safe.directory" configuration. This addresses CVE-2024-32020. Signed-off-by: Patrick Steinhardt Signed-off-by: Johannes Schindelin --- builtin/clone.c | 14 ++++++++++++++ t/t0033-safe-directory.sh | 24 ++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 't') diff --git a/builtin/clone.c b/builtin/clone.c index 4b80fa0870..9ec500d427 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -321,6 +321,20 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest, struct dir_iterator *iter; int iter_status; + /* + * Refuse copying directories by default which aren't owned by us. The + * code that performs either the copying or hardlinking is not prepared + * to handle various edge cases where an adversary may for example + * racily swap out files for symlinks. This can cause us to + * inadvertently use the wrong source file. + * + * Furthermore, even if we were prepared to handle such races safely, + * creating hardlinks across user boundaries is an inherently unsafe + * operation as the hardlinked files can be rewritten at will by the + * potentially-untrusted user. We thus refuse to do so by default. 
+ */ + die_upon_dubious_ownership(NULL, NULL, src_repo); + mkdir_if_missing(dest->buf, 0777); iter = dir_iterator_begin(src->buf, DIR_ITERATOR_PEDANTIC); diff --git a/t/t0033-safe-directory.sh b/t/t0033-safe-directory.sh index dc3496897a..11c3e8f28e 100755 --- a/t/t0033-safe-directory.sh +++ b/t/t0033-safe-directory.sh @@ -80,4 +80,28 @@ test_expect_success 'safe.directory in included file' ' git status ' +test_expect_success 'local clone of unowned repo refused in unsafe directory' ' + test_when_finished "rm -rf source" && + git init source && + ( + sane_unset GIT_TEST_ASSUME_DIFFERENT_OWNER && + test_commit -C source initial + ) && + test_must_fail git clone --local source target && + test_path_is_missing target +' + +test_expect_success 'local clone of unowned repo accepted in safe directory' ' + test_when_finished "rm -rf source" && + git init source && + ( + sane_unset GIT_TEST_ASSUME_DIFFERENT_OWNER && + test_commit -C source initial + ) && + test_must_fail git clone --local source target && + git config --global --add safe.directory "$(pwd)/source/.git" && + git clone --local source target && + test_path_is_dir target +' + test_done -- cgit v1.3 From 5c5a4a1c05932378d259b1fdd9526cab971656a2 Mon Sep 17 00:00:00 2001 From: Filip Hejsek Date: Sun, 28 Jan 2024 04:29:33 +0100 Subject: t0411: add tests for cloning from partial repo Cloning from a partial repository must not fetch missing objects into the partial repository, because that can lead to arbitrary code execution. Add a couple of test cases, pretending to the `upload-pack` command (and to that command only) that it is working on a repository owned by someone else. 
Helped-by: Jeff King Signed-off-by: Filip Hejsek Signed-off-by: Johannes Schindelin --- t/t0411-clone-from-partial.sh | 60 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100755 t/t0411-clone-from-partial.sh (limited to 't') diff --git a/t/t0411-clone-from-partial.sh b/t/t0411-clone-from-partial.sh new file mode 100755 index 0000000000..fb72a0a9ff --- /dev/null +++ b/t/t0411-clone-from-partial.sh @@ -0,0 +1,60 @@ +#!/bin/sh + +test_description='check that local clone does not fetch from promisor remotes' + +. ./test-lib.sh + +test_expect_success 'create evil repo' ' + git init tmp && + test_commit -C tmp a && + git -C tmp config uploadpack.allowfilter 1 && + git clone --filter=blob:none --no-local --no-checkout tmp evil && + rm -rf tmp && + + git -C evil config remote.origin.uploadpack \"\$TRASH_DIRECTORY/fake-upload-pack\" && + write_script fake-upload-pack <<-\EOF && + echo >&2 "fake-upload-pack running" + >"$TRASH_DIRECTORY/script-executed" + exit 1 + EOF + export TRASH_DIRECTORY && + + # empty shallow file disables local clone optimization + >evil/.git/shallow +' + +test_expect_failure 'local clone must not fetch from promisor remote and execute script' ' + rm -f script-executed && + test_must_fail git clone \ + --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ + evil clone1 2>err && + ! grep "fake-upload-pack running" err && + test_path_is_missing script-executed +' + +test_expect_failure 'clone from file://... must not fetch from promisor remote and execute script' ' + rm -f script-executed && + test_must_fail git clone \ + --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ + "file://$(pwd)/evil" clone2 2>err && + ! grep "fake-upload-pack running" err && + test_path_is_missing script-executed +' + +test_expect_failure 'fetch from file://... 
must not fetch from promisor remote and execute script' ' + rm -f script-executed && + test_must_fail git fetch \ + --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ + "file://$(pwd)/evil" 2>err && + ! grep "fake-upload-pack running" err && + test_path_is_missing script-executed +' + +test_expect_success 'pack-objects should fetch from promisor remote and execute script' ' + rm -f script-executed && + echo "HEAD" | test_must_fail git -C evil pack-objects --revs --stdout >/dev/null 2>err && + grep "fake-upload-pack running" err && + test_path_is_file script-executed +' + +test_done -- cgit v1.3 From f4aa8c8bb11dae6e769cd930565173808cbb69c8 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 10 Apr 2024 14:39:37 +0200 Subject: fetch/clone: detect dubious ownership of local repositories When cloning from somebody else's repositories, it is possible that, say, the `upload-pack` command is overridden in the repository that is about to be cloned, which would then be run in the user's context who started the clone. To remind the user that this is a potentially unsafe operation, let's extend the ownership checks we have already established for regular gitdir discovery to extend also to local repositories that are about to be cloned. This protection extends also to file:// URLs. The fixes in this commit address CVE-2024-32004. Note: This commit does not touch the `fetch`/`clone` code directly, but instead the function used implicitly by both: `enter_repo()`. This function is also used by `git receive-pack` (i.e. pushes), by `git upload-archive`, by `git daemon` and by `git http-backend`. In setups that want to serve repositories owned by different users than the account running the service, this will require `safe.*` settings to be configured accordingly. Also note: there are tiny time windows where a time-of-check-time-of-use ("TOCTOU") race is possible. The real solution to those would be to work with `fstat()` and `openat()`. 
However, the latter function is not available on Windows (and would have to be emulated with rather expensive low-level `NtCreateFile()` calls), and the changes would be quite extensive, for my taste too extensive for the little gain given that embargoed releases need to pay extra attention to avoid introducing inadvertent bugs. Signed-off-by: Johannes Schindelin --- cache.h | 12 ++++++++++++ path.c | 2 ++ setup.c | 21 +++++++++++++++++++++ t/t0411-clone-from-partial.sh | 6 +++--- 4 files changed, 38 insertions(+), 3 deletions(-) (limited to 't') diff --git a/cache.h b/cache.h index fcf49706ad..a46a3e4b6b 100644 --- a/cache.h +++ b/cache.h @@ -606,6 +606,18 @@ void set_git_work_tree(const char *tree); #define ALTERNATE_DB_ENVIRONMENT "GIT_ALTERNATE_OBJECT_DIRECTORIES" +/* + * Check if a repository is safe and die if it is not, by verifying the + * ownership of the worktree (if any), the git directory, and the gitfile (if + * any). + * + * Exemptions for known-safe repositories can be added via `safe.directory` + * config settings; for non-bare repositories, their worktree needs to be + * added, for bare ones their git directory. 
+ */ +void die_upon_dubious_ownership(const char *gitfile, const char *worktree, + const char *gitdir); + void setup_work_tree(void); /* * Find the commondir and gitdir of the repository that contains the current diff --git a/path.c b/path.c index 492e17ad12..d61f70e87d 100644 --- a/path.c +++ b/path.c @@ -840,6 +840,7 @@ const char *enter_repo(const char *path, int strict) if (!suffix[i]) return NULL; gitfile = read_gitfile(used_path.buf); + die_upon_dubious_ownership(gitfile, NULL, used_path.buf); if (gitfile) { strbuf_reset(&used_path); strbuf_addstr(&used_path, gitfile); @@ -850,6 +851,7 @@ const char *enter_repo(const char *path, int strict) } else { const char *gitfile = read_gitfile(path); + die_upon_dubious_ownership(gitfile, NULL, path); if (gitfile) path = gitfile; if (chdir(path)) diff --git a/setup.c b/setup.c index cefd5f63c4..9d401ae4c8 100644 --- a/setup.c +++ b/setup.c @@ -1165,6 +1165,27 @@ static int ensure_valid_ownership(const char *gitfile, return data.is_safe; } +void die_upon_dubious_ownership(const char *gitfile, const char *worktree, + const char *gitdir) +{ + struct strbuf report = STRBUF_INIT, quoted = STRBUF_INIT; + const char *path; + + if (ensure_valid_ownership(gitfile, worktree, gitdir, &report)) + return; + + strbuf_complete(&report, '\n'); + path = gitfile ? 
gitfile : gitdir; + sq_quote_buf_pretty(&quoted, path); + + die(_("detected dubious ownership in repository at '%s'\n" + "%s" + "To add an exception for this directory, call:\n" + "\n" + "\tgit config --global --add safe.directory %s"), + path, report.buf, quoted.buf); +} + static int allowed_bare_repo_cb(const char *key, const char *value, void *d) { enum allowed_bare_repo *allowed_bare_repo = d; diff --git a/t/t0411-clone-from-partial.sh b/t/t0411-clone-from-partial.sh index fb72a0a9ff..eb3360dbca 100755 --- a/t/t0411-clone-from-partial.sh +++ b/t/t0411-clone-from-partial.sh @@ -23,7 +23,7 @@ test_expect_success 'create evil repo' ' >evil/.git/shallow ' -test_expect_failure 'local clone must not fetch from promisor remote and execute script' ' +test_expect_success 'local clone must not fetch from promisor remote and execute script' ' rm -f script-executed && test_must_fail git clone \ --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ @@ -32,7 +32,7 @@ test_expect_failure 'local clone must not fetch from promisor remote and execute test_path_is_missing script-executed ' -test_expect_failure 'clone from file://... must not fetch from promisor remote and execute script' ' +test_expect_success 'clone from file://... must not fetch from promisor remote and execute script' ' rm -f script-executed && test_must_fail git clone \ --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ @@ -41,7 +41,7 @@ test_expect_failure 'clone from file://... must not fetch from promisor remote a test_path_is_missing script-executed ' -test_expect_failure 'fetch from file://... must not fetch from promisor remote and execute script' ' +test_expect_success 'fetch from file://... 
must not fetch from promisor remote and execute script' ' rm -f script-executed && test_must_fail git fetch \ --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ -- cgit v1.3 From 7b70e9efb18c2cc3f219af399bd384c5801ba1d7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 16 Apr 2024 04:35:33 -0400 Subject: upload-pack: disable lazy-fetching by default The upload-pack command tries to avoid trusting the repository in which it's run (e.g., by not running any hooks and not using any config that contains arbitrary commands). But if the server side of a fetch or a clone is a partial clone, then either upload-pack or its child pack-objects may run a lazy "git fetch" under the hood. And it is very easy to convince fetch to run arbitrary commands. The "server" side can be a local repository owned by someone else, who would be able to configure commands that are run during a clone with the current user's permissions. This issue has been designated CVE-2024-32004. The fix in this commit's parent helps in this scenario, as well as in related scenarios using SSH to clone, where the untrusted .git directory is owned by a different user id. But if you received one as a zip file, on a USB stick, etc, it may be owned by your user but still untrusted. This has been designated CVE-2024-32465. To mitigate the issue more completely, let's disable lazy fetching entirely during `upload-pack`. While fetching from a partial repository should be relatively rare, it is certainly not an unreasonable workflow. And thus we need to provide an escape hatch. This commit works by respecting a GIT_NO_LAZY_FETCH environment variable (to skip the lazy-fetch), and setting it in upload-pack, but only when the user has not already done so (which gives us the escape hatch). The name of the variable is specifically chosen to match what has already been added in 'master' via e6d5479e7a (git: extend --no-lazy-fetch to work across subprocesses, 2024-02-27). 
Since we're building this fix as a backport for older versions, we could cherry-pick that patch and its earlier steps. However, we don't really need the niceties (like a "--no-lazy-fetch" option) that it offers. By using the same name, everything should just work when the two are eventually merged, but here are a few notes: - the blocking of the fetch in e6d5479e7a is incomplete! It sets fetch_if_missing to 0 when we setup the repository variable, but that isn't enough. pack-objects in particular will call prefetch_to_pack() even if that variable is 0. This patch by contrast checks the environment variable at the lowest level before we call the lazy fetch, where we can be sure to catch all code paths. Possibly the setting of fetch_if_missing from e6d5479e7a can be reverted, but it may be useful to have. For example, some code may want to use that flag to change behavior before it gets to the point of trying to start the fetch. At any rate, that's all outside the scope of this patch. - there's documentation for GIT_NO_LAZY_FETCH in e6d5479e7a. We can live without that here, because for the most part the user shouldn't need to set it themselves. The exception is if they do want to override upload-pack's default, and that requires a separate documentation section (which is added here) - it would be nice to use the NO_LAZY_FETCH_ENVIRONMENT macro added by e6d5479e7a, but those definitions have moved from cache.h to environment.h between 2.39.3 and master. I just used the raw string literals, and we can replace them with the macro once this topic is merged to master. At least with respect to CVE-2024-32004, this does render this commit's parent commit somewhat redundant. However, it is worth retaining that commit as defense in depth, and because it may help other issues (e.g., symlink/hardlink TOCTOU races, where zip files are not really an interesting attack vector). 
The tests in t0411 still pass, but now we have _two_ mechanisms ensuring that the evil command is not run. Let's beef up the existing ones to check that they failed for the expected reason, that we refused to run upload-pack at all with an alternate user id. And add two new ones for the same-user case that test both the restriction and its escape hatch. Signed-off-by: Jeff King Signed-off-by: Johannes Schindelin --- Documentation/git-upload-pack.txt | 16 ++++++++++++++++ builtin/upload-pack.c | 2 ++ promisor-remote.c | 10 ++++++++++ t/t0411-clone-from-partial.sh | 18 ++++++++++++++++++ 4 files changed, 46 insertions(+) (limited to 't') diff --git a/Documentation/git-upload-pack.txt b/Documentation/git-upload-pack.txt index b656b47567..fc4c62d7bc 100644 --- a/Documentation/git-upload-pack.txt +++ b/Documentation/git-upload-pack.txt @@ -55,6 +55,22 @@ ENVIRONMENT admins may need to configure some transports to allow this variable to be passed. See the discussion in linkgit:git[1]. +`GIT_NO_LAZY_FETCH`:: + When cloning or fetching from a partial repository (i.e., one + itself cloned with `--filter`), the server-side `upload-pack` + may need to fetch extra objects from its upstream in order to + complete the request. By default, `upload-pack` will refuse to + perform such a lazy fetch, because `git fetch` may run arbitrary + commands specified in configuration and hooks of the source + repository (and `upload-pack` tries to be safe to run even in + untrusted `.git` directories). ++ +This is implemented by having `upload-pack` internally set the +`GIT_NO_LAZY_FETCH` variable to `1`. If you want to override it +(because you are fetching from a partial clone, and you are sure +you trust it), you can explicitly set `GIT_NO_LAZY_FETCH` to +`0`. 
+ SEE ALSO -------- linkgit:gitnamespaces[7] diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c index 25b69da2bf..f446ff04f6 100644 --- a/builtin/upload-pack.c +++ b/builtin/upload-pack.c @@ -35,6 +35,8 @@ int cmd_upload_pack(int argc, const char **argv, const char *prefix) packet_trace_identity("upload-pack"); read_replace_refs = 0; + /* TODO: This should use NO_LAZY_FETCH_ENVIRONMENT */ + xsetenv("GIT_NO_LAZY_FETCH", "1", 0); argc = parse_options(argc, argv, prefix, options, upload_pack_usage, 0); diff --git a/promisor-remote.c b/promisor-remote.c index faa7612941..550a38f752 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -20,6 +20,16 @@ static int fetch_objects(struct repository *repo, int i; FILE *child_in; + /* TODO: This should use NO_LAZY_FETCH_ENVIRONMENT */ + if (git_env_bool("GIT_NO_LAZY_FETCH", 0)) { + static int warning_shown; + if (!warning_shown) { + warning_shown = 1; + warning(_("lazy fetching disabled; some objects may not be available")); + } + return -1; + } + child.git_cmd = 1; child.in = -1; if (repo != the_repository) diff --git a/t/t0411-clone-from-partial.sh b/t/t0411-clone-from-partial.sh index eb3360dbca..b3d6ddc4bc 100755 --- a/t/t0411-clone-from-partial.sh +++ b/t/t0411-clone-from-partial.sh @@ -28,6 +28,7 @@ test_expect_success 'local clone must not fetch from promisor remote and execute test_must_fail git clone \ --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ evil clone1 2>err && + grep "detected dubious ownership" err && ! grep "fake-upload-pack running" err && test_path_is_missing script-executed ' @@ -37,6 +38,7 @@ test_expect_success 'clone from file://... must not fetch from promisor remote a test_must_fail git clone \ --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ "file://$(pwd)/evil" clone2 2>err && + grep "detected dubious ownership" err && ! 
grep "fake-upload-pack running" err && test_path_is_missing script-executed ' @@ -46,6 +48,7 @@ test_expect_success 'fetch from file://... must not fetch from promisor remote a test_must_fail git fetch \ --upload-pack="GIT_TEST_ASSUME_DIFFERENT_OWNER=true git-upload-pack" \ "file://$(pwd)/evil" 2>err && + grep "detected dubious ownership" err && ! grep "fake-upload-pack running" err && test_path_is_missing script-executed ' @@ -57,4 +60,19 @@ test_expect_success 'pack-objects should fetch from promisor remote and execute test_path_is_file script-executed ' +test_expect_success 'clone from promisor remote does not lazy-fetch by default' ' + rm -f script-executed && + test_must_fail git clone evil no-lazy 2>err && + grep "lazy fetching disabled" err && + test_path_is_missing script-executed +' + +test_expect_success 'promisor lazy-fetching can be re-enabled' ' + rm -f script-executed && + test_must_fail env GIT_NO_LAZY_FETCH=0 \ + git clone evil lazy-ok 2>err && + grep "fake-upload-pack running" err && + test_path_is_file script-executed +' + test_done -- cgit v1.3 From c30a574a0b50e64f26885f740dd49d2420b9bed7 Mon Sep 17 00:00:00 2001 From: Filip Hejsek Date: Sun, 28 Jan 2024 04:30:25 +0100 Subject: has_dir_name(): do not get confused by characters < '/' There is a bug in directory/file ("D/F") conflict checking optimization: It assumes that such a conflict cannot happen if a newly added entry's path is lexicographically "greater than" the last already-existing index entry _and_ contains a directory separator that comes strictly after the common prefix (`len > len_eq_offset`). This assumption is incorrect, though: `a-` sorts _between_ `a` and `a/b`, their common prefix is `a`, the slash comes after the common prefix, and there is still a file/directory conflict. 
Let's re-design this logic, taking these facts into consideration: - It is impossible for a file to sort after another file with whose directory it conflicts because the trailing NUL byte is always smaller than any other character. - Since there are quite a number of ASCII characters that sort before the slash (e.g. `-`, `.`, the space character), looking at the last already-existing index entry is not enough to determine whether there is a D/F conflict when the first character different from the existing last index entry's path is a slash. If it is not a slash, there cannot be a file/directory conflict. And if the existing index entry's first different character is a slash, it also cannot be a file/directory conflict because the optimization requires the newly-added entry's path to sort _after_ the existing entry's, and the conflicting file's path would not. So let's fall back to the regular binary search whenever the newly-added item's path differs in a slash character. If it does not, and it sorts after the last index entry, there is no D/F conflict and the new index entry can be safely appended. This fix also nicely simplifies the logic and makes it much easier to reason about, while the impact on performance should be negligible: After this fix, the optimization will be skipped only when index entry's paths differ in a slash and a space, `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `-`, or `.`, which should be a rare situation. 
Signed-off-by: Filip Hejsek Signed-off-by: Johannes Schindelin --- read-cache.c | 72 +++++++++++++++----------------------------------------- t/t0000-basic.sh | 28 ++++++++++++++++++++++ 2 files changed, 47 insertions(+), 53 deletions(-) (limited to 't') diff --git a/read-cache.c b/read-cache.c index 46f5e497b1..383ec6d366 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1186,19 +1186,32 @@ static int has_dir_name(struct index_state *istate, istate->cache[istate->cache_nr - 1]->name, &len_eq_last); if (cmp_last > 0) { - if (len_eq_last == 0) { + if (name[len_eq_last] != '/') { /* * The entry sorts AFTER the last one in the - * index and their paths have no common prefix, - * so there cannot be a F/D conflict. + * index. + * + * If there were a conflict with "file", then our + * name would start with "file/" and the last index + * entry would start with "file" but not "file/". + * + * The next character after common prefix is + * not '/', so there can be no conflict. */ return retval; } else { /* * The entry sorts AFTER the last one in the - * index, but has a common prefix. Fall through - * to the loop below to disect the entry's path - * and see where the difference is. + * index, and the next character after common + * prefix is '/'. + * + * Either the last index entry is a file in + * conflict with this entry, or it has a name + * which sorts between this entry and the + * potential conflicting file. + * + * In both cases, we fall through to the loop + * below and let the regular search code handle it. */ } } else if (cmp_last == 0) { @@ -1222,53 +1235,6 @@ static int has_dir_name(struct index_state *istate, } len = slash - name; - if (cmp_last > 0) { - /* - * (len + 1) is a directory boundary (including - * the trailing slash). And since the loop is - * decrementing "slash", the first iteration is - * the longest directory prefix; subsequent - * iterations consider parent directories. 
- */ - - if (len + 1 <= len_eq_last) { - /* - * The directory prefix (including the trailing - * slash) also appears as a prefix in the last - * entry, so the remainder cannot collide (because - * strcmp said the whole path was greater). - * - * EQ: last: xxx/A - * this: xxx/B - * - * LT: last: xxx/file_A - * this: xxx/file_B - */ - return retval; - } - - if (len > len_eq_last) { - /* - * This part of the directory prefix (excluding - * the trailing slash) is longer than the known - * equal portions, so this sub-directory cannot - * collide with a file. - * - * GT: last: xxxA - * this: xxxB/file - */ - return retval; - } - - /* - * This is a possible collision. Fall through and - * let the regular search code handle it. - * - * last: xxx - * this: xxx/file - */ - } - pos = index_name_stage_pos(istate, name, len, stage, EXPAND_SPARSE); if (pos >= 0) { /* diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index 502b4bcf9e..2ba219b18b 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -1200,6 +1200,34 @@ test_expect_success 'very long name in the index handled sanely' ' test $len = 4098 ' +# D/F conflict checking uses an optimization when adding to the end. +# make sure it does not get confused by `a-` sorting _between_ +# `a` and `a/`. 
+test_expect_success 'more update-index D/F conflicts' ' + # empty the index to make sure our entry is last + git read-tree --empty && + cacheinfo=100644,$(test_oid empty_blob) && + git update-index --add --cacheinfo $cacheinfo,path5/a && + + test_must_fail git update-index --add --cacheinfo $cacheinfo,path5/a/file && + test_must_fail git update-index --add --cacheinfo $cacheinfo,path5/a/b/file && + test_must_fail git update-index --add --cacheinfo $cacheinfo,path5/a/b/c/file && + + # "a-" sorts between "a" and "a/" + git update-index --add --cacheinfo $cacheinfo,path5/a- && + + test_must_fail git update-index --add --cacheinfo $cacheinfo,path5/a/file && + test_must_fail git update-index --add --cacheinfo $cacheinfo,path5/a/b/file && + test_must_fail git update-index --add --cacheinfo $cacheinfo,path5/a/b/c/file && + + cat >expected <<-\EOF && + path5/a + path5/a- + EOF + git ls-files >actual && + test_cmp expected actual +' + test_expect_success 'test_must_fail on a failing git command' ' test_must_fail git notacommand ' -- cgit v1.3 From b20c10fd9b035f46e48112d2cd33d7cb740012b6 Mon Sep 17 00:00:00 2001 From: Filip Hejsek Date: Sun, 28 Jan 2024 04:32:47 +0100 Subject: t7423: add tests for symlinked submodule directories Submodule operations must not follow symlinks in working tree, because otherwise files might be written to unintended places, leading to vulnerabilities. Signed-off-by: Filip Hejsek Signed-off-by: Johannes Schindelin --- t/t7423-submodule-symlinks.sh | 66 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100755 t/t7423-submodule-symlinks.sh (limited to 't') diff --git a/t/t7423-submodule-symlinks.sh b/t/t7423-submodule-symlinks.sh new file mode 100755 index 0000000000..a72f3cbcab --- /dev/null +++ b/t/t7423-submodule-symlinks.sh @@ -0,0 +1,66 @@ +#!/bin/sh + +test_description='check that submodule operations do not follow symlinks' + +. 
./test-lib.sh + +test_expect_success 'prepare' ' + git config --global protocol.file.allow always && + test_commit initial && + git init upstream && + test_commit -C upstream upstream submodule_file && + git submodule add ./upstream a/sm && + test_tick && + git commit -m submodule +' + +test_expect_failure SYMLINKS 'git submodule update must not create submodule behind symlink' ' + rm -rf a b && + mkdir b && + ln -s b a && + test_must_fail git submodule update && + test_path_is_missing b/sm +' + +test_expect_failure SYMLINKS,CASE_INSENSITIVE_FS 'git submodule update must not create submodule behind symlink on case insensitive fs' ' + rm -rf a b && + mkdir b && + ln -s b A && + test_must_fail git submodule update && + test_path_is_missing b/sm +' + +prepare_symlink_to_repo() { + rm -rf a && + mkdir a && + git init a/target && + git -C a/target fetch ../../upstream && + ln -s target a/sm +} + +test_expect_success SYMLINKS 'git restore --recurse-submodules must not be confused by a symlink' ' + prepare_symlink_to_repo && + test_must_fail git restore --recurse-submodules a/sm && + test_path_is_missing a/sm/submodule_file && + test_path_is_dir a/target/.git && + test_path_is_missing a/target/submodule_file +' + +test_expect_failure SYMLINKS 'git restore --recurse-submodules must not migrate git dir of symlinked repo' ' + prepare_symlink_to_repo && + rm -rf .git/modules && + test_must_fail git restore --recurse-submodules a/sm && + test_path_is_dir a/target/.git && + test_path_is_missing .git/modules/a/sm && + test_path_is_missing a/target/submodule_file +' + +test_expect_failure SYMLINKS 'git checkout -f --recurse-submodules must not migrate git dir of symlinked repo when removing submodule' ' + prepare_symlink_to_repo && + rm -rf .git/modules && + test_must_fail git checkout -f --recurse-submodules initial && + test_path_is_dir a/target/.git && + test_path_is_missing .git/modules/a/sm +' + +test_done -- cgit v1.3 From 9cf85473209ea8ae2b56c13145c4704d12ee1374 Mon Sep 17 
00:00:00 2001 From: Filip Hejsek Date: Sun, 28 Jan 2024 05:09:17 +0100 Subject: clone: prevent clashing git dirs when cloning submodule in parallel While it is expected to have several git dirs within the `.git/modules/` tree, it is important that they do not interfere with each other. For example, if one submodule was called "captain" and another submodule "captain/hooks", their respective git dirs would clash, as they would be located in `.git/modules/captain/` and `.git/modules/captain/hooks/`, respectively, i.e. the latter's files could clash with the actual Git hooks of the former. To prevent these clashes, and in particular to prevent hooks from being written and then executed as part of a recursive clone, we introduced checks as part of the fix for CVE-2019-1387 in a8dee3ca61 (Disallow dubiously-nested submodule git directories, 2019-10-01). It is currently possible to bypass the check for clashing submodule git dirs in two ways: 1. parallel cloning 2. checkout --recurse-submodules Let's check not only before, but also after parallel cloning (and before checking out the submodule), that the git dir is not clashing with another one, otherwise fail. This addresses the parallel cloning issue. As to the parallel checkout issue: It requires quite a few manual steps to create clashing git dirs because Git itself would refuse to initialize the inner one, as demonstrated by the test case. Nevertheless, let's teach the recursive checkout (namely, the `submodule_move_head()` function that is used by the recursive checkout) to be careful to verify that it does not use a clashing git dir, and if it does, disable it (by deleting the `HEAD` file so that subsequent Git calls won't recognize it as a git dir anymore). 
Note: The parallel cloning test case contains a `cat err` that proved to be highly useful when analyzing the racy nature of the operation (the operation can fail with three different error messages, depending on timing), and was left on purpose to ease future debugging should the need arise. Signed-off-by: Filip Hejsek Signed-off-by: Johannes Schindelin --- builtin/submodule--helper.c | 17 +++++++++++++++++ submodule.c | 17 +++++++++++++++++ t/t7450-bad-git-dotfiles.sh | 34 ++++++++++++++++++++++++++++++++-- 3 files changed, 66 insertions(+), 2 deletions(-) (limited to 't') diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 6743fb27bd..b76e13ddce 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1717,6 +1717,23 @@ static int clone_submodule(const struct module_clone_data *clone_data, free(path); } + /* + * We already performed this check at the beginning of this function, + * before cloning the objects. This tries to detect racy behavior e.g. + * in parallel clones, where another process could easily have made the + * gitdir nested _after_ it was created. + * + * To prevent further harm coming from this unintentionally-nested + * gitdir, let's disable it by deleting the `HEAD` file. 
+ */ + if (validate_submodule_git_dir(sm_gitdir, clone_data->name) < 0) { + char *head = xstrfmt("%s/HEAD", sm_gitdir); + unlink(head); + free(head); + die(_("refusing to create/use '%s' in another submodule's " + "git dir"), sm_gitdir); + } + connect_work_tree_and_git_dir(clone_data_path, sm_gitdir, 0); p = git_pathdup_submodule(clone_data_path, "config"); diff --git a/submodule.c b/submodule.c index fae24ef34a..71ec23ad98 100644 --- a/submodule.c +++ b/submodule.c @@ -2146,10 +2146,27 @@ int submodule_move_head(const char *path, if (old_head) { if (!submodule_uses_gitfile(path)) absorb_git_dir_into_superproject(path); + else { + char *dotgit = xstrfmt("%s/.git", path); + char *git_dir = xstrdup(read_gitfile(dotgit)); + + free(dotgit); + if (validate_submodule_git_dir(git_dir, + sub->name) < 0) + die(_("refusing to create/use '%s' in " + "another submodule's git dir"), + git_dir); + free(git_dir); + } } else { struct strbuf gitdir = STRBUF_INIT; submodule_name_to_gitdir(&gitdir, the_repository, sub->name); + if (validate_submodule_git_dir(gitdir.buf, + sub->name) < 0) + die(_("refusing to create/use '%s' in another " + "submodule's git dir"), + gitdir.buf); connect_work_tree_and_git_dir(path, gitdir.buf, 0); strbuf_release(&gitdir); diff --git a/t/t7450-bad-git-dotfiles.sh b/t/t7450-bad-git-dotfiles.sh index ba1f569bcb..8f94129e74 100755 --- a/t/t7450-bad-git-dotfiles.sh +++ b/t/t7450-bad-git-dotfiles.sh @@ -292,7 +292,7 @@ test_expect_success WINDOWS 'prevent git~1 squatting on Windows' ' fi ' -test_expect_success 'git dirs of sibling submodules must not be nested' ' +test_expect_success 'setup submodules with nested git dirs' ' git init nested && test_commit -C nested nested && ( @@ -310,9 +310,39 @@ test_expect_success 'git dirs of sibling submodules must not be nested' ' git add .gitmodules thing1 thing2 && test_tick && git commit -m nested - ) && + ) +' + +test_expect_success 'git dirs of sibling submodules must not be nested' ' test_must_fail git clone 
--recurse-submodules nested clone 2>err && test_i18ngrep "is inside git dir" err ' +test_expect_success 'submodule git dir nesting detection must work with parallel cloning' ' + test_must_fail git clone --recurse-submodules --jobs=2 nested clone_parallel 2>err && + cat err && + grep -E "(already exists|is inside git dir|not a git repository)" err && + { + test_path_is_missing .git/modules/hippo/HEAD || + test_path_is_missing .git/modules/hippo/hooks/HEAD + } +' + +test_expect_success 'checkout -f --recurse-submodules must not use a nested gitdir' ' + git clone nested nested_checkout && + ( + cd nested_checkout && + git submodule init && + git submodule update thing1 && + mkdir -p .git/modules/hippo/hooks/refs && + mkdir -p .git/modules/hippo/hooks/objects/info && + echo "../../../../objects" >.git/modules/hippo/hooks/objects/info/alternates && + echo "ref: refs/heads/master" >.git/modules/hippo/hooks/HEAD + ) && + test_must_fail git -C nested_checkout checkout -f --recurse-submodules HEAD 2>err && + cat err && + grep "is inside git dir" err && + test_path_is_missing nested_checkout/thing2/.git +' + test_done -- cgit v1.3 From 97065761333fd62db1912d81b489db938d8c991d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 22 Mar 2024 11:19:22 +0100 Subject: submodules: submodule paths must not contain symlinks When creating a submodule path, we must be careful not to follow symbolic links. Otherwise we may follow a symbolic link pointing to a gitdir (which are valid symbolic links!) e.g. while cloning. On case-insensitive filesystems, however, we blindly replace a directory that has been created as part of the `clone` operation with a symlink when the path to the latter differs only in case from the former's path. Let's simply avoid this situation by expecting not ever having to overwrite any existing file/directory/symlink upon cloning. That way, we won't even replace a directory that we just created. This addresses CVE-2024-32002. 
Reported-by: Filip Hejsek Signed-off-by: Johannes Schindelin --- builtin/submodule--helper.c | 35 +++++++++++++++++++++++++++++++++ t/t7406-submodule-update.sh | 48 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) (limited to 't') diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index b76e13ddce..4c1a7dbcda 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1641,12 +1641,35 @@ static char *clone_submodule_sm_gitdir(const char *name) return sm_gitdir; } +static int dir_contains_only_dotgit(const char *path) +{ + DIR *dir = opendir(path); + struct dirent *e; + int ret = 1; + + if (!dir) + return 0; + + e = readdir_skip_dot_and_dotdot(dir); + if (!e) + ret = 0; + else if (strcmp(DEFAULT_GIT_DIR_ENVIRONMENT, e->d_name) || + (e = readdir_skip_dot_and_dotdot(dir))) { + error("unexpected item '%s' in '%s'", e->d_name, path); + ret = 0; + } + + closedir(dir); + return ret; +} + static int clone_submodule(const struct module_clone_data *clone_data, struct string_list *reference) { char *p; char *sm_gitdir = clone_submodule_sm_gitdir(clone_data->name); char *sm_alternate = NULL, *error_strategy = NULL; + struct stat st; struct child_process cp = CHILD_PROCESS_INIT; const char *clone_data_path = clone_data->path; char *to_free = NULL; @@ -1660,6 +1683,10 @@ static int clone_submodule(const struct module_clone_data *clone_data, "git dir"), sm_gitdir); if (!file_exists(sm_gitdir)) { + if (clone_data->require_init && !stat(clone_data_path, &st) && + !is_empty_dir(clone_data_path)) + die(_("directory not empty: '%s'"), clone_data_path); + if (safe_create_leading_directories_const(sm_gitdir) < 0) die(_("could not create directory '%s'"), sm_gitdir); @@ -1704,6 +1731,14 @@ static int clone_submodule(const struct module_clone_data *clone_data, if(run_command(&cp)) die(_("clone of '%s' into submodule path '%s' failed"), clone_data->url, clone_data_path); + + if (clone_data->require_init && 
!stat(clone_data_path, &st) && + !dir_contains_only_dotgit(clone_data_path)) { + char *dot_git = xstrfmt("%s/.git", clone_data_path); + unlink(dot_git); + free(dot_git); + die(_("directory not empty: '%s'"), clone_data_path); + } } else { char *path; diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index f094e3d7f3..63c24f7f7c 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -1179,4 +1179,52 @@ test_expect_success 'submodule update --recursive skip submodules with strategy= test_cmp expect.err actual.err ' +test_expect_success CASE_INSENSITIVE_FS,SYMLINKS \ + 'submodule paths must not follow symlinks' ' + + # This is only needed because we want to run this in a self-contained + # test without having to spin up an HTTP server; However, it would not + # be needed in a real-world scenario where the submodule is simply + # hosted on a public site. + test_config_global protocol.file.allow always && + + # Make sure that Git tries to use symlinks on Windows + test_config_global core.symlinks true && + + tell_tale_path="$PWD/tell.tale" && + git init hook && + ( + cd hook && + mkdir -p y/hooks && + write_script y/hooks/post-checkout <<-EOF && + echo HOOK-RUN >&2 + echo hook-run >"$tell_tale_path" + EOF + git add y/hooks/post-checkout && + test_tick && + git commit -m post-checkout + ) && + + hook_repo_path="$(pwd)/hook" && + git init captain && + ( + cd captain && + git submodule add --name x/y "$hook_repo_path" A/modules/x && + test_tick && + git commit -m add-submodule && + + printf .git >dotgit.txt && + git hash-object -w --stdin <dotgit.txt >dot-git.hash && + printf "120000 %s 0\ta\n" "$(cat dot-git.hash)" >index.info && + git update-index --index-info <index.info && + test_tick && + git commit -m add-symlink + ) && + + test_path_is_missing "$tell_tale_path" && + test_must_fail git clone --recursive captain hooked 2>err && + grep "directory not empty" err && + test_path_is_missing "$tell_tale_path" +' + test_done -- cgit v1.3 From e8d0608944486019ea0e1ed2ed29776811a565c2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 26 Mar 2024 14:37:25 +0100 Subject: submodule: require the
submodule path to contain directories only Submodules are stored in subdirectories of their superproject. When these subdirectories have been replaced with symlinks by a malicious actor, all kinds of mayhem can be caused. This _should_ not be possible, but many CVEs in the past showed that _when_ possible, it allows attackers to slip in code that gets executed during, say, a `git clone --recursive` operation. Let's add some defense-in-depth to disallow submodule paths to have anything except directories in them. Signed-off-by: Johannes Schindelin --- builtin/submodule--helper.c | 32 ++++++++++++++++++- submodule.c | 72 +++++++++++++++++++++++++++++++++++++++++++ submodule.h | 5 +++ t/t7423-submodule-symlinks.sh | 9 +++--- 4 files changed, 113 insertions(+), 5 deletions(-) (limited to 't') diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 9eacc43574..941afe1568 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -294,6 +294,9 @@ static void runcommand_in_submodule_cb(const struct cache_entry *list_item, struct child_process cp = CHILD_PROCESS_INIT; char *displaypath; + if (validate_submodule_path(path) < 0) + exit(128); + displaypath = get_submodule_displaypath(path, info->prefix); sub = submodule_from_path(the_repository, null_oid(), path); @@ -620,6 +623,9 @@ static void status_submodule(const char *path, const struct object_id *ce_oid, .free_removed_argv_elements = 1, }; + if (validate_submodule_path(path) < 0) + exit(128); + if (!submodule_from_path(the_repository, null_oid(), path)) die(_("no submodule mapping found in .gitmodules for path '%s'"), path); @@ -1220,6 +1226,9 @@ static void sync_submodule(const char *path, const char *prefix, if (!is_submodule_active(the_repository, path)) return; + if (validate_submodule_path(path) < 0) + exit(128); + sub = submodule_from_path(the_repository, null_oid(), path); if (sub && sub->url) { @@ -1360,6 +1369,9 @@ static void deinit_submodule(const char *path, const 
char *prefix, struct strbuf sb_config = STRBUF_INIT; char *sub_git_dir = xstrfmt("%s/.git", path); + if (validate_submodule_path(path) < 0) + exit(128); + sub = submodule_from_path(the_repository, null_oid(), path); if (!sub || !sub->name) @@ -1674,6 +1686,9 @@ static int clone_submodule(const struct module_clone_data *clone_data, const char *clone_data_path = clone_data->path; char *to_free = NULL; + if (validate_submodule_path(clone_data_path) < 0) + exit(128); + if (!is_absolute_path(clone_data->path)) clone_data_path = to_free = xstrfmt("%s/%s", get_git_work_tree(), clone_data->path); @@ -2542,6 +2557,9 @@ static int update_submodule(struct update_data *update_data) { int ret; + if (validate_submodule_path(update_data->sm_path) < 0) + return -1; + ret = determine_submodule_update_strategy(the_repository, update_data->just_cloned, update_data->sm_path, @@ -2649,12 +2667,21 @@ static int update_submodules(struct update_data *update_data) for (i = 0; i < suc.update_clone_nr; i++) { struct update_clone_data ucd = suc.update_clone[i]; - int code; + int code = 128; oidcpy(&update_data->oid, &ucd.oid); update_data->just_cloned = ucd.just_cloned; update_data->sm_path = ucd.sub->path; + /* + * Verify that the submodule path does not contain any + * symlinks; if it does, it might have been tampered with. 
+ * TODO: allow exempting it via + * `safe.submodule.path` or something + */ + if (validate_submodule_path(update_data->sm_path) < 0) + goto fail; + code = ensure_core_worktree(update_data->sm_path); if (code) goto fail; @@ -3361,6 +3388,9 @@ static int module_add(int argc, const char **argv, const char *prefix) normalize_path_copy(add_data.sm_path, add_data.sm_path); strip_dir_trailing_slashes(add_data.sm_path); + if (validate_submodule_path(add_data.sm_path) < 0) + exit(128); + die_on_index_match(add_data.sm_path, force); die_on_repo_without_commits(add_data.sm_path); diff --git a/submodule.c b/submodule.c index 71ec23ad98..0b87ae6340 100644 --- a/submodule.c +++ b/submodule.c @@ -1005,6 +1005,9 @@ static int submodule_has_commits(struct repository *r, .super_oid = super_oid }; + if (validate_submodule_path(path) < 0) + exit(128); + oid_array_for_each_unique(commits, check_has_commit, &has_commit); if (has_commit.result) { @@ -1127,6 +1130,9 @@ static int push_submodule(const char *path, const struct string_list *push_options, int dry_run) { + if (validate_submodule_path(path) < 0) + exit(128); + if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) { struct child_process cp = CHILD_PROCESS_INIT; strvec_push(&cp.args, "push"); @@ -1176,6 +1182,9 @@ static void submodule_push_check(const char *path, const char *head, struct child_process cp = CHILD_PROCESS_INIT; int i; + if (validate_submodule_path(path) < 0) + exit(128); + strvec_push(&cp.args, "submodule--helper"); strvec_push(&cp.args, "push-check"); strvec_push(&cp.args, head); @@ -1507,6 +1516,9 @@ static struct fetch_task *fetch_task_create(struct submodule_parallel_fetch *spf struct fetch_task *task = xmalloc(sizeof(*task)); memset(task, 0, sizeof(*task)); + if (validate_submodule_path(path) < 0) + exit(128); + task->sub = submodule_from_path(spf->r, treeish_name, path); if (!task->sub) { @@ -1879,6 +1891,9 @@ unsigned is_submodule_modified(const char *path, int ignore_untracked) const char 
*git_dir; int ignore_cp_exit_code = 0; + if (validate_submodule_path(path) < 0) + exit(128); + strbuf_addf(&buf, "%s/.git", path); git_dir = read_gitfile(buf.buf); if (!git_dir) @@ -1955,6 +1970,9 @@ int submodule_uses_gitfile(const char *path) struct strbuf buf = STRBUF_INIT; const char *git_dir; + if (validate_submodule_path(path) < 0) + exit(128); + strbuf_addf(&buf, "%s/.git", path); git_dir = read_gitfile(buf.buf); if (!git_dir) { @@ -1994,6 +2012,9 @@ int bad_to_remove_submodule(const char *path, unsigned flags) struct strbuf buf = STRBUF_INIT; int ret = 0; + if (validate_submodule_path(path) < 0) + exit(128); + if (!file_exists(path) || is_empty_dir(path)) return 0; @@ -2044,6 +2065,9 @@ void submodule_unset_core_worktree(const struct submodule *sub) { struct strbuf config_path = STRBUF_INIT; + if (validate_submodule_path(sub->path) < 0) + exit(128); + submodule_name_to_gitdir(&config_path, the_repository, sub->name); strbuf_addstr(&config_path, "/config"); @@ -2066,6 +2090,9 @@ static int submodule_has_dirty_index(const struct submodule *sub) { struct child_process cp = CHILD_PROCESS_INIT; + if (validate_submodule_path(sub->path) < 0) + exit(128); + prepare_submodule_repo_env(&cp.env); cp.git_cmd = 1; @@ -2083,6 +2110,10 @@ static int submodule_has_dirty_index(const struct submodule *sub) static void submodule_reset_index(const char *path) { struct child_process cp = CHILD_PROCESS_INIT; + + if (validate_submodule_path(path) < 0) + exit(128); + prepare_submodule_repo_env(&cp.env); cp.git_cmd = 1; @@ -2287,6 +2318,34 @@ int validate_submodule_git_dir(char *git_dir, const char *submodule_name) return 0; } +int validate_submodule_path(const char *path) +{ + char *p = xstrdup(path); + struct stat st; + int i, ret = 0; + char sep; + + for (i = 0; !ret && p[i]; i++) { + if (!is_dir_sep(p[i])) + continue; + + sep = p[i]; + p[i] = '\0'; + /* allow missing components, but no symlinks */ + ret = lstat(p, &st) || !S_ISLNK(st.st_mode) ? 
0 : -1; + p[i] = sep; + if (ret) + error(_("expected '%.*s' in submodule path '%s' not to " + "be a symbolic link"), i, p, p); + } + if (!lstat(p, &st) && S_ISLNK(st.st_mode)) + ret = error(_("expected submodule path '%s' not to be a " + "symbolic link"), p); + free(p); + return ret; +} + + /* * Embeds a single submodules git directory into the superprojects git dir, * non recursively. @@ -2297,6 +2356,9 @@ static void relocate_single_git_dir_into_superproject(const char *path) struct strbuf new_gitdir = STRBUF_INIT; const struct submodule *sub; + if (validate_submodule_path(path) < 0) + exit(128); + if (submodule_uses_worktrees(path)) die(_("relocate_gitdir for submodule '%s' with " "more than one worktree not supported"), path); @@ -2337,6 +2399,9 @@ static void absorb_git_dir_into_superproject_recurse(const char *path) struct child_process cp = CHILD_PROCESS_INIT; + if (validate_submodule_path(path) < 0) + exit(128); + cp.dir = path; cp.git_cmd = 1; cp.no_stdin = 1; @@ -2359,6 +2424,10 @@ void absorb_git_dir_into_superproject(const char *path) int err_code; const char *sub_git_dir; struct strbuf gitdir = STRBUF_INIT; + + if (validate_submodule_path(path) < 0) + exit(128); + strbuf_addf(&gitdir, "%s/.git", path); sub_git_dir = resolve_gitdir_gently(gitdir.buf, &err_code); @@ -2501,6 +2570,9 @@ int submodule_to_gitdir(struct strbuf *buf, const char *submodule) const char *git_dir; int ret = 0; + if (validate_submodule_path(submodule) < 0) + exit(128); + strbuf_reset(buf); strbuf_addstr(buf, submodule); strbuf_complete(buf, '/'); diff --git a/submodule.h b/submodule.h index b52a4ff1e7..fb770f1687 100644 --- a/submodule.h +++ b/submodule.h @@ -148,6 +148,11 @@ void submodule_name_to_gitdir(struct strbuf *buf, struct repository *r, */ int validate_submodule_git_dir(char *git_dir, const char *submodule_name); +/* + * Make sure that the given submodule path does not follow symlinks. 
+ */ +int validate_submodule_path(const char *path); + #define SUBMODULE_MOVE_HEAD_DRY_RUN (1<<0) #define SUBMODULE_MOVE_HEAD_FORCE (1<<1) int submodule_move_head(const char *path, diff --git a/t/t7423-submodule-symlinks.sh b/t/t7423-submodule-symlinks.sh index a72f3cbcab..3d3c7af3ce 100755 --- a/t/t7423-submodule-symlinks.sh +++ b/t/t7423-submodule-symlinks.sh @@ -14,15 +14,16 @@ test_expect_success 'prepare' ' git commit -m submodule ' -test_expect_failure SYMLINKS 'git submodule update must not create submodule behind symlink' ' +test_expect_success SYMLINKS 'git submodule update must not create submodule behind symlink' ' rm -rf a b && mkdir b && ln -s b a && + test_path_is_missing b/sm && test_must_fail git submodule update && test_path_is_missing b/sm ' -test_expect_failure SYMLINKS,CASE_INSENSITIVE_FS 'git submodule update must not create submodule behind symlink on case insensitive fs' ' +test_expect_success SYMLINKS,CASE_INSENSITIVE_FS 'git submodule update must not create submodule behind symlink on case insensitive fs' ' rm -rf a b && mkdir b && ln -s b A && @@ -46,7 +47,7 @@ test_expect_success SYMLINKS 'git restore --recurse-submodules must not be confu test_path_is_missing a/target/submodule_file ' -test_expect_failure SYMLINKS 'git restore --recurse-submodules must not migrate git dir of symlinked repo' ' +test_expect_success SYMLINKS 'git restore --recurse-submodules must not migrate git dir of symlinked repo' ' prepare_symlink_to_repo && rm -rf .git/modules && test_must_fail git restore --recurse-submodules a/sm && @@ -55,7 +56,7 @@ test_expect_failure SYMLINKS 'git restore --recurse-submodules must not migrate test_path_is_missing a/target/submodule_file ' -test_expect_failure SYMLINKS 'git checkout -f --recurse-submodules must not migrate git dir of symlinked repo when removing submodule' ' +test_expect_success SYMLINKS 'git checkout -f --recurse-submodules must not migrate git dir of symlinked repo when removing submodule' ' 
prepare_symlink_to_repo && rm -rf .git/modules && test_must_fail git checkout -f --recurse-submodules initial && -- cgit v1.3 From e4930e86c0d521aa6c3c3da9f590e852f6eeac21 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 24 Mar 2024 14:13:41 +0100 Subject: t5510: verify that D/F confusion cannot lead to an RCE The most critical vulnerabilities in Git lead to a Remote Code Execution ("RCE"), i.e. the ability for an attacker to have malicious code being run as part of a Git operation that is not expected to run said code, such as hooks delivered as part of a `git clone`. A couple of parent commits ago, a bug was fixed that let Git be confused by the presence of a path `a-` to mistakenly assume that a directory `a/` can safely be created without removing an existing `a` that is a symbolic link. This bug did not represent an exploitable vulnerability on its own; let's make sure it stays that way. Signed-off-by: Johannes Schindelin --- t/t5510-fetch.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 't') diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index c0b745e33b..211afe13e9 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -1240,6 +1240,30 @@ EOF test_cmp fatal-expect fatal-actual ' +test_expect_success SYMLINKS 'clone does not get confused by a D/F conflict' ' + git init df-conflict && + ( + cd df-conflict && + ln -s .git a && + git add a && + test_tick && + git commit -m symlink && + test_commit a- && + rm a && + mkdir -p a/hooks && + write_script a/hooks/post-checkout <<-EOF && + echo WHOOPSIE >&2 + echo whoopsie >"$TRASH_DIRECTORY"/whoops + EOF + git add a/hooks/post-checkout && + test_tick && + git commit -m post-checkout + ) && + git clone df-conflict clone 2>err && + ! grep WHOOPS err && + test_path_is_missing whoops +' + .
"$TEST_DIRECTORY"/lib-httpd.sh start_httpd -- cgit v1.3 From 31572dc420afee36db8fbbbe060dd78c9a48778c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 28 Mar 2024 10:55:07 +0100 Subject: clone: when symbolic links collide with directories, keep the latter When recursively cloning a repository with submodules, we must ensure that the submodules paths do not suddenly contain symbolic links that would let Git write into unintended locations. We just plugged that vulnerability, but let's add some more defense-in-depth. Since we can only keep one item on disk if multiple index entries' paths collide, we may just as well avoid keeping a symbolic link (because that would allow attack vectors where Git follows those links by mistake). Technically, we handle more situations than cloning submodules into paths that were (partially) replaced by symbolic links. This provides defense-in-depth in case someone finds a case-folding confusion vulnerability in the future that does not even involve submodules. Signed-off-by: Johannes Schindelin --- entry.c | 14 ++++++++++++++ t/t5601-clone.sh | 15 +++++++++++++++ t/t7406-submodule-update.sh | 4 ++-- 3 files changed, 31 insertions(+), 2 deletions(-) (limited to 't') diff --git a/entry.c b/entry.c index a4c18c5645..1d78e54168 100644 --- a/entry.c +++ b/entry.c @@ -541,6 +541,20 @@ int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca, /* If it is a gitlink, leave it alone! */ if (S_ISGITLINK(ce->ce_mode)) return 0; + /* + * We must avoid replacing submodules' leading + * directories with symbolic links, lest recursive + * clones can write into arbitrary locations. + * + * Technically, this logic is not limited + * to recursive clones, or for that matter to + * submodules' paths colliding with symbolic links' + * paths. Yet it strikes a balance in favor of + * simplicity, and if paths are colliding, we might + * just as well keep the directories during a clone. 
+ */ + if (state->clone && S_ISLNK(ce->ce_mode)) + return 0; remove_subtree(&path); } else if (unlink(path.buf)) return error_errno("unable to unlink old '%s'", path.buf); diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index b2524a24c2..fd02984330 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -633,6 +633,21 @@ test_expect_success CASE_INSENSITIVE_FS 'colliding file detection' ' test_i18ngrep "the following paths have collided" icasefs/warning ' +test_expect_success CASE_INSENSITIVE_FS,SYMLINKS \ + 'colliding symlink/directory keeps directory' ' + git init icasefs-colliding-symlink && + ( + cd icasefs-colliding-symlink && + a=$(printf a | git hash-object -w --stdin) && + printf "100644 %s 0\tA/dir/b\n120000 %s 0\ta\n" $a $a >idx && + git update-index --index-info err && - grep "directory not empty" err && + git clone --recursive captain hooked 2>err && + ! grep HOOK-RUN err && test_path_is_missing "$tell_tale_path" ' -- cgit v1.3 From 584de0b4c235209fa60ca4a733678472263bdce0 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 30 Mar 2024 15:59:20 +0100 Subject: Add a helper function to compare file contents In the next commit, Git will learn to disallow hooks during `git clone` operations _except_ when those hooks come from the templates (which are inherently supposed to be trusted). To that end, we add a function to compare the contents of two files. 
Signed-off-by: Johannes Schindelin --- cache.h | 14 +++++++++++ copy.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++ t/helper/test-path-utils.c | 10 ++++++++ t/t0060-path-utils.sh | 41 ++++++++++++++++++++++++++++++++ 4 files changed, 123 insertions(+) (limited to 't') diff --git a/cache.h b/cache.h index 8c5fb1e1ba..16b34799bf 100644 --- a/cache.h +++ b/cache.h @@ -1785,6 +1785,20 @@ int copy_fd(int ifd, int ofd); int copy_file(const char *dst, const char *src, int mode); int copy_file_with_time(const char *dst, const char *src, int mode); +/* + * Compare the file mode and contents of two given files. + * + * If both files are actually symbolic links, the function returns 1 if the link + * targets are identical or 0 if they are not. + * + * If any of the two files cannot be accessed or in case of read failures, this + * function returns 0. + * + * If the file modes and contents are identical, the function returns 1, + * otherwise it returns 0. + */ +int do_files_match(const char *path1, const char *path2); + void write_or_die(int fd, const void *buf, size_t count); void fsync_or_die(int fd, const char *); int fsync_component(enum fsync_component component, int fd); diff --git a/copy.c b/copy.c index 4de6a110f0..8492f6fc83 100644 --- a/copy.c +++ b/copy.c @@ -65,3 +65,61 @@ int copy_file_with_time(const char *dst, const char *src, int mode) return copy_times(dst, src); return status; } + +static int do_symlinks_match(const char *path1, const char *path2) +{ + struct strbuf buf1 = STRBUF_INIT, buf2 = STRBUF_INIT; + int ret = 0; + + if (!strbuf_readlink(&buf1, path1, 0) && + !strbuf_readlink(&buf2, path2, 0)) + ret = !strcmp(buf1.buf, buf2.buf); + + strbuf_release(&buf1); + strbuf_release(&buf2); + return ret; +} + +int do_files_match(const char *path1, const char *path2) +{ + struct stat st1, st2; + int fd1 = -1, fd2 = -1, ret = 1; + char buf1[8192], buf2[8192]; + + if ((fd1 = open_nofollow(path1, O_RDONLY)) < 0 || + fstat(fd1, &st1) || !S_ISREG(st1.st_mode)) 
{ + if (fd1 < 0 && errno == ELOOP) + /* maybe this is a symbolic link? */ + return do_symlinks_match(path1, path2); + ret = 0; + } else if ((fd2 = open_nofollow(path2, O_RDONLY)) < 0 || + fstat(fd2, &st2) || !S_ISREG(st2.st_mode)) { + ret = 0; + } + + if (ret) + /* to match, neither must be executable, or both */ + ret = !(st1.st_mode & 0111) == !(st2.st_mode & 0111); + + if (ret) + ret = st1.st_size == st2.st_size; + + while (ret) { + ssize_t len1 = read_in_full(fd1, buf1, sizeof(buf1)); + ssize_t len2 = read_in_full(fd2, buf2, sizeof(buf2)); + + if (len1 < 0 || len2 < 0 || len1 != len2) + ret = 0; /* read error or different file size */ + else if (!len1) /* len2 is also 0; hit EOF on both */ + break; /* ret is still true */ + else + ret = !memcmp(buf1, buf2, len1); + } + + if (fd1 >= 0) + close(fd1); + if (fd2 >= 0) + close(fd2); + + return ret; +} diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index f69709d674..0e0de21807 100644 --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@ -495,6 +495,16 @@ int cmd__path_utils(int argc, const char **argv) return !!res; } + if (argc == 4 && !strcmp(argv[1], "do_files_match")) { + int ret = do_files_match(argv[2], argv[3]); + + if (ret) + printf("equal\n"); + else + printf("different\n"); + return !ret; + } + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], argv[1] ? 
argv[1] : "(there was none)"); return 1; diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index 68e29c904a..73d0e1a7f1 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -560,4 +560,45 @@ test_expect_success !VALGRIND,RUNTIME_PREFIX,CAN_EXEC_IN_PWD '%(prefix)/ works' test_cmp expect actual ' +test_expect_success 'do_files_match()' ' + test_seq 0 10 >0-10.txt && + test_seq -1 10 >-1-10.txt && + test_seq 1 10 >1-10.txt && + test_seq 1 9 >1-9.txt && + test_seq 0 8 >0-8.txt && + + test-tool path-utils do_files_match 0-10.txt 0-10.txt >out && + + assert_fails() { + test_must_fail \ + test-tool path-utils do_files_match "$1" "$2" >out && + grep different out + } && + + assert_fails 0-8.txt 1-9.txt && + assert_fails -1-10.txt 0-10.txt && + assert_fails 1-10.txt 1-9.txt && + assert_fails 1-10.txt .git && + assert_fails does-not-exist 1-10.txt && + + if test_have_prereq FILEMODE + then + cp 0-10.txt 0-10.x && + chmod a+x 0-10.x && + assert_fails 0-10.txt 0-10.x + fi && + + if test_have_prereq SYMLINKS + then + ln -sf 0-10.txt symlink && + ln -s 0-10.txt another-symlink && + ln -s over-the-ocean yet-another-symlink && + ln -s "$PWD/0-10.txt" absolute-symlink && + assert_fails 0-10.txt symlink && + test-tool path-utils do_files_match symlink another-symlink && + assert_fails symlink yet-another-symlink && + assert_fails symlink absolute-symlink + fi +' + test_done -- cgit v1.3 From 8db1e8743c0f1ed241f6a1b8bf55b6fef07d6751 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 28 Mar 2024 19:21:06 +0100 Subject: clone: prevent hooks from running during a clone Critical security issues typically combine relatively common vulnerabilities such as case confusion in file paths with other weaknesses in order to raise the severity of the attack. One such weakness that has haunted the Git project in many a submodule-related CVE is that any hooks that are found are executed during a clone operation. Examples are the `post-checkout` and `fsmonitor` hooks. 
However, Git's design calls for hooks to be disabled by default, as only disabled example hooks are copied over from the templates in `/share/git-core/templates/`. As a defense-in-depth measure, let's prevent those hooks from running. Obviously, administrators can choose to drop enabled hooks into the template directory, though, _and_ it is also possible to override `core.hooksPath`, in which case the new check needs to be disabled. Signed-off-by: Johannes Schindelin --- builtin/clone.c | 12 +++++++++++- hook.c | 32 ++++++++++++++++++++++++++++++++ t/t5601-clone.sh | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 1 deletion(-) (limited to 't') diff --git a/builtin/clone.c b/builtin/clone.c index 3c2ae31a55..35a73ed0a7 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -908,6 +908,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) int err = 0, complete_refs_before_fetch = 1; int submodule_progress; int filter_submodules = 0; + const char *template_dir; + char *template_dir_dup = NULL; struct transport_ls_refs_options transport_ls_refs_options = TRANSPORT_LS_REFS_OPTIONS_INIT; @@ -927,6 +929,13 @@ int cmd_clone(int argc, const char **argv, const char *prefix) usage_msg_opt(_("You must specify a repository to clone."), builtin_clone_usage, builtin_clone_options); + xsetenv("GIT_CLONE_PROTECTION_ACTIVE", "true", 0 /* allow user override */); + template_dir = get_template_dir(option_template); + if (*template_dir && !is_absolute_path(template_dir)) + template_dir = template_dir_dup = + absolute_pathdup(template_dir); + xsetenv("GIT_CLONE_TEMPLATE_DIR", template_dir, 1); + if (option_depth || option_since || option_not.nr) deepen = 1; if (option_single_branch == -1) @@ -1074,7 +1083,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } } - init_db(git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, NULL, + init_db(git_dir, real_git_dir, template_dir, GIT_HASH_UNKNOWN, NULL, 
INIT_DB_QUIET); if (real_git_dir) { @@ -1392,6 +1401,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) free(unborn_head); free(dir); free(path); + free(template_dir_dup); UNLEAK(repo); junk_mode = JUNK_LEAVE_ALL; diff --git a/hook.c b/hook.c index 22b274b60b..632b537b99 100644 --- a/hook.c +++ b/hook.c @@ -3,6 +3,30 @@ #include "run-command.h" #include "config.h" +static int identical_to_template_hook(const char *name, const char *path) +{ + const char *env = getenv("GIT_CLONE_TEMPLATE_DIR"); + const char *template_dir = get_template_dir(env && *env ? env : NULL); + struct strbuf template_path = STRBUF_INIT; + int found_template_hook, ret; + + strbuf_addf(&template_path, "%s/hooks/%s", template_dir, name); + found_template_hook = access(template_path.buf, X_OK) >= 0; +#ifdef STRIP_EXTENSION + if (!found_template_hook) { + strbuf_addstr(&template_path, STRIP_EXTENSION); + found_template_hook = access(template_path.buf, X_OK) >= 0; + } +#endif + if (!found_template_hook) + return 0; + + ret = do_files_match(template_path.buf, path); + + strbuf_release(&template_path); + return ret; +} + const char *find_hook(const char *name) { static struct strbuf path = STRBUF_INIT; @@ -38,6 +62,14 @@ const char *find_hook(const char *name) } return NULL; } + if (!git_hooks_path && git_env_bool("GIT_CLONE_PROTECTION_ACTIVE", 0) && + !identical_to_template_hook(name, path.buf)) + die(_("active `%s` hook found during `git clone`:\n\t%s\n" + "For security reasons, this is disallowed by default.\n" + "If this is intentional and the hook should actually " + "be run, please\nrun the command again with " + "`GIT_CLONE_PROTECTION_ACTIVE=false`"), + name, path.buf); return path.buf; } diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index fd02984330..20deca0231 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -771,6 +771,57 @@ test_expect_success 'batch missing blob request does not inadvertently try to fe git clone --filter=blob:limit=0 "file://$(pwd)/server" 
client ' +test_expect_success 'clone with init.templatedir runs hooks' ' + git init tmpl/hooks && + write_script tmpl/hooks/post-checkout <<-EOF && + echo HOOK-RUN >&2 + echo I was here >hook.run + EOF + git -C tmpl/hooks add . && + test_tick && + git -C tmpl/hooks commit -m post-checkout && + + test_when_finished "git config --global --unset init.templateDir || :" && + test_when_finished "git config --unset init.templateDir || :" && + ( + sane_unset GIT_TEMPLATE_DIR && + NO_SET_GIT_TEMPLATE_DIR=t && + export NO_SET_GIT_TEMPLATE_DIR && + + git -c core.hooksPath="$(pwd)/tmpl/hooks" \ + clone tmpl/hooks hook-run-hookspath 2>err && + ! grep "active .* hook found" err && + test_path_is_file hook-run-hookspath/hook.run && + + git -c init.templateDir="$(pwd)/tmpl" \ + clone tmpl/hooks hook-run-config 2>err && + ! grep "active .* hook found" err && + test_path_is_file hook-run-config/hook.run && + + git clone --template=tmpl tmpl/hooks hook-run-option 2>err && + ! grep "active .* hook found" err && + test_path_is_file hook-run-option/hook.run && + + git config --global init.templateDir "$(pwd)/tmpl" && + git clone tmpl/hooks hook-run-global-config 2>err && + git config --global --unset init.templateDir && + ! grep "active .* hook found" err && + test_path_is_file hook-run-global-config/hook.run && + + # clone ignores local `init.templateDir`; need to create + # a new repository because we deleted `.git/` in the + # `setup` test case above + git init local-clone && + cd local-clone && + + git config init.templateDir "$(pwd)/../tmpl" && + git clone ../tmpl/hooks hook-run-local-config 2>err && + git config --unset init.templateDir && + ! grep "active .* hook found" err && + test_path_is_missing hook-run-local-config/hook.run + ) +' + . 
"$TEST_DIRECTORY"/lib-httpd.sh start_httpd -- cgit v1.3 From 4412a04fe6f7e632269a6668a4f367230ca2c0e0 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 29 Mar 2024 13:15:32 +0100 Subject: init.templateDir: consider this config setting protected The ability to configure the template directory is a delicate feature: It allows defining hooks that will be run e.g. during a `git clone` operation, such as the `post-checkout` hook. As such, it is of utmost importance that Git does not allow that config setting to be changed during a `git clone` by mistake, as that would give an attacker a chance for a Remote Code Execution, i.e. to run arbitrary code on unsuspecting users' machines. As a defense-in-depth measure, to prevent minor vulnerabilities in the `git clone` code from ballooning into higher-severity attack vectors, let's make this a protected setting just like `safe.directory` and friends, i.e. ignore any `init.templateDir` entries from any local config. Note: This does not change the behavior of any recursive clone (modulo bugs), as the local repository config is not even supposed to be written while cloning the superproject, except in one scenario: If a config template is configured that sets the template directory. This might be done because `git clone --recurse-submodules --template=<directory>` does not pass that template directory on to the submodules' initialization. Another scenario where this commit changes behavior is where repositories are _not_ cloned recursively, and then some (intentional, benign) automation configures the template directory to be used before initializing the submodules. So the caveat is that this could theoretically break existing processes. In both scenarios, there is a way out, though: configuring the template directory via the environment variable `GIT_TEMPLATE_DIR`. This change in behavior is a trade-off between security and backwards-compatibility that is struck in favor of security.
Signed-off-by: Johannes Schindelin --- setup.c | 37 ++++++++++++++++++++++++++++++------- t/t7400-submodule-basic.sh | 31 +++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 7 deletions(-) (limited to 't') diff --git a/setup.c b/setup.c index e6e749ec4b..c3301f5ab8 100644 --- a/setup.c +++ b/setup.c @@ -1726,6 +1726,31 @@ int daemonize(void) #define DEFAULT_GIT_TEMPLATE_DIR "/usr/share/git-core/templates" #endif +struct template_dir_cb_data { + char *path; + int initialized; +}; + +static int template_dir_cb(const char *key, const char *value, void *d) +{ + struct template_dir_cb_data *data = d; + + if (strcmp(key, "init.templatedir")) + return 0; + + if (!value) { + data->path = NULL; + } else { + char *path = NULL; + + FREE_AND_NULL(data->path); + if (!git_config_pathname((const char **)&path, key, value)) + data->path = path ? path : xstrdup(value); + } + + return 0; +} + const char *get_template_dir(const char *option_template) { const char *template_dir = option_template; @@ -1733,15 +1758,13 @@ const char *get_template_dir(const char *option_template) if (!template_dir) template_dir = getenv(TEMPLATE_DIR_ENVIRONMENT); if (!template_dir) { - static const char *init_template_dir; - static int initialized; + static struct template_dir_cb_data data; - if (!initialized) { - git_config_get_pathname("init.templatedir", - &init_template_dir); - initialized = 1; + if (!data.initialized) { + git_protected_config(template_dir_cb, &data); + data.initialized = 1; } - template_dir = init_template_dir; + template_dir = data.path; } if (!template_dir) { static char *dir; diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index eae6a46ef3..3e8cf9b885 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -1436,4 +1436,35 @@ test_expect_success 'recursive clone respects -q' ' test_must_be_empty actual ' +test_expect_success '`submodule init` and `init.templateDir`' ' + mkdir -p tmpl/hooks && + write_script 
tmpl/hooks/post-checkout <<-EOF && + echo HOOK-RUN >&2 + echo I was here >hook.run + exit 1 + EOF + + test_config init.templateDir "$(pwd)/tmpl" && + test_when_finished \ + "git config --global --unset init.templateDir || true" && + ( + sane_unset GIT_TEMPLATE_DIR && + NO_SET_GIT_TEMPLATE_DIR=t && + export NO_SET_GIT_TEMPLATE_DIR && + + git config --global init.templateDir "$(pwd)/tmpl" && + test_must_fail git submodule \ + add "$submodurl" sub-global 2>err && + git config --global --unset init.templateDir && + grep HOOK-RUN err && + test_path_is_file sub-global/hook.run && + + git config init.templateDir "$(pwd)/tmpl" && + git submodule add "$submodurl" sub-local 2>err && + git config --unset init.templateDir && + ! grep HOOK-RUN err && + test_path_is_missing sub-local/hook.run + ) +' + test_done -- cgit v1.3 From 20f3588efc6cbcae5bbaabf65ee12df87b51a9ea Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 30 Mar 2024 19:12:50 +0100 Subject: core.hooksPath: add some protection while cloning Quite frequently, when vulnerabilities were found in Git's (quite complex) clone machinery, a relatively common way to escalate the severity was to trick Git into running a hook which is actually a script that has just been laid on disk as part of that clone. This constitutes a Remote Code Execution vulnerability, the highest severity observed in Git's vulnerabilities so far. Some previously-fixed vulnerabilities allowed malicious repositories to be crafted such that Git would check out files not in the worktree, but in, say, a submodule's `/hooks/` directory. A vulnerability that "merely" allows to modify the Git config would allow a related attack vector, to manipulate Git into looking in the worktree for hooks, e.g. 
redirecting the location where Git looks for hooks, via setting `core.hooksPath` (which would be classified as CWE-427: Uncontrolled Search Path Element and CWE-114: Process Control, for more details see https://cwe.mitre.org/data/definitions/427.html and https://cwe.mitre.org/data/definitions/114.html). To prevent that attack vector, let's error out and complain loudly if an active `core.hooksPath` configuration is seen in the repository-local Git config during a `git clone`. There is one caveat: This changes Git's behavior in a slightly backwards-incompatible manner. While it is probably a rare scenario (if it exists at all) to configure `core.hooksPath` via a config in the Git templates, it _is_ conceivable that some valid setup requires this to work. In the hopefully very unlikely case that a user runs into this, there is an escape hatch: set the `GIT_CLONE_PROTECTION_ACTIVE=false` environment variable. Obviously, this should be done only with utmost caution. Signed-off-by: Johannes Schindelin --- config.c | 13 ++++++++++++- t/t1800-hook.sh | 15 +++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 't') diff --git a/config.c b/config.c index 8c1c4071f0..85b37f2ee0 100644 --- a/config.c +++ b/config.c @@ -1525,8 +1525,19 @@ static int git_default_core_config(const char *var, const char *value, void *cb) if (!strcmp(var, "core.attributesfile")) return git_config_pathname(&git_attributes_file, var, value); - if (!strcmp(var, "core.hookspath")) + if (!strcmp(var, "core.hookspath")) { + if (current_config_scope() == CONFIG_SCOPE_LOCAL && + git_env_bool("GIT_CLONE_PROTECTION_ACTIVE", 0)) + die(_("active `core.hooksPath` found in the local " + "repository config:\n\t%s\nFor security " + "reasons, this is disallowed by default.\nIf " + "this is intentional and the hook should " + "actually be run, please\nrun the command " + "again with " + "`GIT_CLONE_PROTECTION_ACTIVE=false`"), + value); return git_config_pathname(&git_hooks_path, var, value); 
+ } if (!strcmp(var, "core.bare")) { is_bare_repository_cfg = git_config_bool(var, value); diff --git a/t/t1800-hook.sh b/t/t1800-hook.sh index 2ef3579fa7..7ee12e6f48 100755 --- a/t/t1800-hook.sh +++ b/t/t1800-hook.sh @@ -177,4 +177,19 @@ test_expect_success 'git hook run a hook with a bad shebang' ' test_cmp expect actual ' +test_expect_success 'clone protections' ' + test_config core.hooksPath "$(pwd)/my-hooks" && + mkdir -p my-hooks && + write_script my-hooks/test-hook <<-\EOF && + echo Hook ran $1 + EOF + + git hook run test-hook 2>err && + grep "Hook ran" err && + test_must_fail env GIT_CLONE_PROTECTION_ACTIVE=true \ + git hook run test-hook 2>err && + grep "active .core.hooksPath" err && + ! grep "Hook ran" err +' + test_done -- cgit v1.3 From a33fea0886cfa016d313d2bd66bdd08615bffbc9 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 10 Apr 2024 18:01:13 +0200 Subject: fsck: warn about symlink pointing inside a gitdir In the wake of fixing a vulnerability where `git clone` mistakenly followed a symbolic link that it had just written while checking out files, writing into a gitdir, let's add some defense-in-depth by teaching `git fsck` to report symbolic links stored in its trees that point inside `.git/`. Even though the Git project never made any promises about the exact shape of the `.git/` directory's contents, there are likely repositories out there containing symbolic links that point inside the gitdir. For that reason, let's only report these as warnings, not as errors. Security-conscious users are encouraged to configure `fsck.symlinkPointsToGitDir = error`. 
Signed-off-by: Johannes Schindelin --- Documentation/fsck-msgids.txt | 12 ++++++++++ fsck.c | 56 +++++++++++++++++++++++++++++++++++++++++++ fsck.h | 12 ++++++++++ t/t1450-fsck.sh | 37 ++++++++++++++++++++++++++++ 4 files changed, 117 insertions(+) (limited to 't') diff --git a/Documentation/fsck-msgids.txt b/Documentation/fsck-msgids.txt index 12eae8a222..b06ec385af 100644 --- a/Documentation/fsck-msgids.txt +++ b/Documentation/fsck-msgids.txt @@ -157,6 +157,18 @@ `nullSha1`:: (WARN) Tree contains entries pointing to a null sha1. +`symlinkPointsToGitDir`:: + (WARN) Symbolic link points inside a gitdir. + +`symlinkTargetBlob`:: + (ERROR) A non-blob found instead of a symbolic link's target. + +`symlinkTargetLength`:: + (WARN) Symbolic link target longer than maximum path length. + +`symlinkTargetMissing`:: + (ERROR) Unable to read symbolic link target's blob. + `treeNotSorted`:: (ERROR) A tree is not properly sorted. diff --git a/fsck.c b/fsck.c index 47eaeedd70..b85868e122 100644 --- a/fsck.c +++ b/fsck.c @@ -636,6 +636,8 @@ static int fsck_tree(const struct object_id *tree_oid, retval += report(options, tree_oid, OBJ_TREE, FSCK_MSG_MAILMAP_SYMLINK, ".mailmap is a symlink"); + oidset_insert(&options->symlink_targets_found, + entry_oid); } if ((backslash = strchr(name, '\\'))) { @@ -1228,6 +1230,56 @@ static int fsck_blob(const struct object_id *oid, const char *buf, } } + if (oidset_contains(&options->symlink_targets_found, oid)) { + const char *ptr = buf; + const struct object_id *reported = NULL; + + oidset_insert(&options->symlink_targets_done, oid); + + if (!buf || size > PATH_MAX) { + /* + * A missing buffer here is a sign that the caller found the + * blob too gigantic to load into memory. Let's just consider + * that an error. 
+ */ + return report(options, oid, OBJ_BLOB, + FSCK_MSG_SYMLINK_TARGET_LENGTH, + "symlink target too long"); + } + + while (!reported && ptr) { + const char *p = ptr; + char c, *slash = strchrnul(ptr, '/'); + char *backslash = memchr(ptr, '\\', slash - ptr); + + c = *slash; + *slash = '\0'; + + while (!reported && backslash) { + *backslash = '\0'; + if (is_ntfs_dotgit(p)) + ret |= report(options, reported = oid, OBJ_BLOB, + FSCK_MSG_SYMLINK_POINTS_TO_GIT_DIR, + "symlink target points to git dir"); + *backslash = '\\'; + p = backslash + 1; + backslash = memchr(p, '\\', slash - p); + } + if (!reported && is_ntfs_dotgit(p)) + ret |= report(options, reported = oid, OBJ_BLOB, + FSCK_MSG_SYMLINK_POINTS_TO_GIT_DIR, + "symlink target points to git dir"); + + if (!reported && is_hfs_dotgit(ptr)) + ret |= report(options, reported = oid, OBJ_BLOB, + FSCK_MSG_SYMLINK_POINTS_TO_GIT_DIR, + "symlink target points to git dir"); + + *slash = c; + ptr = c ? slash + 1 : NULL; + } + } + return ret; } @@ -1319,6 +1371,10 @@ int fsck_finish(struct fsck_options *options) FSCK_MSG_GITATTRIBUTES_MISSING, FSCK_MSG_GITATTRIBUTES_BLOB, options, ".gitattributes"); + ret |= fsck_blobs(&options->symlink_targets_found, &options->symlink_targets_done, + FSCK_MSG_SYMLINK_TARGET_MISSING, FSCK_MSG_SYMLINK_TARGET_BLOB, + options, ""); + return ret; } diff --git a/fsck.h b/fsck.h index fcecf4101c..130fa8d8f9 100644 --- a/fsck.h +++ b/fsck.h @@ -63,6 +63,8 @@ enum fsck_msg_type { FUNC(GITATTRIBUTES_LARGE, ERROR) \ FUNC(GITATTRIBUTES_LINE_LENGTH, ERROR) \ FUNC(GITATTRIBUTES_BLOB, ERROR) \ + FUNC(SYMLINK_TARGET_MISSING, ERROR) \ + FUNC(SYMLINK_TARGET_BLOB, ERROR) \ /* warnings */ \ FUNC(EMPTY_NAME, WARN) \ FUNC(FULL_PATHNAME, WARN) \ @@ -72,6 +74,8 @@ enum fsck_msg_type { FUNC(NULL_SHA1, WARN) \ FUNC(ZERO_PADDED_FILEMODE, WARN) \ FUNC(NUL_IN_COMMIT, WARN) \ + FUNC(SYMLINK_TARGET_LENGTH, WARN) \ + FUNC(SYMLINK_POINTS_TO_GIT_DIR, WARN) \ /* infos (reported as warnings, but ignored by default) */ \ 
FUNC(BAD_FILEMODE, INFO) \ FUNC(GITMODULES_PARSE, INFO) \ @@ -139,6 +143,8 @@ struct fsck_options { struct oidset gitmodules_done; struct oidset gitattributes_found; struct oidset gitattributes_done; + struct oidset symlink_targets_found; + struct oidset symlink_targets_done; kh_oid_map_t *object_names; }; @@ -148,6 +154,8 @@ struct fsck_options { .gitmodules_done = OIDSET_INIT, \ .gitattributes_found = OIDSET_INIT, \ .gitattributes_done = OIDSET_INIT, \ + .symlink_targets_found = OIDSET_INIT, \ + .symlink_targets_done = OIDSET_INIT, \ .error_func = fsck_error_function \ } #define FSCK_OPTIONS_STRICT { \ @@ -156,6 +164,8 @@ struct fsck_options { .gitmodules_done = OIDSET_INIT, \ .gitattributes_found = OIDSET_INIT, \ .gitattributes_done = OIDSET_INIT, \ + .symlink_targets_found = OIDSET_INIT, \ + .symlink_targets_done = OIDSET_INIT, \ .error_func = fsck_error_function, \ } #define FSCK_OPTIONS_MISSING_GITMODULES { \ @@ -164,6 +174,8 @@ struct fsck_options { .gitmodules_done = OIDSET_INIT, \ .gitattributes_found = OIDSET_INIT, \ .gitattributes_done = OIDSET_INIT, \ + .symlink_targets_found = OIDSET_INIT, \ + .symlink_targets_done = OIDSET_INIT, \ .error_func = fsck_error_cb_print_missing_gitmodules, \ } diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index de0f6d5e7f..5669872bc8 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -1023,4 +1023,41 @@ test_expect_success 'fsck error on gitattributes with excessive size' ' test_cmp expected actual ' +test_expect_success 'fsck warning on symlink target with excessive length' ' + symlink_target=$(printf "pattern %032769d" 1 | git hash-object -w --stdin) && + test_when_finished "remove_object $symlink_target" && + tree=$(printf "120000 blob %s\t%s\n" $symlink_target symlink | git mktree) && + test_when_finished "remove_object $tree" && + cat >expected <<-EOF && + warning in blob $symlink_target: symlinkTargetLength: symlink target too long + EOF + git fsck --no-dangling >actual 2>&1 && + test_cmp expected actual +' + 
+test_expect_success 'fsck warning on symlink target pointing inside git dir' ' + gitdir=$(printf ".git" | git hash-object -w --stdin) && + ntfs_gitdir=$(printf "GIT~1" | git hash-object -w --stdin) && + hfs_gitdir=$(printf ".${u200c}git" | git hash-object -w --stdin) && + inside_gitdir=$(printf "nested/.git/config" | git hash-object -w --stdin) && + benign_target=$(printf "legit/config" | git hash-object -w --stdin) && + tree=$(printf "120000 blob %s\t%s\n" \ + $benign_target benign_target \ + $gitdir gitdir \ + $hfs_gitdir hfs_gitdir \ + $inside_gitdir inside_gitdir \ + $ntfs_gitdir ntfs_gitdir | + git mktree) && + for o in $gitdir $ntfs_gitdir $hfs_gitdir $inside_gitdir $benign_target $tree + do + test_when_finished "remove_object $o" || return 1 + done && + printf "warning in blob %s: symlinkPointsToGitDir: symlink target points to git dir\n" \ + $gitdir $hfs_gitdir $inside_gitdir $ntfs_gitdir | + sort >expected && + git fsck --no-dangling >actual 2>&1 && + sort actual >actual.sorted && + test_cmp expected actual.sorted +' + test_done -- cgit v1.3