From e7cb0b4455c85b53aeba40f88ffddcf6d4002498 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 11 May 2018 16:03:54 +0200 Subject: is_ntfs_dotgit: match other .git files When we started to catch NTFS short names that clash with .git, we only looked for GIT~1. This is sufficient because we only ever clone into an empty directory, so .git is guaranteed to be the first subdirectory or file in that directory. However, even with a fresh clone, .gitmodules is *not* necessarily the first file to be written that would want the NTFS short name GITMOD~1: a malicious repository can add .gitmodul0000 and friends, which sorts before `.gitmodules` and is therefore checked out *first*. For that reason, we have to test not only for ~1 short names, but for others, too. It's hard to just adapt the existing checks in is_ntfs_dotgit(): since Windows 2000 (i.e., in all Windows versions still supported by Git), NTFS short names are only generated in the ~ form up to number 4. After that, a *different* prefix is used, calculated from the long file name using an undocumented, but stable algorithm. For example, the short name of .gitmodules would be GITMOD~1, but if it is taken, and all of ~2, ~3 and ~4 are taken, too, the short name GI7EBA~1 will be used. From there, collisions are handled by incrementing the number, shortening the prefix as needed (until ~9999999 is reached, in which case NTFS will not allow the file to be created). We'd also want to handle .gitignore and .gitattributes, which suffer from a similar problem, using the fall-back short names GI250A~1 and GI7D29~1, respectively. To accommodate for that, we could reimplement the hashing algorithm, but it is just safer and simpler to provide the known prefixes. This algorithm has been reverse-engineered and described at https://usn.pw/blog/gen/2015/06/09/filenames/, which is defunct but still available via https://web.archive.org/. These can be recomputed by running the following Perl script: -- snip -- use warnings; use strict; sub compute_short_name_hash ($) { my $checksum = 0; foreach (split('', $_[0])) { $checksum = ($checksum * 0x25 + ord($_)) & 0xffff; } $checksum = ($checksum * 314159269) & 0xffffffff; $checksum = 1 + (~$checksum & 0x7fffffff) if ($checksum & 0x80000000); $checksum -= (($checksum * 1152921497) >> 60) * 1000000007; return scalar reverse sprintf("%x", $checksum & 0xffff); } print compute_short_name_hash($ARGV[0]); -- snap -- E.g., running that with the argument ".gitignore" will result in "250a" (which then becomes "gi250a" in the code). Signed-off-by: Johannes Schindelin Signed-off-by: Jeff King --- cache.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'cache.h') diff --git a/cache.h b/cache.h index c1041cc02b..041c0fb261 100644 --- a/cache.h +++ b/cache.h @@ -1188,7 +1188,15 @@ int normalize_path_copy(char *dst, const char *src); int longest_ancestor_length(const char *path, struct string_list *prefixes); char *strip_path_suffix(const char *path, const char *suffix); int daemon_avoid_alias(const char *path); -extern int is_ntfs_dotgit(const char *name); + +/* + * These functions match their is_hfs_dotgit() counterparts; see utf8.h for + * details. + */ +int is_ntfs_dotgit(const char *name); +int is_ntfs_dotgitmodules(const char *name); +int is_ntfs_dotgitignore(const char *name); +int is_ntfs_dotgitattributes(const char *name); /* * Returns true iff "str" could be confused as a command-line option when -- cgit v1.3 From 10ecfa76491e4923988337b2e2243b05376b40de Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 4 May 2018 20:03:35 -0400 Subject: verify_path: disallow symlinks in .gitmodules There are a few reasons it's not a good idea to make .gitmodules a symlink, including: 1. It won't be portable to systems without symlinks. 2. It may behave inconsistently, since Git may look at this file in the index or a tree without bothering to resolve any symbolic links. We don't do this _yet_, but the config infrastructure is there and it's planned for the future. With some clever code, we could make (2) work. And some people may not care about (1) if they only work on one platform. But there are a few security reasons to simply disallow it: a. A symlinked .gitmodules file may circumvent any fsck checks of the content. b. Git may read and write from the on-disk file without sanity checking the symlink target. So for example, if you link ".gitmodules" to "../oops" and run "git submodule add", we'll write to the file "oops" outside the repository. Again, both of those are problems that _could_ be solved with sufficient code, but given the complications in (1) and (2), we're better off just outlawing it explicitly. Note the slightly tricky call to verify_path() in update-index's update_one(). There we may not have a mode if we're not updating from the filesystem (e.g., we might just be removing the file). Passing "0" as the mode there works fine; since it's not a symlink, we'll just skip the extra checks. Signed-off-by: Jeff King --- apply.c | 4 ++-- builtin/update-index.c | 6 +++--- cache.h | 2 +- read-cache.c | 40 +++++++++++++++++++++++++++++++--------- 4 files changed, 37 insertions(+), 15 deletions(-) (limited to 'cache.h') diff --git a/apply.c b/apply.c index f8bf0bd932..b96d375595 100644 --- a/apply.c +++ b/apply.c @@ -3867,9 +3867,9 @@ static int check_unsafe_path(struct patch *patch) if (!patch->is_delete) new_name = patch->new_name; - if (old_name && !verify_path(old_name)) + if (old_name && !verify_path(old_name, patch->old_mode)) return error(_("invalid path '%s'"), old_name); - if (new_name && !verify_path(new_name)) + if (new_name && !verify_path(new_name, patch->new_mode)) return error(_("invalid path '%s'"), new_name); return 0; } diff --git a/builtin/update-index.c b/builtin/update-index.c index 8d152ded77..19216595fb 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -399,7 +399,7 @@ static int add_cacheinfo(unsigned int mode, const struct object_id *oid, int size, len, option; struct cache_entry *ce; - if (!verify_path(path)) + if (!verify_path(path, mode)) return error("Invalid path '%s'", path); len = strlen(path); @@ -452,7 +452,7 @@ static void update_one(const char *path) stat_errno = errno; } /* else stat is valid */ - if (!verify_path(path)) { + if (!verify_path(path, st.st_mode)) { fprintf(stderr, "Ignoring path %s\n", path); return; } @@ -543,7 +543,7 @@ static void read_index_info(int nul_term_line) path_name = uq.buf; } - if (!verify_path(path_name)) { + if (!verify_path(path_name, mode)) { fprintf(stderr, "Ignoring path %s\n", path_name); continue; } diff --git a/cache.h b/cache.h index 041c0fb261..5a44f79e26 100644 --- a/cache.h +++ b/cache.h @@ -598,7 +598,7 @@ extern int read_index_unmerged(struct index_state *); extern int write_locked_index(struct index_state *, struct lock_file *lock, unsigned flags); extern int discard_index(struct index_state *); extern int unmerged_index(const struct index_state *); -extern int verify_path(const char *path); +extern int verify_path(const char *path, unsigned mode); extern int strcmp_offset(const char *s1, const char *s2, size_t *first_change); extern int index_dir_exists(struct index_state *istate, const char *name, int namelen); extern void adjust_dirname_case(struct index_state *istate, char *name); diff --git a/read-cache.c b/read-cache.c index 333e0c5429..aa99f1f797 100644 --- a/read-cache.c +++ b/read-cache.c @@ -732,7 +732,7 @@ struct cache_entry *make_cache_entry(unsigned int mode, int size, len; struct cache_entry *ce, *ret; - if (!verify_path(path)) { + if (!verify_path(path, mode)) { error("Invalid path '%s'", path); return NULL; } @@ -796,7 +796,7 @@ int ce_same_name(const struct cache_entry *a, const struct cache_entry *b) * Also, we don't want double slashes or slashes at the * end that can make pathnames ambiguous. */ -static int verify_dotfile(const char *rest) +static int verify_dotfile(const char *rest, unsigned mode) { /* * The first character was '.', but that @@ -814,6 +814,9 @@ static int verify_dotfile(const char *rest) * case-insensitively here, even if ignore_case is not set. * This outlaws ".GIT" everywhere out of an abundance of caution, * since there's really no good reason to allow it. + * + * Once we've seen ".git", we can also find ".gitmodules", etc (also + * case-insensitively). */ case 'g': case 'G': @@ -823,6 +826,12 @@ static int verify_dotfile(const char *rest) break; if (rest[3] == '\0' || is_dir_sep(rest[3])) return 0; + if (S_ISLNK(mode)) { + rest += 3; + if (skip_iprefix(rest, "modules", &rest) && + (*rest == '\0' || is_dir_sep(*rest))) + return 0; + } break; case '.': if (rest[1] == '\0' || is_dir_sep(rest[1])) @@ -831,7 +840,7 @@ static int verify_dotfile(const char *rest) return 1; } -int verify_path(const char *path) +int verify_path(const char *path, unsigned mode) { char c; @@ -844,12 +853,25 @@ int verify_path(const char *path) return 1; if (is_dir_sep(c)) { inside: - if (protect_hfs && is_hfs_dotgit(path)) - return 0; - if (protect_ntfs && is_ntfs_dotgit(path)) - return 0; + if (protect_hfs) { + if (is_hfs_dotgit(path)) + return 0; + if (S_ISLNK(mode)) { + if (is_hfs_dotgitmodules(path)) + return 0; + } + } + if (protect_ntfs) { + if (is_ntfs_dotgit(path)) + return 0; + if (S_ISLNK(mode)) { + if (is_ntfs_dotgitmodules(path)) + return 0; + } + } + c = *path++; - if ((c == '.' && !verify_dotfile(path)) || + if ((c == '.' && !verify_dotfile(path, mode)) || is_dir_sep(c) || c == '\0') return 0; } @@ -1166,7 +1188,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e if (!ok_to_add) return -1; - if (!verify_path(ce->name)) + if (!verify_path(ce->name, ce->ce_mode)) return error("Invalid path '%s'", ce->name); if (!skip_df_check && -- cgit v1.3