From 6677c4665af2d73f670bec382bc82d0f2e9513fb Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 15 Dec 2005 12:54:00 -0800 Subject: get_sha1_basic(): corner case ambiguity fix When .git/refs/heads/frotz and .git/refs/tags/frotz existed, and the object name stored in .git/refs/heads/frotz were corrupt, we ended up picking tags/frotz without complaining. Worse yet, if the corrupt .git/refs/heads/frotz was more than 40 bytes and began with hexadecimal characters, it silently overwritten the initial part of the returned result. This commit adds a couple of tests to demonstrate these cases, with a fix. Signed-off-by: Junio C Hamano --- t/t0000-basic.sh | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ t/test-lib.sh | 1 + 2 files changed, 49 insertions(+) (limited to 't') diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index bc3e711a52..ffa723ea8b 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -205,4 +205,52 @@ test_expect_success \ 'no diff after checkout and git-update-index --refresh.' \ 'git-diff-files >current && cmp -s current /dev/null' + +# extended sha1 parsing and ambiguity resolution + +GIT_AUTHOR_DATE='1995-01-29T16:00:00 -0800' +GIT_AUTHOR_EMAIL=a.u.thor@example.com +GIT_AUTHOR_NAME='A U Thor' +GIT_COMMITTER_DATE='1995-01-29T16:00:00 -0800' +GIT_COMMITTER_EMAIL=c.o.mmitter@example.com +GIT_COMMITTER_NAME='C O Mmitter' +export GIT_AUTHOR_DATE +export GIT_AUTHOR_EMAIL +export GIT_AUTHOR_NAME +export GIT_COMMITTER_DATE +export GIT_COMMITTER_EMAIL +export GIT_COMMITTER_NAME + +test_expect_success \ + 'initial commit.' \ + 'commit=$(echo Initial commit | git-commit-tree $tree) && + echo "$commit" >.git/refs/heads/master && + git-ls-tree HEAD && + test "$commit" = 51a092e9ef6cbbe66d258acd17599d3f80be6162' + +test_expect_success \ + 'Ambiguous' \ + 'echo "$commit" >.git/refs/heads/nasty && + echo "$commit" >.git/refs/tags/nasty && + if git-rev-parse --verify nasty + then + echo "should have barfed" + false + else + : + fi && + # names directly underneath .git/ should not interfere + echo "$commit" >.git/refs/heads/description && + git-rev-parse --verify description && + # broken object name + echo fffffffffffffffffffffffffffffffffffffffg \ + >.git/refs/heads/nasty && + if git-rev-parse --verify nasty + then + echo "should have barfed" + false + else + : + fi' + test_done diff --git a/t/test-lib.sh b/t/test-lib.sh index 2819bef1c4..a97d259e26 100755 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -18,6 +18,7 @@ unset GIT_ALTERNATE_OBJECT_DIRECTORIES unset GIT_AUTHOR_DATE unset GIT_AUTHOR_EMAIL unset GIT_AUTHOR_NAME +unset GIT_COMMITTER_DATE unset GIT_COMMITTER_EMAIL unset GIT_COMMITTER_NAME unset GIT_DIFF_OPTS -- cgit v1.3 From c054d64e8783e5ac2fa68c382f00df9087bca0f9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 17 Dec 2005 00:00:50 -0800 Subject: Revert "get_sha1_basic(): corner case ambiguity fix" This reverts 6677c4665af2d73f670bec382bc82d0f2e9513fb commit. The misguided disambiguation has been reverted, so there is no point testing that misfeature. --- sha1_name.c | 2 +- t/t0000-basic.sh | 48 ------------------------------------------------ 2 files changed, 1 insertion(+), 49 deletions(-) (limited to 't') diff --git a/sha1_name.c b/sha1_name.c index 49e2cc394f..67b69a54fb 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -223,7 +223,7 @@ static int ambiguous_path(const char *path, int len) slash = 0; continue; } - return slash; + break; } return slash; } diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index ffa723ea8b..bc3e711a52 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -205,52 +205,4 @@ test_expect_success \ 'no diff after checkout and git-update-index --refresh.' \ 'git-diff-files >current && cmp -s current /dev/null' - -# extended sha1 parsing and ambiguity resolution - -GIT_AUTHOR_DATE='1995-01-29T16:00:00 -0800' -GIT_AUTHOR_EMAIL=a.u.thor@example.com -GIT_AUTHOR_NAME='A U Thor' -GIT_COMMITTER_DATE='1995-01-29T16:00:00 -0800' -GIT_COMMITTER_EMAIL=c.o.mmitter@example.com -GIT_COMMITTER_NAME='C O Mmitter' -export GIT_AUTHOR_DATE -export GIT_AUTHOR_EMAIL -export GIT_AUTHOR_NAME -export GIT_COMMITTER_DATE -export GIT_COMMITTER_EMAIL -export GIT_COMMITTER_NAME - -test_expect_success \ - 'initial commit.' \ - 'commit=$(echo Initial commit | git-commit-tree $tree) && - echo "$commit" >.git/refs/heads/master && - git-ls-tree HEAD && - test "$commit" = 51a092e9ef6cbbe66d258acd17599d3f80be6162' - -test_expect_success \ - 'Ambiguous' \ - 'echo "$commit" >.git/refs/heads/nasty && - echo "$commit" >.git/refs/tags/nasty && - if git-rev-parse --verify nasty - then - echo "should have barfed" - false - else - : - fi && - # names directly underneath .git/ should not interfere - echo "$commit" >.git/refs/heads/description && - git-rev-parse --verify description && - # broken object name - echo fffffffffffffffffffffffffffffffffffffffg \ - >.git/refs/heads/nasty && - if git-rev-parse --verify nasty - then - echo "should have barfed" - false - else - : - fi' - test_done -- cgit v1.3 From 1fdfd05db2f6e6bacd8c8255992fa4a7f1756176 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 19 Dec 2005 18:27:04 -0800 Subject: tests: make scripts executable just for consistency. Signed-off-by: Junio C Hamano --- t/t1200-tutorial.sh | 0 t/t1300-repo-config.sh | 0 t/t3101-ls-tree-dirname.sh | 0 t/t4103-apply-binary.sh | 0 t/t4109-apply-multifrag.sh | 0 t/t4110-apply-scan.sh | 0 t/t5500-fetch-pack.sh | 0 t/t6101-rev-parse-parents.sh | 0 8 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 t/t1200-tutorial.sh mode change 100644 => 100755 t/t1300-repo-config.sh mode change 100644 => 100755 t/t3101-ls-tree-dirname.sh mode change 100644 => 100755 t/t4103-apply-binary.sh mode change 100644 => 100755 t/t4109-apply-multifrag.sh mode change 100644 => 100755 t/t4110-apply-scan.sh mode change 100644 => 100755 t/t5500-fetch-pack.sh mode change 100644 => 100755 t/t6101-rev-parse-parents.sh (limited to 't') diff --git a/t/t1200-tutorial.sh b/t/t1200-tutorial.sh old mode 100644 new mode 100755 diff --git a/t/t1300-repo-config.sh b/t/t1300-repo-config.sh old mode 100644 new mode 100755 diff --git a/t/t3101-ls-tree-dirname.sh b/t/t3101-ls-tree-dirname.sh old mode 100644 new mode 100755 diff --git a/t/t4103-apply-binary.sh b/t/t4103-apply-binary.sh old mode 100644 new mode 100755 diff --git a/t/t4109-apply-multifrag.sh b/t/t4109-apply-multifrag.sh old mode 100644 new mode 100755 diff --git a/t/t4110-apply-scan.sh b/t/t4110-apply-scan.sh old mode 100644 new mode 100755 diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh old mode 100644 new mode 100755 diff --git a/t/t6101-rev-parse-parents.sh b/t/t6101-rev-parse-parents.sh old mode 100644 new mode 100755 -- cgit v1.3 From 29e4d3635709778bcc808dbad0477efad82f8d7e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 20 Dec 2005 00:02:15 -0800 Subject: Racy GIT This fixes the longstanding "Racy GIT" problem, which was pretty much there from the beginning of time, but was first demonstrated by Pasky in this message on October 24, 2005: http://marc.theaimsgroup.com/?l=git&m=113014629716878 If you run the following sequence of commands: echo frotz >infocom git update-index --add infocom echo xyzzy >infocom so that the second update to file "infocom" does not change st_mtime, what is recorded as the stat information for the cache entry "infocom" exactly matches what is on the filesystem (owner, group, inum, mtime, ctime, mode, length). After this sequence, we incorrectly think "infocom" file still has string "frotz" in it, and get really confused. E.g. git-diff-files would say there is no change, git-update-index --refresh would not even look at the filesystem to correct the situation. Some ways of working around this issue were already suggested by Linus in the same thread on the same day, including waiting until the next second before returning from update-index if a cache entry written out has the current timestamp, but that means we can make at most one commit per second, and given that the e-mail patch workflow used by Linus needs to process at least 5 commits per second, it is not an acceptable solution. Linus notes that git-apply is primarily used to update the index while processing e-mailed patches, which is true, and git-apply's up-to-date check is fooled by the same problem but luckily in the other direction, so it is not really a big issue, but still it is disturbing. The function ce_match_stat() is called to bypass the comparison against filesystem data when the stat data recorded in the cache entry matches what stat() returns from the filesystem. This patch tackles the problem by changing it to actually go to the filesystem data for cache entries that have the same mtime as the index file itself. This works as long as the index file and working tree files are on the filesystems that share the same monotonic clock. Files on network mounted filesystems sometimes get skewed timestamps compared to "date" output, but as long as working tree files' timestamps are skewed the same way as the index file's, this approach still works. The only problematic files are the ones that have the same timestamp as the index file's, because two file updates that sandwitch the index file update must happen within the same second to trigger the problem. Signed-off-by: Junio C Hamano --- read-cache.c | 140 ++++++++++++++++++++++++++++++++-------------------- t/t0010-racy-git.sh | 24 +++++++++ 2 files changed, 110 insertions(+), 54 deletions(-) create mode 100755 t/t0010-racy-git.sh (limited to 't') diff --git a/read-cache.c b/read-cache.c index 6932736203..780601f071 100644 --- a/read-cache.c +++ b/read-cache.c @@ -6,6 +6,7 @@ #include "cache.h" struct cache_entry **active_cache = NULL; +static time_t index_file_timestamp; unsigned int active_nr = 0, active_alloc = 0, active_cache_changed = 0; /* @@ -28,6 +29,64 @@ void fill_stat_cache_info(struct cache_entry *ce, struct stat *st) ce->ce_size = htonl(st->st_size); } +static int ce_compare_data(struct cache_entry *ce, struct stat *st) +{ + int match = -1; + int fd = open(ce->name, O_RDONLY); + + if (fd >= 0) { + unsigned char sha1[20]; + if (!index_fd(sha1, fd, st, 0, NULL)) + match = memcmp(sha1, ce->sha1, 20); + close(fd); + } + return match; +} + +static int ce_compare_link(struct cache_entry *ce, unsigned long expected_size) +{ + int match = -1; + char *target; + void *buffer; + unsigned long size; + char type[10]; + int len; + + target = xmalloc(expected_size); + len = readlink(ce->name, target, expected_size); + if (len != expected_size) { + free(target); + return -1; + } + buffer = read_sha1_file(ce->sha1, type, &size); + if (!buffer) { + free(target); + return -1; + } + if (size == expected_size) + match = memcmp(buffer, target, size); + free(buffer); + free(target); + return match; +} + +static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st) +{ + switch (st->st_mode & S_IFMT) { + case S_IFREG: + if (ce_compare_data(ce, st)) + return DATA_CHANGED; + break; + case S_IFLNK: + if (ce_compare_link(ce, st->st_size)) + return DATA_CHANGED; + break; + default: + return TYPE_CHANGED; + } + return 0; +} + int ce_match_stat(struct cache_entry *ce, struct stat *st) { unsigned int changed = 0; @@ -83,57 +142,37 @@ int ce_match_stat(struct cache_entry *ce, struct stat *st) if (ce->ce_size != htonl(st->st_size)) changed |= DATA_CHANGED; - return changed; -} - -static int ce_compare_data(struct cache_entry *ce, struct stat *st) -{ - int match = -1; - int fd = open(ce->name, O_RDONLY); - if (fd >= 0) { - unsigned char sha1[20]; - if (!index_fd(sha1, fd, st, 0, NULL)) - match = memcmp(sha1, ce->sha1, 20); - close(fd); - } - return match; -} - -static int ce_compare_link(struct cache_entry *ce, unsigned long expected_size) -{ - int match = -1; - char *target; - void *buffer; - unsigned long size; - char type[10]; - int len; + /* + * Within 1 second of this sequence: + * echo xyzzy >file && git-update-index --add file + * running this command: + * echo frotz >file + * would give a falsely clean cache entry. The mtime and + * length match the cache, and other stat fields do not change. + * + * We could detect this at update-index time (the cache entry + * being registered/updated records the same time as "now") + * and delay the return from git-update-index, but that would + * effectively mean we can make at most one commit per second, + * which is not acceptable. Instead, we check cache entries + * whose mtime are the same as the index file timestamp more + * careful than others. + */ + if (!changed && + index_file_timestamp && + index_file_timestamp <= ntohl(ce->ce_mtime.sec)) + changed |= ce_modified_check_fs(ce, st); - target = xmalloc(expected_size); - len = readlink(ce->name, target, expected_size); - if (len != expected_size) { - free(target); - return -1; - } - buffer = read_sha1_file(ce->sha1, type, &size); - if (!buffer) { - free(target); - return -1; - } - if (size == expected_size) - match = memcmp(buffer, target, size); - free(buffer); - free(target); - return match; + return changed; } int ce_modified(struct cache_entry *ce, struct stat *st) { - int changed; + int changed, changed_fs; changed = ce_match_stat(ce, st); if (!changed) return 0; - /* * If the mode or type has changed, there's no point in trying * to refresh the entry - it's not going to match @@ -148,18 +187,9 @@ int ce_modified(struct cache_entry *ce, struct stat *st) if ((changed & DATA_CHANGED) && ce->ce_size != htonl(0)) return changed; - switch (st->st_mode & S_IFMT) { - case S_IFREG: - if (ce_compare_data(ce, st)) - return changed | DATA_CHANGED; - break; - case S_IFLNK: - if (ce_compare_link(ce, st->st_size)) - return changed | DATA_CHANGED; - break; - default: - return changed | TYPE_CHANGED; - } + changed_fs = ce_modified_check_fs(ce, st); + if (changed_fs) + return changed | changed_fs; return 0; } @@ -471,6 +501,7 @@ int read_cache(void) return active_nr; errno = ENOENT; + index_file_timestamp = 0; fd = open(get_index_file(), O_RDONLY); if (fd < 0) { if (errno == ENOENT) @@ -504,6 +535,7 @@ int read_cache(void) offset = offset + ce_size(ce); active_cache[i] = ce; } + index_file_timestamp = st.st_mtime; return active_nr; unmap: diff --git a/t/t0010-racy-git.sh b/t/t0010-racy-git.sh new file mode 100755 index 0000000000..eb175b780f --- /dev/null +++ b/t/t0010-racy-git.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +test_description='racy GIT' + +. ./test-lib.sh + +# This test can give false success if your machine is sufficiently +# slow or your trial happened to happen on second boundary. + +for trial in 0 1 2 3 4 5 6 7 8 9 +do + rm -f .git/index + echo frotz >infocom + echo xyzzy >activision + git update-index --add infocom activision + echo xyzzy >infocom + + files=`git diff-files -p` + test_expect_success \ + "Racy GIT trial #$trial" \ + 'test "" != "$files"' +done + +test_done -- cgit v1.3 From 407c8eb0d09d4b84bbfda9e04895a35c8fd6fef6 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 20 Dec 2005 12:12:18 -0800 Subject: Racy GIT (part #2) The previous round caught the most trivial case well, but broke down once index file is updated again. Smudge problematic entries (they should be very few if any under normal interactive workflow) before writing a new index file out. Signed-off-by: Junio C Hamano --- read-cache.c | 32 +++++++++++++++++++++++++++++++- t/t0010-racy-git.sh | 17 +++++++++++++---- 2 files changed, 44 insertions(+), 5 deletions(-) (limited to 't') diff --git a/read-cache.c b/read-cache.c index 780601f071..afdc2b075b 100644 --- a/read-cache.c +++ b/read-cache.c @@ -87,7 +87,7 @@ static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st) return 0; } -int ce_match_stat(struct cache_entry *ce, struct stat *st) +static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st) { unsigned int changed = 0; @@ -143,6 +143,13 @@ int ce_match_stat(struct cache_entry *ce, struct stat *st) if (ce->ce_size != htonl(st->st_size)) changed |= DATA_CHANGED; + return changed; +} + +int ce_match_stat(struct cache_entry *ce, struct stat *st) +{ + unsigned int changed = ce_match_stat_basic(ce, st); + /* * Within 1 second of this sequence: * echo xyzzy >file && git-update-index --add file @@ -594,6 +601,26 @@ static int ce_flush(SHA_CTX *context, int fd) return 0; } +static void ce_smudge_racily_clean_entry(struct cache_entry *ce) +{ + /* + * The only thing we care about in this function is to smudge the + * falsely clean entry due to touch-update-touch race, so we leave + * everything else as they are. We are called for entries whose + * ce_mtime match the index file mtime. + */ + struct stat st; + + if (lstat(ce->name, &st) < 0) + return; + if (ce_match_stat_basic(ce, &st)) + return; + if (ce_modified_check_fs(ce, &st)) { + /* This is "racily clean"; smudge it */ + ce->ce_size = htonl(0); + } +} + int write_cache(int newfd, struct cache_entry **cache, int entries) { SHA_CTX c; @@ -616,6 +643,9 @@ int write_cache(int newfd, struct cache_entry **cache, int entries) struct cache_entry *ce = cache[i]; if (!ce->ce_mode) continue; + if (index_file_timestamp && + index_file_timestamp <= ntohl(ce->ce_mtime.sec)) + ce_smudge_racily_clean_entry(ce); if (ce_write(&c, newfd, ce, ce_size(ce)) < 0) return -1; } diff --git a/t/t0010-racy-git.sh b/t/t0010-racy-git.sh index eb175b780f..e45a9e40e4 100755 --- a/t/t0010-racy-git.sh +++ b/t/t0010-racy-git.sh @@ -7,18 +7,27 @@ test_description='racy GIT' # This test can give false success if your machine is sufficiently # slow or your trial happened to happen on second boundary. -for trial in 0 1 2 3 4 5 6 7 8 9 +for trial in 0 1 2 3 4 do rm -f .git/index echo frotz >infocom - echo xyzzy >activision - git update-index --add infocom activision + git update-index --add infocom echo xyzzy >infocom files=`git diff-files -p` test_expect_success \ - "Racy GIT trial #$trial" \ + "Racy GIT trial #$trial part A" \ 'test "" != "$files"' + + sleep 1 + echo xyzzy >cornerstone + git update-index --add cornerstone + + files=`git diff-files -p` + test_expect_success \ + "Racy GIT trial #$trial part B" \ + 'test "" != "$files"' + done test_done -- cgit v1.3