From 230f13225df8b7e7eb0acc91a8c630f9e84967c1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 8 Oct 2005 15:54:01 -0700 Subject: Create object subdirectories on demand This makes it possible to have a "sparse" git object subdirectory structure, something that has become much more attractive now that people use pack-files all the time. As a result of pack-files, a git object directory doesn't necessarily have any individual objects lying around, and in that case it's just wasting space to keep the empty first-level object directories around: on many filesystems the 256 empty directories will be about 1MB of diskspace. Even more importantly, after you re-pack a project that _used_ to be unpacked, you could be left with huge directories that no longer contain anything, but that waste space and take time to look through. With this change, "git prune-packed" can just do an rmdir() on the directories, and they'll get removed if empty, and re-created on demand. This patch also tries to fix up "write_sha1_from_fd()" to use the new common infrastructure for creating the object files, closing a hole where we might otherwise leave half-written objects in the object database. [jc: I unoptimized the part that really removes the fan-out directories to ease transition. init-db still wastes 1MB of diskspace to hold 256 empty fan-outs, and prune-packed rmdir()'s the grown but empty directories, but runs mkdir() immediately after that -- reducing the saving from 150KB to 146KB. These parts will be re-introduced when everybody has the on-demand capability.] 
Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- prune-packed.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'prune-packed.c') diff --git a/prune-packed.c b/prune-packed.c index 5306e8e5ef..73f0f3a462 100644 --- a/prune-packed.c +++ b/prune-packed.c @@ -26,6 +26,9 @@ static void prune_dir(int i, DIR *dir, char *pathname, int len) else if (unlink(pathname) < 0) error("unable to unlink %s", pathname); } + pathname[len] = 0; + if (rmdir(pathname)) + mkdir(pathname, 0777); } static void prune_packed_objects(void) @@ -46,7 +49,7 @@ static void prune_packed_objects(void) sprintf(pathname + len, "%02x/", i); d = opendir(pathname); if (!d) - die("unable to open %s", pathname); + continue; prune_dir(i, d, pathname, len + 3); closedir(d); } -- cgit v1.3-6-g1900 From b8041fe4d84bc75212cb6161d508fd1e9d2203ab Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 16 Oct 2005 14:09:50 -0700 Subject: Sparse-directory safety fix. This will be removed when merging the second phase of Linus' "Create object subdirectories on demand" change anyway, but the code to recreate the empty .git/objects/??/ directory was confused. Signed-off-by: Junio C Hamano --- prune-packed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'prune-packed.c') diff --git a/prune-packed.c b/prune-packed.c index 73f0f3a462..1e0fc0cd9e 100644 --- a/prune-packed.c +++ b/prune-packed.c @@ -27,7 +27,7 @@ static void prune_dir(int i, DIR *dir, char *pathname, int len) error("unable to unlink %s", pathname); } pathname[len] = 0; - if (rmdir(pathname)) + if (!rmdir(pathname)) mkdir(pathname, 0777); } -- cgit v1.3-6-g1900 From 9106c097ad87577019544f45fda11c4d73986597 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Oct 2005 02:30:17 -0700 Subject: Create object subdirectories on demand (phase II) This removes the unoptimization. 
The previous round does not mind missing fan-out directories, but still makes sure they exist, lest older versions choke on a repository created/packed by it. This round does not play that nicely anymore -- empty fan-out directories are not created by init-db, and will stay removed by prune-packed. The prune command also removes empty fan-out directories. Signed-off-by: Junio C Hamano --- git-prune.sh | 1 + init-db.c | 4 ---- prune-packed.c | 3 +-- t/t0000-basic.sh | 8 ++++---- 4 files changed, 6 insertions(+), 10 deletions(-) (limited to 'prune-packed.c') diff --git a/git-prune.sh b/git-prune.sh index 9657dbf271..b28630cacf 100755 --- a/git-prune.sh +++ b/git-prune.sh @@ -22,6 +22,7 @@ sed -ne '/unreachable /{ }' | { cd "$GIT_OBJECT_DIRECTORY" || exit xargs $echo rm -f + rmdir 2>/dev/null [0-9a-f][0-9a-f] } git-prune-packed $dryrun diff --git a/init-db.c b/init-db.c index 2a4aa3c196..ca6fa4d420 100644 --- a/init-db.c +++ b/init-db.c @@ -281,10 +281,6 @@ int main(int argc, char **argv) memcpy(path, sha1_dir, len); safe_create_dir(sha1_dir); - for (i = 0; i < 256; i++) { - sprintf(path+len, "/%02x", i); - safe_create_dir(path); - } strcpy(path+len, "/pack"); safe_create_dir(path); strcpy(path+len, "/info"); diff --git a/prune-packed.c b/prune-packed.c index 1e0fc0cd9e..16685d1d8b 100644 --- a/prune-packed.c +++ b/prune-packed.c @@ -27,8 +27,7 @@ static void prune_dir(int i, DIR *dir, char *pathname, int len) error("unable to unlink %s", pathname); } pathname[len] = 0; - if (!rmdir(pathname)) - mkdir(pathname, 0777); + rmdir(pathname); } static void prune_packed_objects(void) diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index 5c5f854858..dff7d69163 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -28,12 +28,12 @@ test_expect_success \ '.git/objects should be empty after git-init-db in an empty repo.' \ 'cmp -s /dev/null should-be-empty' -# also it should have 258 subdirectories; 256 fan-out anymore, pack, and info. 
-# 259 is counting "objects" itself +# also it should have 2 subdirectories; no fan-out anymore, pack, and info. +# 3 is counting "objects" itself find .git/objects -type d -print >full-of-directories test_expect_success \ - '.git/objects should have 258 subdirectories.' \ - 'test $(wc -l < full-of-directories) = 259' + '.git/objects should have 3 subdirectories.' \ + 'test $(wc -l < full-of-directories) = 3' ################################################################ # Basics of the basics -- cgit v1.3-6-g1900 From 41f222e87a9062833712367d66114cae90b3769a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 28 Oct 2005 09:45:53 -0700 Subject: Be marginally more careful about removing objects The git philosophy when it comes to disk accesses is "Laugh in the face of danger". Notably, since we never modify an existing object, we don't really care that deeply about flushing things to disk, since even if the machine crashes in the middle of a git operation, you can never really have lost any old work. At most, you'd need to figure out the proper heads (which git-fsck-objects can do for you) and re-do the operation. However, there are two exceptions to this: pruning and repacking. Those operations will actually _delete_ old objects that they know about in other ways (ie that they just repacked, or that they have found in other places). However, since they actually modify old state, we should thus be a bit more careful about them. If the machine crashes and the duplicate new objects haven't been flushed to disk, you can actually be in trouble. This is trivially stupid about it by calling "sync" before removing the objects. Not very smart, but we're talking about special operations that are usually done once a week if that. 
Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- git-prune.sh | 1 + git-repack.sh | 1 + prune-packed.c | 1 + 3 files changed, 3 insertions(+) (limited to 'prune-packed.c') diff --git a/git-prune.sh b/git-prune.sh index b28630cacf..ef31bd2a68 100755 --- a/git-prune.sh +++ b/git-prune.sh @@ -15,6 +15,7 @@ do shift; done +sync git-fsck-objects --full --cache --unreachable "$@" | sed -ne '/unreachable /{ s/unreachable [^ ][^ ]* // diff --git a/git-repack.sh b/git-repack.sh index 49547a77c7..d341966efb 100755 --- a/git-repack.sh +++ b/git-repack.sh @@ -62,6 +62,7 @@ then # all-into-one is used. if test "$all_into_one" != '' && test "$existing" != '' then + sync ( cd "$PACKDIR" && for e in $existing do diff --git a/prune-packed.c b/prune-packed.c index 16685d1d8b..26123f7f6b 100644 --- a/prune-packed.c +++ b/prune-packed.c @@ -71,6 +71,7 @@ int main(int argc, char **argv) /* Handle arguments here .. */ usage(prune_packed_usage); } + sync(); prune_packed_objects(); return 0; } -- cgit v1.3-6-g1900