From fe1b22686f26bed3047294cc4552e50ce58fa954 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:33:13 -0400 Subject: foreach_alt_odb: propagate return value from callback We check the return value of the callback and stop iterating if it is non-zero. However, we do not make the non-zero return value available to the caller, so they have no way of knowing whether the operation succeeded or not (technically they can keep their own error flag in the callback data, but that is unlike our other for_each functions). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'cache.h') diff --git a/cache.h b/cache.h index 3e6a914dba..de290c4f07 100644 --- a/cache.h +++ b/cache.h @@ -1143,7 +1143,7 @@ extern void prepare_alt_odb(void); extern void read_info_alternates(const char * relative_base, int depth); extern void add_to_alternates_file(const char *reference); typedef int alt_odb_fn(struct alternate_object_database *, void *); -extern void foreach_alt_odb(alt_odb_fn, void*); +extern int foreach_alt_odb(alt_odb_fn, void*); struct pack_window { struct pack_window *next; -- cgit v1.3 From 27e1e22d5ee3005f228b67ea94b5af29547b54fe Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:38:55 -0400 Subject: prune: factor out loose-object directory traversal Prune has to walk $GIT_DIR/objects/?? in order to find the set of loose objects to prune. Other parts of the code (e.g., count-objects) want to do the same. Let's factor it out into a reusable for_each-style function. Note that this is not quite a straight code movement. The original code had strange behavior when it found a file of the form "[0-9a-f]{2}/.{38}" that did _not_ contain all hex digits. It executed a "break" from the loop, meaning that we stopped pruning in that directory (but still pruned other directories!). This was probably a bug; we do not want to process the file as an object, but we should keep going otherwise (and that is how the new code handles it). We are also a little more careful with loose object directories which fail to open. The original code silently ignored any failures, but the new code will complain about any problems besides ENOENT. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/prune.c | 87 +++++++++++++++++---------------------------------------- cache.h | 33 ++++++++++++++++++++++ sha1_file.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 61 deletions(-) (limited to 'cache.h') diff --git a/builtin/prune.c b/builtin/prune.c index 144a3bdb33..763f53e06a 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -31,11 +31,23 @@ static int prune_tmp_file(const char *fullpath) return 0; } -static int prune_object(const char *fullpath, const unsigned char *sha1) +static int prune_object(const unsigned char *sha1, const char *fullpath, + void *data) { struct stat st; - if (lstat(fullpath, &st)) - return error("Could not stat '%s'", fullpath); + + /* + * Do we know about this object? + * It must have been reachable + */ + if (lookup_object(sha1)) + return 0; + + if (lstat(fullpath, &st)) { + /* report errors, but do not stop pruning */ + error("Could not stat '%s'", fullpath); + return 0; + } if (st.st_mtime > expire) return 0; if (show_only || verbose) { @@ -48,68 +60,20 @@ static int prune_object(const char *fullpath, const unsigned char *sha1) return 0; } -static int prune_dir(int i, struct strbuf *path) +static int prune_cruft(const char *basename, const char *path, void *data) { - size_t baselen = path->len; - DIR *dir = opendir(path->buf); - struct dirent *de; - - if (!dir) - return 0; - - while ((de = readdir(dir)) != NULL) { - char name[100]; - unsigned char sha1[20]; - - if (is_dot_or_dotdot(de->d_name)) - continue; - if (strlen(de->d_name) == 38) { - sprintf(name, "%02x", i); - memcpy(name+2, de->d_name, 39); - if (get_sha1_hex(name, sha1) < 0) - break; - - /* - * Do we know about this object? - * It must have been reachable - */ - if (lookup_object(sha1)) - continue; - - strbuf_addf(path, "/%s", de->d_name); - prune_object(path->buf, sha1); - strbuf_setlen(path, baselen); - continue; - } - if (starts_with(de->d_name, "tmp_obj_")) { - strbuf_addf(path, "/%s", de->d_name); - prune_tmp_file(path->buf); - strbuf_setlen(path, baselen); - continue; - } - fprintf(stderr, "bad sha1 file: %s/%s\n", path->buf, de->d_name); - } - closedir(dir); - if (!show_only) - rmdir(path->buf); + if (starts_with(basename, "tmp_obj_")) + prune_tmp_file(path); + else + fprintf(stderr, "bad sha1 file: %s\n", path); return 0; } -static void prune_object_dir(const char *path) +static int prune_subdir(int nr, const char *path, void *data) { - struct strbuf buf = STRBUF_INIT; - size_t baselen; - int i; - - strbuf_addstr(&buf, path); - strbuf_addch(&buf, '/'); - baselen = buf.len; - - for (i = 0; i < 256; i++) { - strbuf_addf(&buf, "%02x", i); - prune_dir(i, &buf); - strbuf_setlen(&buf, baselen); - } + if (!show_only) + rmdir(path); + return 0; } /* @@ -173,7 +137,8 @@ int cmd_prune(int argc, const char **argv, const char *prefix) mark_reachable_objects(&revs, 1, progress); stop_progress(&progress); - prune_object_dir(get_object_directory()); + for_each_loose_file_in_objdir(get_object_directory(), prune_object, + prune_cruft, prune_subdir, NULL); prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0); remove_temporary_files(get_object_directory()); diff --git a/cache.h b/cache.h index de290c4f07..bdfbbcf790 100644 --- a/cache.h +++ b/cache.h @@ -1239,6 +1239,39 @@ extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsig extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *); +/* + * Iterate over the files in the loose-object parts of the object + * directory "path", triggering the following callbacks: + * + * - loose_object is called for each loose object we find. + * + * - loose_cruft is called for any files that do not appear to be + * loose objects. Note that we only look in the loose object + * directories "objects/[0-9a-f]{2}/", so we will not report + * "objects/foobar" as cruft. + * + * - loose_subdir is called for each top-level hashed subdirectory + * of the object directory (e.g., "$OBJDIR/f0"). It is called + * after the objects in the directory are processed. + * + * Any callback that is NULL will be ignored. Callbacks returning non-zero + * will end the iteration. + */ +typedef int each_loose_object_fn(const unsigned char *sha1, + const char *path, + void *data); +typedef int each_loose_cruft_fn(const char *basename, + const char *path, + void *data); +typedef int each_loose_subdir_fn(int nr, + const char *path, + void *data); +int for_each_loose_file_in_objdir(const char *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data); + struct object_info { /* Request */ enum object_type *typep; diff --git a/sha1_file.c b/sha1_file.c index aaa3c52869..fa08475c91 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3264,3 +3264,87 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect) die("%s is not a valid '%s' object", sha1_to_hex(sha1), typename(expect)); } + +static int for_each_file_in_obj_subdir(int subdir_nr, + struct strbuf *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) +{ + size_t baselen = path->len; + DIR *dir = opendir(path->buf); + struct dirent *de; + int r = 0; + + if (!dir) { + if (errno == ENOENT) + return 0; + return error("unable to open %s: %s", path->buf, strerror(errno)); + } + + while ((de = readdir(dir))) { + if (is_dot_or_dotdot(de->d_name)) + continue; + + strbuf_setlen(path, baselen); + strbuf_addf(path, "/%s", de->d_name); + + if (strlen(de->d_name) == 38) { + char hex[41]; + unsigned char sha1[20]; + + snprintf(hex, sizeof(hex), "%02x%s", + subdir_nr, de->d_name); + if (!get_sha1_hex(hex, sha1)) { + if (obj_cb) { + r = obj_cb(sha1, path->buf, data); + if (r) + break; + } + continue; + } + } + + if (cruft_cb) { + r = cruft_cb(de->d_name, path->buf, data); + if (r) + break; + } + } + strbuf_setlen(path, baselen); + + if (!r && subdir_cb) + r = subdir_cb(subdir_nr, path->buf, data); + + closedir(dir); + return r; +} + +int for_each_loose_file_in_objdir(const char *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) +{ + struct strbuf buf = STRBUF_INIT; + size_t baselen; + int r = 0; + int i; + + strbuf_addstr(&buf, path); + strbuf_addch(&buf, '/'); + baselen = buf.len; + + for (i = 0; i < 256; i++) { + strbuf_addf(&buf, "%02x", i); + r = for_each_file_in_obj_subdir(i, &buf, obj_cb, cruft_cb, + subdir_cb, data); + strbuf_setlen(&buf, baselen); + if (r) + break; + } + + strbuf_release(&buf); + return r; +} -- cgit v1.3 From 660c889e46d185dc98ba78963528826728b0a55d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:41:21 -0400 Subject: sha1_file: add for_each iterators for loose and packed objects We typically iterate over the reachable objects in a repository by starting at the tips and walking the graph. There's no easy way to iterate over all of the objects, including unreachable ones. Let's provide a way of doing so. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache.h | 11 +++++++++++ sha1_file.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) (limited to 'cache.h') diff --git a/cache.h b/cache.h index bdfbbcf790..51ee856acc 100644 --- a/cache.h +++ b/cache.h @@ -1272,6 +1272,17 @@ int for_each_loose_file_in_objdir(const char *path, each_loose_subdir_fn subdir_cb, void *data); +/* + * Iterate over loose and packed objects in both the local + * repository and any alternates repositories. + */ +typedef int each_packed_object_fn(const unsigned char *sha1, + struct packed_git *pack, + uint32_t pos, + void *data); +extern int for_each_loose_object(each_loose_object_fn, void *); +extern int for_each_packed_object(each_packed_object_fn, void *); + struct object_info { /* Request */ enum object_type *typep; diff --git a/sha1_file.c b/sha1_file.c index fa08475c91..55c65b7ef6 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3348,3 +3348,65 @@ int for_each_loose_file_in_objdir(const char *path, strbuf_release(&buf); return r; } + +struct loose_alt_odb_data { + each_loose_object_fn *cb; + void *data; +}; + +static int loose_from_alt_odb(struct alternate_object_database *alt, + void *vdata) +{ + struct loose_alt_odb_data *data = vdata; + return for_each_loose_file_in_objdir(alt->base, + data->cb, NULL, NULL, + data->data); +} + +int for_each_loose_object(each_loose_object_fn cb, void *data) +{ + struct loose_alt_odb_data alt; + int r; + + r = for_each_loose_file_in_objdir(get_object_directory(), + cb, NULL, NULL, data); + if (r) + return r; + + alt.cb = cb; + alt.data = data; + return foreach_alt_odb(loose_from_alt_odb, &alt); +} + +static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data) +{ + uint32_t i; + int r = 0; + + for (i = 0; i < p->num_objects; i++) { + const unsigned char *sha1 = nth_packed_object_sha1(p, i); + + if (!sha1) + return error("unable to get sha1 of object %u in %s", + i, p->pack_name); + + r = cb(sha1, p, i, data); + if (r) + break; + } + return r; +} + +int for_each_packed_object(each_packed_object_fn cb, void *data) +{ + struct packed_git *p; + int r = 0; + + prepare_packed_git(); + for (p = packed_git; p; p = p->next) { + r = for_each_object_in_pack(p, cb, data); + if (r) + break; + } + return r; +} -- cgit v1.3