diff options
| author | Patrick Steinhardt <ps@pks.im> | 2026-03-12 09:42:58 +0100 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2026-03-12 08:38:42 -0700 |
| commit | 222fddeaa44b633eea345996735b4f7893eb71ec (patch) | |
| tree | 27baa462f10c8b3531ad378daaef92acdee7ac31 | |
| parent | dd587cd59e1575f2d5698cb45b42644e1df9b835 (diff) | |
| download | git-222fddeaa44b633eea345996735b4f7893eb71ec.tar.xz | |
object-file: extract logic to approximate object count
In "builtin/gc.c" we have some logic that checks whether we need to
repack objects. This is done by counting the number of objects that we
have and checking whether it exceeds a certain threshold. We don't
really need an accurate object count though, which is why we only
open a single object directory shard and then extrapolate from there.
Extract this logic into a new function that is owned by the loose object
database source. This is done to prepare for a subsequent change, where
we'll introduce object counting on the object database source level.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
| -rw-r--r-- | builtin/gc.c | 35 | ||||
| -rw-r--r-- | object-file.c | 41 | ||||
| -rw-r--r-- | object-file.h | 13 |
3 files changed, 62 insertions, 27 deletions
diff --git a/builtin/gc.c b/builtin/gc.c index fb329c2cff..a08c7554cb 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -467,37 +467,18 @@ out: static int too_many_loose_objects(int limit) { /* - * Quickly check if a "gc" is needed, by estimating how - * many loose objects there are. Because SHA-1 is evenly - * distributed, we can check only one and get a reasonable - * estimate. + * This is weird, but stems from legacy behaviour: the GC auto + * threshold was always essentially interpreted as if it was rounded up + * to the next multiple 256 of, so we retain this behaviour for now. */ - DIR *dir; - struct dirent *ent; - int auto_threshold; - int num_loose = 0; - int needed = 0; - const unsigned hexsz_loose = the_hash_algo->hexsz - 2; - char *path; + int auto_threshold = DIV_ROUND_UP(limit, 256) * 256; + unsigned long loose_count; - path = repo_git_path(the_repository, "objects/17"); - dir = opendir(path); - free(path); - if (!dir) + if (odb_source_loose_approximate_object_count(the_repository->objects->sources, + &loose_count) < 0) return 0; - auto_threshold = DIV_ROUND_UP(limit, 256); - while ((ent = readdir(dir)) != NULL) { - if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose || - ent->d_name[hexsz_loose] != '\0') - continue; - if (++num_loose > auto_threshold) { - needed = 1; - break; - } - } - closedir(dir); - return needed; + return loose_count > auto_threshold; } static struct packed_git *find_base_packs(struct string_list *packs, diff --git a/object-file.c b/object-file.c index a3ff7f586c..da67e3c9ff 100644 --- a/object-file.c +++ b/object-file.c @@ -1868,6 +1868,47 @@ int odb_source_loose_for_each_object(struct odb_source *source, NULL, NULL, &data); } +int odb_source_loose_approximate_object_count(struct odb_source *source, + unsigned long *out) +{ + const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2; + unsigned long count = 0; + struct dirent *ent; + char *path = NULL; + DIR *dir = NULL; + int ret; + + path = xstrfmt("%s/17", source->path); + + dir = opendir(path); + if (!dir) { + if (errno == ENOENT) { + *out = 0; + ret = 0; + goto out; + } + + ret = error_errno("cannot open object shard '%s'", path); + goto out; + } + + while ((ent = readdir(dir)) != NULL) { + if (strspn(ent->d_name, "0123456789abcdef") != hexsz || + ent->d_name[hexsz] != '\0') + continue; + count++; + } + + *out = count * 256; + ret = 0; + +out: + if (dir) + closedir(dir); + free(path); + return ret; +} + static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) diff --git a/object-file.h b/object-file.h index ff6da65296..b870ea9fa8 100644 --- a/object-file.h +++ b/object-file.h @@ -139,6 +139,19 @@ int odb_source_loose_for_each_object(struct odb_source *source, void *cb_data, unsigned flags); +/* + * Count the number of loose objects in this source. + * + * The object count is approximated by opening a single sharding directory for + * loose objects and scanning its contents. The result is then extrapolated by + * 256. This should generally work as a reasonable estimate given that the + * object hash is supposed to be indistinguishable from random. + * + * Returns 0 on success, a negative error code otherwise. + */ +int odb_source_loose_approximate_object_count(struct odb_source *source, + unsigned long *out); + /** * format_object_header() is a thin wrapper around s xsnprintf() that * writes the initial "<type> <obj-len>" part of the loose object |
