From cfd575f0a9730712107e4ee6799a37665bcd8204 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:29 +0100 Subject: odb: introduce `struct odb_for_each_object_options` The `odb_for_each_object()` function only accepts a bitset of flags. In a subsequent commit we'll want to change object iteration to also support iterating over only those objects that have a specific prefix. While we could of course add the prefix to the function signature, or alternatively introduce a new function, both of these options don't really seem to be that sensible. Instead, introduce a new `struct odb_for_each_object_options` that can be passed to a new `odb_for_each_object_ext()` function. Splice through the options structure into the respective object database sources. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'odb.h') diff --git a/odb.h b/odb.h index 9aee260105..a19a8bb50d 100644 --- a/odb.h +++ b/odb.h @@ -481,6 +481,15 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, struct object_info *oi, void *cb_data); +/* + * Options that can be passed to `odb_for_each_object_ext()` and its + * backend-specific implementations. + */ +struct odb_for_each_object_options { + /* A bitfield of `odb_for_each_object_flags`. */ + enum odb_for_each_object_flags flags; +}; + /* * Iterate through all objects contained in the object database. Note that * objects may be iterated over multiple times in case they are either stored @@ -495,6 +504,13 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, * Returns 0 on success, a negative error code in case a failure occurred, or * an arbitrary non-zero error code returned by the callback itself. 
*/ +int odb_for_each_object_ext(struct object_database *odb, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts); + +/* Same as `odb_for_each_object_ext()` with `opts.flags` set to the given flags. */ int odb_for_each_object(struct object_database *odb, const struct object_info *request, odb_for_each_object_cb cb, -- cgit v1.3 From 284b7862be735bb47276ac288ace153ae3d06938 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:30 +0100 Subject: object-name: move logic to iterate through loose prefixed objects The logic to iterate through loose objects that have a certain prefix is currently hosted in "object-name.c". This logic reaches into specifics of the loose object source, so it breaks once a different backend is used for the object storage. Move the logic to iterate through loose objects with a prefix into "object-file.c". This is done by extending the for-each-object options to support an optional prefix that is then honored by the loose source. Naturally, we'll also have this support in the packfile store. This is done in the next commit. Furthermore, there are no users of the loose cache outside of "object-file.c" anymore. As such, convert `odb_source_loose_cache()` to have file scope. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 29 +++++++++++++++++++++++++++-- object-file.h | 7 ------- object-name.c | 10 ++++++---- odb.h | 7 +++++++ 4 files changed, 40 insertions(+), 13 deletions(-) (limited to 'odb.h') diff --git a/object-file.c b/object-file.c index 56cbb27ab9..13732f324f 100644 --- a/object-file.c +++ b/object-file.c @@ -33,6 +33,9 @@ /* The maximum size for an object header. 
*/ #define MAX_HEADER_LEN 32 +static struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid); + static int get_conv_flags(unsigned flags) { if (flags & INDEX_RENORMALIZE) @@ -1845,6 +1848,23 @@ static int for_each_object_wrapper_cb(const struct object_id *oid, } } +static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + if (data->request) { + struct object_info oi = *data->request; + + if (odb_source_loose_read_object_info(data->source, + oid, &oi, 0) < 0) + return -1; + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + int odb_source_loose_for_each_object(struct odb_source *source, const struct object_info *request, odb_for_each_object_cb cb, @@ -1864,6 +1884,11 @@ int odb_source_loose_for_each_object(struct odb_source *source, if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) return 0; + if (opts->prefix) + return oidtree_each(odb_source_loose_cache(source, opts->prefix), + opts->prefix, opts->prefix_hex_len, + for_each_prefixed_object_wrapper_cb, &data); + return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, NULL, NULL, &data); } @@ -1935,8 +1960,8 @@ static int append_loose_object(const struct object_id *oid, return 0; } -struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid) +static struct oidtree *odb_source_loose_cache(struct odb_source *source, + const struct object_id *oid) { struct odb_source_files *files = odb_source_files_downcast(source); int subdir_nr = oid->hash[0]; diff --git a/object-file.h b/object-file.h index 46dfa7b632..f11ad58f6c 100644 --- a/object-file.h +++ b/object-file.h @@ -74,13 +74,6 @@ int odb_source_loose_write_stream(struct odb_source *source, struct odb_write_stream *stream, size_t len, struct object_id *oid); -/* - * Populate and return the loose object 
cache array corresponding to the - * given object ID. - */ -struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid); - /* * Put in `buf` the name of the file in the local object database that * would be used to store a loose object with the specified oid. diff --git a/object-name.c b/object-name.c index a24a1b48e1..929a68dbd0 100644 --- a/object-name.c +++ b/object-name.c @@ -16,7 +16,6 @@ #include "remote.h" #include "dir.h" #include "oid-array.h" -#include "oidtree.h" #include "packfile.h" #include "pretty.h" #include "object-file.h" @@ -103,7 +102,7 @@ static void update_candidates(struct disambiguate_state *ds, const struct object static int match_hash(unsigned, const unsigned char *, const unsigned char *); -static int match_prefix(const struct object_id *oid, void *arg) +static int match_prefix(const struct object_id *oid, struct object_info *oi UNUSED, void *arg) { struct disambiguate_state *ds = arg; /* no need to call match_hash, oidtree_each did prefix match */ @@ -113,11 +112,14 @@ static int match_prefix(const struct object_id *oid, void *arg) static void find_short_object_filename(struct disambiguate_state *ds) { + struct odb_for_each_object_options opts = { + .prefix = &ds->bin_pfx, + .prefix_hex_len = ds->len, + }; struct odb_source *source; for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx), - &ds->bin_pfx, ds->len, match_prefix, ds); + odb_source_loose_for_each_object(source, NULL, match_prefix, ds, &opts); } static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) diff --git a/odb.h b/odb.h index a19a8bb50d..e80fd8f7ab 100644 --- a/odb.h +++ b/odb.h @@ -488,6 +488,13 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid, struct odb_for_each_object_options { /* A bitfield of `odb_for_each_object_flags`. 
*/ enum odb_for_each_object_flags flags; + + /* + * If set, only iterate through objects whose first `prefix_hex_len` + * hex characters match the given prefix. + */ + const struct object_id *prefix; + size_t prefix_hex_len; }; /* -- cgit v1.3 From 83869e15fa9ef3b0ea2adbfe2fe68a309f95b856 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 20 Mar 2026 08:07:40 +0100 Subject: odb: introduce generic `odb_find_abbrev_len()` Introduce a new generic `odb_find_abbrev_len()` function as well as source-specific callback functions. This makes the logic to compute the required prefix length to make a given object unique fully pluggable. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-name.c | 57 +++--------------------------------------- odb.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ odb.h | 16 ++++++++++++ odb/source-files.c | 25 +++++++++++++++++++ odb/source.h | 24 ++++++++++++++++++ 5 files changed, 142 insertions(+), 53 deletions(-) (limited to 'odb.h') diff --git a/object-name.c b/object-name.c index bb2294a193..f6e1f29e1f 100644 --- a/object-name.c +++ b/object-name.c @@ -15,10 +15,9 @@ #include "refs.h" #include "remote.h" #include "dir.h" +#include "odb.h" #include "oid-array.h" -#include "packfile.h" #include "pretty.h" -#include "object-file.h" #include "read-cache-ll.h" #include "repo-settings.h" #include "repository.h" @@ -569,19 +568,6 @@ int repo_for_each_abbrev(struct repository *r, const char *prefix, return ret; } -/* - * Return the slot of the most-significant bit set in "val". There are various - * ways to do this quickly with fls() or __builtin_clzl(), but speed is - * probably not a big deal here. 
- */ -static unsigned msb(unsigned long val) -{ - unsigned r = 0; - while (val >>= 1) - r++; - return r; -} - void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo, const struct object_id *oid, int abbrev_len) { @@ -602,49 +588,14 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, { const struct git_hash_algo *algo = oid->algo ? &hash_algos[oid->algo] : r->hash_algo; - const unsigned hexsz = algo->hexsz; unsigned len; - if (min_len < 0) { - unsigned long count; - - if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) - count = 0; - - /* - * Add one because the MSB only tells us the highest bit set, - * not including the value of all the _other_ bits (so "15" - * is only one off of 2^4, but the MSB is the 3rd bit. - */ - len = msb(count) + 1; - /* - * We now know we have on the order of 2^len objects, which - * expects a collision at 2^(len/2). But we also care about hex - * chars, not bits, and there are 4 bits per hex. So all - * together we need to divide by 2 and round up. - */ - len = DIV_ROUND_UP(len, 2); - /* - * For very small repos, we stick with our regular fallback. 
- */ - if (len < FALLBACK_DEFAULT_ABBREV) - len = FALLBACK_DEFAULT_ABBREV; - } else { - len = min_len; - } + if (odb_find_abbrev_len(r->objects, oid, min_len, &len) < 0) + len = algo->hexsz; oid_to_hex_r(hex, oid); - if (len >= hexsz || !len) - return hexsz; - - odb_prepare_alternates(r->objects); - for (struct odb_source *s = r->objects->sources; s; s = s->next) { - struct odb_source_files *files = odb_source_files_downcast(s); - packfile_store_find_abbrev_len(files->packed, oid, len, &len); - odb_source_loose_find_abbrev_len(s, oid, len, &len); - } - hex[len] = 0; + return len; } diff --git a/odb.c b/odb.c index 3019957b87..3f94a53df1 100644 --- a/odb.c +++ b/odb.c @@ -12,6 +12,7 @@ #include "midx.h" #include "object-file-convert.h" #include "object-file.h" +#include "object-name.h" #include "odb.h" #include "packfile.h" #include "path.h" @@ -964,6 +965,78 @@ out: return ret; } +/* + * Return the slot of the most-significant bit set in "val". There are various + * ways to do this quickly with fls() or __builtin_clzl(), but speed is + * probably not a big deal here. + */ +static unsigned msb(unsigned long val) +{ + unsigned r = 0; + while (val >>= 1) + r++; + return r; +} + +int odb_find_abbrev_len(struct object_database *odb, + const struct object_id *oid, + int min_length, + unsigned *out) +{ + const struct git_hash_algo *algo = + oid->algo ? &hash_algos[oid->algo] : odb->repo->hash_algo; + const unsigned hexsz = algo->hexsz; + unsigned len; + int ret; + + if (min_length < 0) { + unsigned long count; + + if (odb_count_objects(odb, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) + count = 0; + + /* + * Add one because the MSB only tells us the highest bit set, + * not including the value of all the _other_ bits (so "15" + * is only one off of 2^4, but the MSB is the 3rd bit. + */ + len = msb(count) + 1; + /* + * We now know we have on the order of 2^len objects, which + * expects a collision at 2^(len/2). 
But we also care about hex + * chars, not bits, and there are 4 bits per hex. So all + * together we need to divide by 2 and round up. + */ + len = DIV_ROUND_UP(len, 2); + /* + * For very small repos, we stick with our regular fallback. + */ + if (len < FALLBACK_DEFAULT_ABBREV) + len = FALLBACK_DEFAULT_ABBREV; + } else { + len = min_length; + } + + if (len >= hexsz || !len) { + *out = hexsz; + ret = 0; + goto out; + } + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + ret = odb_source_find_abbrev_len(source, oid, len, &len); + if (ret) + goto out; + } + + ret = 0; + *out = len; + +out: + return ret; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { diff --git a/odb.h b/odb.h index e80fd8f7ab..984bafca9d 100644 --- a/odb.h +++ b/odb.h @@ -545,6 +545,22 @@ int odb_count_objects(struct object_database *odb, enum odb_count_objects_flags flags, unsigned long *out); +/* + * Given an object ID, find the minimum length required to make the + * object ID unique across the whole object database. + * + * The `min_len` determines the minimum abbreviated length that'll be returned + * by this function. If `min_len < 0`, then the function will set a sensible + * default minimum abbreviation length. + * + * Returns 0 on success, a negative error code otherwise. The computed length + * will be assigned to `*out`. 
+ */ +int odb_find_abbrev_len(struct object_database *odb, + const struct object_id *oid, + int min_len, + unsigned *out); + enum { /* * By default, `odb_write_object()` does not actually write anything diff --git a/odb/source-files.c b/odb/source-files.c index e90bb689bb..76797569de 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -122,6 +122,30 @@ out: return ret; } +static int odb_source_files_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + unsigned len = min_len; + int ret; + + ret = packfile_store_find_abbrev_len(files->packed, oid, len, &len); + if (ret < 0) + goto out; + + ret = odb_source_loose_find_abbrev_len(source, oid, len, &len); + if (ret < 0) + goto out; + + *out = len; + ret = 0; + +out: + return ret; +} + static int odb_source_files_freshen_object(struct odb_source *source, const struct object_id *oid) { @@ -250,6 +274,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.read_object_stream = odb_source_files_read_object_stream; files->base.for_each_object = odb_source_files_for_each_object; files->base.count_objects = odb_source_files_count_objects; + files->base.find_abbrev_len = odb_source_files_find_abbrev_len; files->base.freshen_object = odb_source_files_freshen_object; files->base.write_object = odb_source_files_write_object; files->base.write_object_stream = odb_source_files_write_object_stream; diff --git a/odb/source.h b/odb/source.h index ee5d6ed530..a9d7d0b96f 100644 --- a/odb/source.h +++ b/odb/source.h @@ -157,6 +157,18 @@ struct odb_source { enum odb_count_objects_flags flags, unsigned long *out); + /* + * This callback is expected to find the minimum required length to + * make the given object ID unique. + * + * The callback is expected to return a negative error code in case it + * failed, 0 otherwise. 
+ */ + int (*find_abbrev_len)(struct odb_source *source, + const struct object_id *oid, + unsigned min_length, + unsigned *out); + /* * This callback is expected to freshen the given object so that its * last access time is set to the current time. This is used to ensure @@ -360,6 +372,18 @@ static inline int odb_source_count_objects(struct odb_source *source, return source->count_objects(source, flags, out); } +/* + * Determine the minimum required length to make the given object ID unique in + * the given source. Returns 0 on success, a negative error code otherwise. + */ +static inline int odb_source_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + return source->find_abbrev_len(source, oid, min_len, out); +} + /* * Freshen an object in the object database by updating its timestamp. * Returns 1 in case the object has been freshened, 0 in case the object does -- cgit v1.3