aboutsummaryrefslogtreecommitdiff
path: root/packfile.c
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2026-03-31 20:43:06 -0700
committerJunio C Hamano <gitster@pobox.com>2026-03-31 20:43:06 -0700
commite104e63a813cf581156b5daa7d86835e1030648a (patch)
treee81b92df235a09adc92a6b9dc19f2c61f43e693c /packfile.c
parent270e10ad6dda3379ea0da7efd11e4fbf2cd7a325 (diff)
parent83869e15fa9ef3b0ea2adbfe2fe68a309f95b856 (diff)
downloadgit-e104e63a813cf581156b5daa7d86835e1030648a.tar.xz
Merge branch 'ps/odb-generic-object-name-handling' into ps/odb-cleanup
* ps/odb-generic-object-name-handling: odb: introduce generic `odb_find_abbrev_len()` object-file: move logic to compute packed abbreviation length object-name: move logic to compute loose abbreviation length object-name: simplify computing common prefixes object-name: abbreviate loose object names without `disambiguate_state` object-name: merge `update_candidates()` and `match_prefix()` object-name: backend-generic `get_short_oid()` object-name: backend-generic `repo_collect_ambiguous()` object-name: extract function to parse object ID prefixes object-name: move logic to iterate through packed prefixed objects object-name: move logic to iterate through loose prefixed objects odb: introduce `struct odb_for_each_object_options` oidtree: extend iteration to allow for arbitrary return codes oidtree: modernize the code a bit object-file: fix sparse 'plain integer as NULL pointer' error
Diffstat (limited to 'packfile.c')
-rw-r--r--packfile.c297
1 files changed, 291 insertions, 6 deletions
diff --git a/packfile.c b/packfile.c
index d4de9f3ffe..ee9c7ea1d1 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2371,11 +2371,182 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
}
}
/*
 * Compare the first "len" hexadecimal digits of the binary hashes "a"
 * and "b". Every byte carries two hex digits, so whole bytes are
 * compared while at least two digits remain; a trailing odd digit is
 * compared through the high nibble of the next byte only.
 *
 * Returns 1 when the prefixes agree and 0 otherwise. A "len" of zero
 * matches trivially without reading either buffer.
 */
static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b)
{
	/*
	 * Check the remaining length before touching the data: "len" is
	 * unsigned, so subtracting 2 from 0 or 1 would wrap around to a
	 * huge value and walk far past the end of both buffers. It would
	 * also compare a full byte when only one digit was requested.
	 */
	while (len > 1) {
		if (*a != *b)
			return 0;
		a++;
		b++;
		len -= 2;
	}
	if (len && ((*a ^ *b) & 0xf0))
		return 0;
	return 1;
}
+
+static int for_each_prefixed_object_in_midx(
+ struct packfile_store *store,
+ struct multi_pack_index *m,
+ const struct odb_for_each_object_options *opts,
+ struct packfile_store_for_each_object_wrapper_data *data)
+{
+ int ret;
+
+ for (; m; m = m->base_midx) {
+ uint32_t num, i, first = 0;
+ int len = opts->prefix_hex_len > m->source->odb->repo->hash_algo->hexsz ?
+ m->source->odb->repo->hash_algo->hexsz : opts->prefix_hex_len;
+
+ if (!m->num_objects)
+ continue;
+
+ num = m->num_objects + m->num_objects_in_base;
+
+ bsearch_one_midx(opts->prefix, m, &first);
+
+ /*
+ * At this point, "first" is the location of the lowest
+ * object with an object name that could match "opts->prefix".
+ * See if we have 0, 1 or more objects that actually match(es).
+ */
+ for (i = first; i < num; i++) {
+ const struct object_id *current = NULL;
+ struct object_id oid;
+
+ current = nth_midxed_object_oid(&oid, m, i);
+
+ if (!match_hash(len, opts->prefix->hash, current->hash))
+ break;
+
+ if (data->request) {
+ struct object_info oi = *data->request;
+
+ ret = packfile_store_read_object_info(store, current,
+ &oi, 0);
+ if (ret)
+ goto out;
+
+ ret = data->cb(&oid, &oi, data->cb_data);
+ if (ret)
+ goto out;
+ } else {
+ ret = data->cb(&oid, NULL, data->cb_data);
+ if (ret)
+ goto out;
+ }
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int for_each_prefixed_object_in_pack(
+ struct packfile_store *store,
+ struct packed_git *p,
+ const struct odb_for_each_object_options *opts,
+ struct packfile_store_for_each_object_wrapper_data *data)
+{
+ uint32_t num, i, first = 0;
+ int len = opts->prefix_hex_len > p->repo->hash_algo->hexsz ?
+ p->repo->hash_algo->hexsz : opts->prefix_hex_len;
+ int ret;
+
+ num = p->num_objects;
+ bsearch_pack(opts->prefix, p, &first);
+
+ /*
+ * At this point, "first" is the location of the lowest object
+ * with an object name that could match "bin_pfx". See if we have
+ * 0, 1 or more objects that actually match(es).
+ */
+ for (i = first; i < num; i++) {
+ struct object_id oid;
+
+ nth_packed_object_id(&oid, p, i);
+ if (!match_hash(len, opts->prefix->hash, oid.hash))
+ break;
+
+ if (data->request) {
+ struct object_info oi = *data->request;
+
+ ret = packfile_store_read_object_info(store, &oid, &oi, 0);
+ if (ret)
+ goto out;
+
+ ret = data->cb(&oid, &oi, data->cb_data);
+ if (ret)
+ goto out;
+ } else {
+ ret = data->cb(&oid, NULL, data->cb_data);
+ if (ret)
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int packfile_store_for_each_prefixed_object(
+ struct packfile_store *store,
+ const struct odb_for_each_object_options *opts,
+ struct packfile_store_for_each_object_wrapper_data *data)
+{
+ struct packfile_list_entry *e;
+ struct multi_pack_index *m;
+ bool pack_errors = false;
+ int ret;
+
+ if (opts->flags)
+ BUG("flags unsupported");
+
+ store->skip_mru_updates = true;
+
+ m = get_multi_pack_index(store->source);
+ if (m) {
+ ret = for_each_prefixed_object_in_midx(store, m, opts, data);
+ if (ret)
+ goto out;
+ }
+
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ if (e->pack->multi_pack_index)
+ continue;
+
+ if (open_pack_index(e->pack)) {
+ pack_errors = true;
+ continue;
+ }
+
+ if (!e->pack->num_objects)
+ continue;
+
+ ret = for_each_prefixed_object_in_pack(store, e->pack, opts, data);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ store->skip_mru_updates = false;
+ if (!ret && pack_errors)
+ ret = -1;
+ return ret;
+}
+
int packfile_store_for_each_object(struct packfile_store *store,
const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
- unsigned flags)
+ const struct odb_for_each_object_options *opts)
{
struct packfile_store_for_each_object_wrapper_data data = {
.store = store,
@@ -2386,20 +2557,23 @@ int packfile_store_for_each_object(struct packfile_store *store,
struct packfile_list_entry *e;
int pack_errors = 0, ret;
+ if (opts->prefix)
+ return packfile_store_for_each_prefixed_object(store, opts, &data);
+
store->skip_mru_updates = true;
for (e = packfile_store_get_packs(store); e; e = e->next) {
struct packed_git *p = e->pack;
- if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
- if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
!p->pack_promisor)
continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
p->pack_keep_in_core)
continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
p->pack_keep)
continue;
if (open_pack_index(p)) {
@@ -2408,7 +2582,7 @@ int packfile_store_for_each_object(struct packfile_store *store,
}
ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper,
- &data, flags);
+ &data, opts->flags);
if (ret)
goto out;
}
@@ -2423,6 +2597,117 @@ out:
return ret;
}
+static int extend_abbrev_len(const struct object_id *a,
+ const struct object_id *b,
+ unsigned *out)
+{
+ unsigned len = oid_common_prefix_hexlen(a, b);
+ if (len != hash_algos[a->algo].hexsz && len >= *out)
+ *out = len + 1;
+ return 0;
+}
+
+static void find_abbrev_len_for_midx(struct multi_pack_index *m,
+ const struct object_id *oid,
+ unsigned min_len,
+ unsigned *out)
+{
+ unsigned len = min_len;
+
+ for (; m; m = m->base_midx) {
+ int match = 0;
+ uint32_t num, first = 0;
+ struct object_id found_oid;
+
+ if (!m->num_objects)
+ continue;
+
+ num = m->num_objects + m->num_objects_in_base;
+ match = bsearch_one_midx(oid, m, &first);
+
+ /*
+ * first is now the position in the packfile where we
+ * would insert the object ID if it does not exist (or the
+ * position of the object ID if it does exist). Hence, we
+ * consider a maximum of two objects nearby for the
+ * abbreviation length.
+ */
+
+ if (!match) {
+ if (nth_midxed_object_oid(&found_oid, m, first))
+ extend_abbrev_len(&found_oid, oid, &len);
+ } else if (first < num - 1) {
+ if (nth_midxed_object_oid(&found_oid, m, first + 1))
+ extend_abbrev_len(&found_oid, oid, &len);
+ }
+ if (first > 0) {
+ if (nth_midxed_object_oid(&found_oid, m, first - 1))
+ extend_abbrev_len(&found_oid, oid, &len);
+ }
+ }
+
+ *out = len;
+}
+
+static void find_abbrev_len_for_pack(struct packed_git *p,
+ const struct object_id *oid,
+ unsigned min_len,
+ unsigned *out)
+{
+ int match;
+ uint32_t num, first = 0;
+ struct object_id found_oid;
+ unsigned len = min_len;
+
+ num = p->num_objects;
+ match = bsearch_pack(oid, p, &first);
+
+ /*
+ * first is now the position in the packfile where we would insert
+ * the object ID if it does not exist (or the position of mad->hash if
+ * it does exist). Hence, we consider a maximum of two objects
+ * nearby for the abbreviation length.
+ */
+ if (!match) {
+ if (!nth_packed_object_id(&found_oid, p, first))
+ extend_abbrev_len(&found_oid, oid, &len);
+ } else if (first < num - 1) {
+ if (!nth_packed_object_id(&found_oid, p, first + 1))
+ extend_abbrev_len(&found_oid, oid, &len);
+ }
+ if (first > 0) {
+ if (!nth_packed_object_id(&found_oid, p, first - 1))
+ extend_abbrev_len(&found_oid, oid, &len);
+ }
+
+ *out = len;
+}
+
+int packfile_store_find_abbrev_len(struct packfile_store *store,
+ const struct object_id *oid,
+ unsigned min_len,
+ unsigned *out)
+{
+ struct packfile_list_entry *e;
+ struct multi_pack_index *m;
+
+ m = get_multi_pack_index(store->source);
+ if (m)
+ find_abbrev_len_for_midx(m, oid, min_len, &min_len);
+
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ if (e->pack->multi_pack_index)
+ continue;
+ if (open_pack_index(e->pack) || !e->pack->num_objects)
+ continue;
+
+ find_abbrev_len_for_pack(e->pack, oid, min_len, &min_len);
+ }
+
+ *out = min_len;
+ return 0;
+}
+
struct add_promisor_object_data {
struct repository *repo;
struct oidset *set;