aboutsummaryrefslogtreecommitdiff
path: root/odb.c
diff options
context:
space:
mode:
authorPatrick Steinhardt <ps@pks.im>2026-03-20 08:07:40 +0100
committerJunio C Hamano <gitster@pobox.com>2026-03-20 13:16:42 -0700
commit83869e15fa9ef3b0ea2adbfe2fe68a309f95b856 (patch)
tree40dd62b65441c70670cd5f019d11c88f9788e8ed /odb.c
parent6c2ede6e4abed754bb5891c2904212c05efcfb11 (diff)
downloadgit-83869e15fa9ef3b0ea2adbfe2fe68a309f95b856.tar.xz
odb: introduce generic `odb_find_abbrev_len()`
Introduce a new generic `odb_find_abbrev_len()` function as well as source-specific callback functions. This makes the logic to compute the required prefix length to make a given object unique fully pluggable. Signed-off-by: Patrick Steinhardt <ps@pks.im> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'odb.c')
-rw-r--r--odb.c73
1 files changed, 73 insertions, 0 deletions
diff --git a/odb.c b/odb.c
index 3019957b87..3f94a53df1 100644
--- a/odb.c
+++ b/odb.c
@@ -12,6 +12,7 @@
#include "midx.h"
#include "object-file-convert.h"
#include "object-file.h"
+#include "object-name.h"
#include "odb.h"
#include "packfile.h"
#include "path.h"
@@ -964,6 +965,78 @@ out:
return ret;
}
+/*
+ * Return the slot of the most-significant bit set in "val". There are various
+ * ways to do this quickly with fls() or __builtin_clzl(), but speed is
+ * probably not a big deal here.
+ */
+static unsigned msb(unsigned long val)
+{
+ unsigned r = 0;
+ while (val >>= 1)
+ r++;
+ return r;
+}
+
+int odb_find_abbrev_len(struct object_database *odb,
+ const struct object_id *oid,
+ int min_length,
+ unsigned *out)
+{
+ const struct git_hash_algo *algo =
+ oid->algo ? &hash_algos[oid->algo] : odb->repo->hash_algo;
+ const unsigned hexsz = algo->hexsz;
+ unsigned len;
+ int ret;
+
+ if (min_length < 0) {
+ unsigned long count;
+
+ if (odb_count_objects(odb, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
+ count = 0;
+
+ /*
+ * Add one because the MSB only tells us the highest bit set,
+ * not including the value of all the _other_ bits (so "15"
+ * is only one off of 2^4, but the MSB is the 3rd bit.
+ */
+ len = msb(count) + 1;
+ /*
+ * We now know we have on the order of 2^len objects, which
+ * expects a collision at 2^(len/2). But we also care about hex
+ * chars, not bits, and there are 4 bits per hex. So all
+ * together we need to divide by 2 and round up.
+ */
+ len = DIV_ROUND_UP(len, 2);
+ /*
+ * For very small repos, we stick with our regular fallback.
+ */
+ if (len < FALLBACK_DEFAULT_ABBREV)
+ len = FALLBACK_DEFAULT_ABBREV;
+ } else {
+ len = min_length;
+ }
+
+ if (len >= hexsz || !len) {
+ *out = hexsz;
+ ret = 0;
+ goto out;
+ }
+
+ odb_prepare_alternates(odb);
+ for (struct odb_source *source = odb->sources; source; source = source->next) {
+ ret = odb_source_find_abbrev_len(source, oid, len, &len);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+ *out = len;
+
+out:
+ return ret;
+}
+
void odb_assert_oid_type(struct object_database *odb,
const struct object_id *oid, enum object_type expect)
{