aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--builtin/cat-file.c7
-rw-r--r--builtin/pack-objects.c12
-rw-r--r--cbtree.c21
-rw-r--r--cbtree.h17
-rw-r--r--commit-graph.c5
-rw-r--r--hash.c18
-rw-r--r--hash.h3
-rw-r--r--object-file.c76
-rw-r--r--object-file.h21
-rw-r--r--object-name.c437
-rw-r--r--odb.c99
-rw-r--r--odb.h39
-rw-r--r--odb/source-files.c33
-rw-r--r--odb/source.h30
-rw-r--r--oidtree.c63
-rw-r--r--oidtree.h48
-rw-r--r--packfile.c297
-rw-r--r--packfile.h7
-rw-r--r--t/unit-tests/u-oidtree.c18
19 files changed, 781 insertions, 470 deletions
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index b6f12f41d6..cd13a3a89f 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -848,6 +848,9 @@ static void batch_each_object(struct batch_options *opt,
.callback = callback,
.payload = _payload,
};
+ struct odb_for_each_object_options opts = {
+ .flags = flags,
+ };
struct bitmap_index *bitmap = NULL;
struct odb_source *source;
@@ -860,7 +863,7 @@ static void batch_each_object(struct batch_options *opt,
odb_prepare_alternates(the_repository->objects);
for (source = the_repository->objects->sources; source; source = source->next) {
int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
- &payload, flags);
+ &payload, &opts);
if (ret)
break;
}
@@ -884,7 +887,7 @@ static void batch_each_object(struct batch_options *opt,
for (source = the_repository->objects->sources; source; source = source->next) {
struct odb_source_files *files = odb_source_files_downcast(source);
int ret = packfile_store_for_each_object(files->packed, &oi,
- batch_one_object_oi, &payload, flags);
+ batch_one_object_oi, &payload, &opts);
if (ret)
break;
}
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index da1087930c..88388254d9 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -4359,6 +4359,12 @@ static void add_objects_in_unpacked_packs(void)
{
struct odb_source *source;
time_t mtime;
+ struct odb_for_each_object_options opts = {
+ .flags = ODB_FOR_EACH_OBJECT_PACK_ORDER |
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS,
+ };
struct object_info oi = {
.mtimep = &mtime,
};
@@ -4371,11 +4377,7 @@ static void add_objects_in_unpacked_packs(void)
continue;
if (packfile_store_for_each_object(files->packed, &oi,
- add_object_in_unpacked_pack, NULL,
- ODB_FOR_EACH_OBJECT_PACK_ORDER |
- ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
- ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
- ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
+ add_object_in_unpacked_pack, NULL, &opts))
die(_("cannot open pack index"));
}
}
diff --git a/cbtree.c b/cbtree.c
index cf8cf75b89..4ab794bddc 100644
--- a/cbtree.c
+++ b/cbtree.c
@@ -96,26 +96,28 @@ struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen)
return p && !memcmp(p->k, k, klen) ? p : NULL;
}
-static enum cb_next cb_descend(struct cb_node *p, cb_iter fn, void *arg)
+static int cb_descend(struct cb_node *p, cb_iter fn, void *arg)
{
if (1 & (uintptr_t)p) {
struct cb_node *q = cb_node_of(p);
- enum cb_next n = cb_descend(q->child[0], fn, arg);
-
- return n == CB_BREAK ? n : cb_descend(q->child[1], fn, arg);
+ int ret = cb_descend(q->child[0], fn, arg);
+ if (ret)
+ return ret;
+ return cb_descend(q->child[1], fn, arg);
} else {
return fn(p, arg);
}
}
-void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen,
- cb_iter fn, void *arg)
+int cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen,
+ cb_iter fn, void *arg)
{
struct cb_node *p = t->root;
struct cb_node *top = p;
size_t i = 0;
- if (!p) return; /* empty tree */
+ if (!p)
+ return 0; /* empty tree */
/* Walk tree, maintaining top pointer */
while (1 & (uintptr_t)p) {
@@ -130,7 +132,8 @@ void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen,
for (i = 0; i < klen; i++) {
if (p->k[i] != kpfx[i])
- return; /* "best" match failed */
+ return 0; /* "best" match failed */
}
- cb_descend(top, fn, arg);
+
+ return cb_descend(top, fn, arg);
}
diff --git a/cbtree.h b/cbtree.h
index 43193abdda..c374b1b3db 100644
--- a/cbtree.h
+++ b/cbtree.h
@@ -30,11 +30,6 @@ struct cb_tree {
struct cb_node *root;
};
-enum cb_next {
- CB_CONTINUE = 0,
- CB_BREAK = 1
-};
-
#define CBTREE_INIT { 0 }
static inline void cb_init(struct cb_tree *t)
@@ -46,9 +41,15 @@ static inline void cb_init(struct cb_tree *t)
struct cb_node *cb_lookup(struct cb_tree *, const uint8_t *k, size_t klen);
struct cb_node *cb_insert(struct cb_tree *, struct cb_node *, size_t klen);
-typedef enum cb_next (*cb_iter)(struct cb_node *, void *arg);
+/*
+ * Callback invoked by `cb_each()` for each node in the critbit tree. A return
+ * value of 0 will cause the iteration to continue, a non-zero return code will
+ * cause iteration to abort. The error code will be relayed back from
+ * `cb_each()` in that case.
+ */
+typedef int (*cb_iter)(struct cb_node *, void *arg);
-void cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen,
- cb_iter, void *arg);
+int cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen,
+ cb_iter, void *arg);
#endif /* CBTREE_H */
diff --git a/commit-graph.c b/commit-graph.c
index c030003330..df4b4a125e 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1969,6 +1969,9 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
{
struct odb_source *source;
enum object_type type;
+ struct odb_for_each_object_options opts = {
+ .flags = ODB_FOR_EACH_OBJECT_PACK_ORDER,
+ };
struct object_info oi = {
.typep = &type,
};
@@ -1983,7 +1986,7 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
for (source = ctx->r->objects->sources; source; source = source->next) {
struct odb_source_files *files = odb_source_files_downcast(source);
packfile_store_for_each_object(files->packed, &oi, add_packed_commits_oi,
- ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER);
+ ctx, &opts);
}
if (ctx->progress_done < ctx->approx_nr_objects)
diff --git a/hash.c b/hash.c
index 553f2008ea..e925b9754e 100644
--- a/hash.c
+++ b/hash.c
@@ -317,3 +317,21 @@ const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop)
/* Otherwise use the default one. */
return algop;
}
+
+unsigned oid_common_prefix_hexlen(const struct object_id *a,
+ const struct object_id *b)
+{
+ unsigned rawsz = hash_algos[a->algo].rawsz;
+
+ for (unsigned i = 0; i < rawsz; i++) {
+ if (a->hash[i] == b->hash[i])
+ continue;
+
+ if ((a->hash[i] ^ b->hash[i]) & 0xf0)
+ return i * 2;
+ else
+ return i * 2 + 1;
+ }
+
+ return rawsz * 2;
+}
diff --git a/hash.h b/hash.h
index d51efce1d3..c082a53c9a 100644
--- a/hash.h
+++ b/hash.h
@@ -396,6 +396,9 @@ static inline int oideq(const struct object_id *oid1, const struct object_id *oi
return !memcmp(oid1->hash, oid2->hash, GIT_MAX_RAWSZ);
}
+unsigned oid_common_prefix_hexlen(const struct object_id *a,
+ const struct object_id *b);
+
static inline void oidcpy(struct object_id *dst, const struct object_id *src)
{
memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ);
diff --git a/object-file.c b/object-file.c
index f0b029ff0b..4f77ce0982 100644
--- a/object-file.c
+++ b/object-file.c
@@ -33,6 +33,9 @@
/* The maximum size for an object header. */
#define MAX_HEADER_LEN 32
+static struct oidtree *odb_source_loose_cache(struct odb_source *source,
+ const struct object_id *oid);
+
static int get_conv_flags(unsigned flags)
{
if (flags & INDEX_RENORMALIZE)
@@ -1845,11 +1848,28 @@ static int for_each_object_wrapper_cb(const struct object_id *oid,
}
}
+static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid,
+ void *cb_data)
+{
+ struct for_each_object_wrapper_data *data = cb_data;
+ if (data->request) {
+ struct object_info oi = *data->request;
+
+ if (odb_source_loose_read_object_info(data->source,
+ oid, &oi, 0) < 0)
+ return -1;
+
+ return data->cb(oid, &oi, data->cb_data);
+ } else {
+ return data->cb(oid, NULL, data->cb_data);
+ }
+}
+
int odb_source_loose_for_each_object(struct odb_source *source,
const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
- unsigned flags)
+ const struct odb_for_each_object_options *opts)
{
struct for_each_object_wrapper_data data = {
.source = source,
@@ -1859,11 +1879,16 @@ int odb_source_loose_for_each_object(struct odb_source *source,
};
/* There are no loose promisor objects, so we can return immediately. */
- if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY))
+ if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY))
return 0;
- if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local)
+ if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local)
return 0;
+ if (opts->prefix)
+ return oidtree_each(odb_source_loose_cache(source, opts->prefix),
+ opts->prefix, opts->prefix_hex_len,
+ for_each_prefixed_object_wrapper_cb, &data);
+
return for_each_loose_file_in_source(source, for_each_object_wrapper_cb,
NULL, NULL, &data);
}
@@ -1914,9 +1939,10 @@ int odb_source_loose_count_objects(struct odb_source *source,
*out = count * 256;
ret = 0;
} else {
+ struct odb_for_each_object_options opts = { 0 };
*out = 0;
ret = odb_source_loose_for_each_object(source, NULL, count_loose_object,
- out, 0);
+ out, &opts);
}
out:
@@ -1926,6 +1952,44 @@ out:
return ret;
}
+struct find_abbrev_len_data {
+ const struct object_id *oid;
+ unsigned len;
+};
+
+static int find_abbrev_len_cb(const struct object_id *oid,
+ struct object_info *oi UNUSED,
+ void *cb_data)
+{
+ struct find_abbrev_len_data *data = cb_data;
+ unsigned len = oid_common_prefix_hexlen(oid, data->oid);
+ if (len != hash_algos[oid->algo].hexsz && len >= data->len)
+ data->len = len + 1;
+ return 0;
+}
+
+int odb_source_loose_find_abbrev_len(struct odb_source *source,
+ const struct object_id *oid,
+ unsigned min_len,
+ unsigned *out)
+{
+ struct odb_for_each_object_options opts = {
+ .prefix = oid,
+ .prefix_hex_len = min_len,
+ };
+ struct find_abbrev_len_data data = {
+ .oid = oid,
+ .len = min_len,
+ };
+ int ret;
+
+ ret = odb_source_loose_for_each_object(source, NULL, find_abbrev_len_cb,
+ &data, &opts);
+ *out = data.len;
+
+ return ret;
+}
+
static int append_loose_object(const struct object_id *oid,
const char *path UNUSED,
void *data)
@@ -1934,8 +1998,8 @@ static int append_loose_object(const struct object_id *oid,
return 0;
}
-struct oidtree *odb_source_loose_cache(struct odb_source *source,
- const struct object_id *oid)
+static struct oidtree *odb_source_loose_cache(struct odb_source *source,
+ const struct object_id *oid)
{
struct odb_source_files *files = odb_source_files_downcast(source);
int subdir_nr = oid->hash[0];
diff --git a/object-file.h b/object-file.h
index f8d8805a18..3686f182e4 100644
--- a/object-file.h
+++ b/object-file.h
@@ -75,13 +75,6 @@ int odb_source_loose_write_stream(struct odb_source *source,
struct object_id *oid);
/*
- * Populate and return the loose object cache array corresponding to the
- * given object ID.
- */
-struct oidtree *odb_source_loose_cache(struct odb_source *source,
- const struct object_id *oid);
-
-/*
* Put in `buf` the name of the file in the local object database that
* would be used to store a loose object with the specified oid.
*/
@@ -137,7 +130,7 @@ int odb_source_loose_for_each_object(struct odb_source *source,
const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
- unsigned flags);
+ const struct odb_for_each_object_options *opts);
/*
* Count the number of loose objects in this source.
@@ -153,6 +146,18 @@ int odb_source_loose_count_objects(struct odb_source *source,
enum odb_count_objects_flags flags,
unsigned long *out);
+/*
+ * Find the shortest unique prefix for the given object ID, where `min_len` is
+ * the minimum length that the prefix should have.
+ *
+ * Returns 0 on success, in which case the computed length will be written to
+ * `out`. Otherwise, a negative error code is returned.
+ */
+int odb_source_loose_find_abbrev_len(struct odb_source *source,
+ const struct object_id *oid,
+ unsigned min_len,
+ unsigned *out);
+
/**
* format_object_header() is a thin wrapper around s xsnprintf() that
* writes the initial "<type> <obj-len>" part of the loose object
diff --git a/object-name.c b/object-name.c
index 84a34b83ba..21dcdc4a0e 100644
--- a/object-name.c
+++ b/object-name.c
@@ -15,11 +15,9 @@
#include "refs.h"
#include "remote.h"
#include "dir.h"
+#include "odb.h"
#include "oid-array.h"
-#include "oidtree.h"
-#include "packfile.h"
#include "pretty.h"
-#include "object-file.h"
#include "read-cache-ll.h"
#include "repo-settings.h"
#include "repository.h"
@@ -49,30 +47,29 @@ struct disambiguate_state {
unsigned candidate_ok:1;
unsigned disambiguate_fn_used:1;
unsigned ambiguous:1;
- unsigned always_call_fn:1;
};
-static void update_candidates(struct disambiguate_state *ds, const struct object_id *current)
+static int update_disambiguate_state(const struct object_id *current,
+ struct object_info *oi UNUSED,
+ void *cb_data)
{
+ struct disambiguate_state *ds = cb_data;
+
/* The hash algorithm of current has already been filtered */
- if (ds->always_call_fn) {
- ds->ambiguous = ds->fn(ds->repo, current, ds->cb_data) ? 1 : 0;
- return;
- }
if (!ds->candidate_exists) {
/* this is the first candidate */
oidcpy(&ds->candidate, current);
ds->candidate_exists = 1;
- return;
+ return 0;
} else if (oideq(&ds->candidate, current)) {
/* the same as what we already have seen */
- return;
+ return 0;
}
if (!ds->fn) {
/* cannot disambiguate between ds->candidate and current */
ds->ambiguous = 1;
- return;
+ return ds->ambiguous;
}
if (!ds->candidate_checked) {
@@ -85,7 +82,7 @@ static void update_candidates(struct disambiguate_state *ds, const struct object
/* discard the candidate; we know it does not satisfy fn */
oidcpy(&ds->candidate, current);
ds->candidate_checked = 0;
- return;
+ return 0;
}
/* if we reach this point, we know ds->candidate satisfies fn */
@@ -96,128 +93,12 @@ static void update_candidates(struct disambiguate_state *ds, const struct object
*/
ds->candidate_ok = 0;
ds->ambiguous = 1;
+ return ds->ambiguous;
}
/* otherwise, current can be discarded and candidate is still good */
-}
-
-static int match_hash(unsigned, const unsigned char *, const unsigned char *);
-
-static enum cb_next match_prefix(const struct object_id *oid, void *arg)
-{
- struct disambiguate_state *ds = arg;
- /* no need to call match_hash, oidtree_each did prefix match */
- update_candidates(ds, oid);
- return ds->ambiguous ? CB_BREAK : CB_CONTINUE;
-}
-
-static void find_short_object_filename(struct disambiguate_state *ds)
-{
- struct odb_source *source;
-
- for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next)
- oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx),
- &ds->bin_pfx, ds->len, match_prefix, ds);
-}
-
-static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b)
-{
- do {
- if (*a != *b)
- return 0;
- a++;
- b++;
- len -= 2;
- } while (len > 1);
- if (len)
- if ((*a ^ *b) & 0xf0)
- return 0;
- return 1;
-}
-static void unique_in_midx(struct multi_pack_index *m,
- struct disambiguate_state *ds)
-{
- for (; m; m = m->base_midx) {
- uint32_t num, i, first = 0;
- const struct object_id *current = NULL;
- int len = ds->len > ds->repo->hash_algo->hexsz ?
- ds->repo->hash_algo->hexsz : ds->len;
-
- if (!m->num_objects)
- continue;
-
- num = m->num_objects + m->num_objects_in_base;
-
- bsearch_one_midx(&ds->bin_pfx, m, &first);
-
- /*
- * At this point, "first" is the location of the lowest
- * object with an object name that could match
- * "bin_pfx". See if we have 0, 1 or more objects that
- * actually match(es).
- */
- for (i = first; i < num && !ds->ambiguous; i++) {
- struct object_id oid;
- current = nth_midxed_object_oid(&oid, m, i);
- if (!match_hash(len, ds->bin_pfx.hash, current->hash))
- break;
- update_candidates(ds, current);
- }
- }
-}
-
-static void unique_in_pack(struct packed_git *p,
- struct disambiguate_state *ds)
-{
- uint32_t num, i, first = 0;
- int len = ds->len > ds->repo->hash_algo->hexsz ?
- ds->repo->hash_algo->hexsz : ds->len;
-
- if (p->multi_pack_index)
- return;
-
- if (open_pack_index(p) || !p->num_objects)
- return;
-
- num = p->num_objects;
- bsearch_pack(&ds->bin_pfx, p, &first);
-
- /*
- * At this point, "first" is the location of the lowest object
- * with an object name that could match "bin_pfx". See if we have
- * 0, 1 or more objects that actually match(es).
- */
- for (i = first; i < num && !ds->ambiguous; i++) {
- struct object_id oid;
- nth_packed_object_id(&oid, p, i);
- if (!match_hash(len, ds->bin_pfx.hash, oid.hash))
- break;
- update_candidates(ds, &oid);
- }
-}
-
-static void find_short_packed_object(struct disambiguate_state *ds)
-{
- struct odb_source *source;
- struct packed_git *p;
-
- /* Skip, unless oids from the storage hash algorithm are wanted */
- if (ds->bin_pfx.algo && (&hash_algos[ds->bin_pfx.algo] != ds->repo->hash_algo))
- return;
-
- odb_prepare_alternates(ds->repo->objects);
- for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) {
- struct multi_pack_index *m = get_multi_pack_index(source);
- if (m)
- unique_in_midx(m, ds);
- }
-
- repo_for_each_pack(ds->repo, p) {
- if (ds->ambiguous)
- break;
- unique_in_pack(p, ds);
- }
+ return 0;
}
static int finish_object_disambiguation(struct disambiguate_state *ds,
@@ -348,41 +229,57 @@ int set_disambiguate_hint_config(const char *var, const char *value)
return error("unknown hint type for '%s': %s", var, value);
}
+static int parse_oid_prefix(const char *name, int len,
+ const struct git_hash_algo *algo,
+ char *hex_out,
+ struct object_id *oid_out)
+{
+ for (int i = 0; i < len; i++) {
+ unsigned char c = name[i];
+ unsigned char val;
+ if (c >= '0' && c <= '9') {
+ val = c - '0';
+ } else if (c >= 'a' && c <= 'f') {
+ val = c - 'a' + 10;
+ } else if (c >= 'A' && c <='F') {
+ val = c - 'A' + 10;
+ c -= 'A' - 'a';
+ } else {
+ return -1;
+ }
+
+ if (hex_out)
+ hex_out[i] = c;
+ if (oid_out) {
+ if (!(i & 1))
+ val <<= 4;
+ oid_out->hash[i >> 1] |= val;
+ }
+ }
+
+ if (hex_out)
+ hex_out[len] = '\0';
+ if (oid_out)
+ oid_out->algo = algo ? hash_algo_by_ptr(algo) : GIT_HASH_UNKNOWN;
+
+ return 0;
+}
+
static int init_object_disambiguation(struct repository *r,
const char *name, int len,
const struct git_hash_algo *algo,
struct disambiguate_state *ds)
{
- int i;
-
if (len < MINIMUM_ABBREV || len > GIT_MAX_HEXSZ)
return -1;
memset(ds, 0, sizeof(*ds));
- for (i = 0; i < len ;i++) {
- unsigned char c = name[i];
- unsigned char val;
- if (c >= '0' && c <= '9')
- val = c - '0';
- else if (c >= 'a' && c <= 'f')
- val = c - 'a' + 10;
- else if (c >= 'A' && c <='F') {
- val = c - 'A' + 10;
- c -= 'A' - 'a';
- }
- else
- return -1;
- ds->hex_pfx[i] = c;
- if (!(i & 1))
- val <<= 4;
- ds->bin_pfx.hash[i >> 1] |= val;
- }
+ if (parse_oid_prefix(name, len, algo, ds->hex_pfx, &ds->bin_pfx) < 0)
+ return -1;
ds->len = len;
- ds->hex_pfx[len] = '\0';
ds->repo = r;
- ds->bin_pfx.algo = algo ? hash_algo_by_ptr(algo) : GIT_HASH_UNKNOWN;
odb_prepare_alternates(r->objects);
return 0;
}
@@ -510,8 +407,8 @@ static int collect_ambiguous(const struct object_id *oid, void *data)
return 0;
}
-static int repo_collect_ambiguous(struct repository *r UNUSED,
- const struct object_id *oid,
+static int repo_collect_ambiguous(const struct object_id *oid,
+ struct object_info *oi UNUSED,
void *data)
{
return collect_ambiguous(oid, data);
@@ -561,6 +458,7 @@ static enum get_oid_result get_short_oid(struct repository *r,
struct object_id *oid,
unsigned flags)
{
+ struct odb_for_each_object_options opts = { 0 };
int status;
struct disambiguate_state ds;
int quietly = !!(flags & GET_OID_QUIETLY);
@@ -588,8 +486,11 @@ static enum get_oid_result get_short_oid(struct repository *r,
else
ds.fn = default_disambiguate_hint;
- find_short_object_filename(&ds);
- find_short_packed_object(&ds);
+ opts.prefix = &ds.bin_pfx;
+ opts.prefix_hex_len = ds.len;
+
+ odb_for_each_object_ext(r->objects, NULL, update_disambiguate_state,
+ &ds, &opts);
status = finish_object_disambiguation(&ds, oid);
/*
@@ -599,8 +500,8 @@ static enum get_oid_result get_short_oid(struct repository *r,
*/
if (status == MISSING_OBJECT) {
odb_reprepare(r->objects);
- find_short_object_filename(&ds);
- find_short_packed_object(&ds);
+ odb_for_each_object_ext(r->objects, NULL, update_disambiguate_state,
+ &ds, &opts);
status = finish_object_disambiguation(&ds, oid);
}
@@ -648,169 +549,25 @@ int repo_for_each_abbrev(struct repository *r, const char *prefix,
const struct git_hash_algo *algo,
each_abbrev_fn fn, void *cb_data)
{
+ struct object_id prefix_oid = { 0 };
+ struct odb_for_each_object_options opts = {
+ .prefix = &prefix_oid,
+ .prefix_hex_len = strlen(prefix),
+ };
struct oid_array collect = OID_ARRAY_INIT;
- struct disambiguate_state ds;
int ret;
- if (init_object_disambiguation(r, prefix, strlen(prefix), algo, &ds) < 0)
+ if (parse_oid_prefix(prefix, opts.prefix_hex_len, algo, NULL, &prefix_oid) < 0)
return -1;
- ds.always_call_fn = 1;
- ds.fn = repo_collect_ambiguous;
- ds.cb_data = &collect;
- find_short_object_filename(&ds);
- find_short_packed_object(&ds);
+ if (odb_for_each_object_ext(r->objects, NULL, repo_collect_ambiguous, &collect, &opts) < 0)
+ return -1;
ret = oid_array_for_each_unique(&collect, fn, cb_data);
oid_array_clear(&collect);
return ret;
}
-/*
- * Return the slot of the most-significant bit set in "val". There are various
- * ways to do this quickly with fls() or __builtin_clzl(), but speed is
- * probably not a big deal here.
- */
-static unsigned msb(unsigned long val)
-{
- unsigned r = 0;
- while (val >>= 1)
- r++;
- return r;
-}
-
-struct min_abbrev_data {
- unsigned int init_len;
- unsigned int cur_len;
- char *hex;
- struct repository *repo;
- const struct object_id *oid;
-};
-
-static inline char get_hex_char_from_oid(const struct object_id *oid,
- unsigned int pos)
-{
- static const char hex[] = "0123456789abcdef";
-
- if ((pos & 1) == 0)
- return hex[oid->hash[pos >> 1] >> 4];
- else
- return hex[oid->hash[pos >> 1] & 0xf];
-}
-
-static int extend_abbrev_len(const struct object_id *oid,
- struct min_abbrev_data *mad)
-{
- unsigned int i = mad->init_len;
- while (mad->hex[i] && mad->hex[i] == get_hex_char_from_oid(oid, i))
- i++;
-
- if (mad->hex[i] && i >= mad->cur_len)
- mad->cur_len = i + 1;
-
- return 0;
-}
-
-static int repo_extend_abbrev_len(struct repository *r UNUSED,
- const struct object_id *oid,
- void *cb_data)
-{
- return extend_abbrev_len(oid, cb_data);
-}
-
-static void find_abbrev_len_for_midx(struct multi_pack_index *m,
- struct min_abbrev_data *mad)
-{
- for (; m; m = m->base_midx) {
- int match = 0;
- uint32_t num, first = 0;
- struct object_id oid;
- const struct object_id *mad_oid;
-
- if (!m->num_objects)
- continue;
-
- num = m->num_objects + m->num_objects_in_base;
- mad_oid = mad->oid;
- match = bsearch_one_midx(mad_oid, m, &first);
-
- /*
- * first is now the position in the packfile where we
- * would insert mad->hash if it does not exist (or the
- * position of mad->hash if it does exist). Hence, we
- * consider a maximum of two objects nearby for the
- * abbreviation length.
- */
- mad->init_len = 0;
- if (!match) {
- if (nth_midxed_object_oid(&oid, m, first))
- extend_abbrev_len(&oid, mad);
- } else if (first < num - 1) {
- if (nth_midxed_object_oid(&oid, m, first + 1))
- extend_abbrev_len(&oid, mad);
- }
- if (first > 0) {
- if (nth_midxed_object_oid(&oid, m, first - 1))
- extend_abbrev_len(&oid, mad);
- }
- mad->init_len = mad->cur_len;
- }
-}
-
-static void find_abbrev_len_for_pack(struct packed_git *p,
- struct min_abbrev_data *mad)
-{
- int match = 0;
- uint32_t num, first = 0;
- struct object_id oid;
- const struct object_id *mad_oid;
-
- if (p->multi_pack_index)
- return;
-
- if (open_pack_index(p) || !p->num_objects)
- return;
-
- num = p->num_objects;
- mad_oid = mad->oid;
- match = bsearch_pack(mad_oid, p, &first);
-
- /*
- * first is now the position in the packfile where we would insert
- * mad->hash if it does not exist (or the position of mad->hash if
- * it does exist). Hence, we consider a maximum of two objects
- * nearby for the abbreviation length.
- */
- mad->init_len = 0;
- if (!match) {
- if (!nth_packed_object_id(&oid, p, first))
- extend_abbrev_len(&oid, mad);
- } else if (first < num - 1) {
- if (!nth_packed_object_id(&oid, p, first + 1))
- extend_abbrev_len(&oid, mad);
- }
- if (first > 0) {
- if (!nth_packed_object_id(&oid, p, first - 1))
- extend_abbrev_len(&oid, mad);
- }
- mad->init_len = mad->cur_len;
-}
-
-static void find_abbrev_len_packed(struct min_abbrev_data *mad)
-{
- struct packed_git *p;
-
- odb_prepare_alternates(mad->repo->objects);
- for (struct odb_source *source = mad->repo->objects->sources; source; source = source->next) {
- struct multi_pack_index *m = get_multi_pack_index(source);
- if (m)
- find_abbrev_len_for_midx(m, mad);
- }
-
- repo_for_each_pack(mad->repo, p)
- find_abbrev_len_for_pack(p, mad);
-}
-
void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo,
const struct object_id *oid, int abbrev_len)
{
@@ -827,65 +584,19 @@ void strbuf_add_unique_abbrev(struct strbuf *sb, const struct object_id *oid,
}
int repo_find_unique_abbrev_r(struct repository *r, char *hex,
- const struct object_id *oid, int len)
+ const struct object_id *oid, int min_len)
{
const struct git_hash_algo *algo =
oid->algo ? &hash_algos[oid->algo] : r->hash_algo;
- struct disambiguate_state ds;
- struct min_abbrev_data mad;
- struct object_id oid_ret;
- const unsigned hexsz = algo->hexsz;
-
- if (len < 0) {
- unsigned long count;
+ unsigned len;
- if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
- count = 0;
-
- /*
- * Add one because the MSB only tells us the highest bit set,
- * not including the value of all the _other_ bits (so "15"
- * is only one off of 2^4, but the MSB is the 3rd bit.
- */
- len = msb(count) + 1;
- /*
- * We now know we have on the order of 2^len objects, which
- * expects a collision at 2^(len/2). But we also care about hex
- * chars, not bits, and there are 4 bits per hex. So all
- * together we need to divide by 2 and round up.
- */
- len = DIV_ROUND_UP(len, 2);
- /*
- * For very small repos, we stick with our regular fallback.
- */
- if (len < FALLBACK_DEFAULT_ABBREV)
- len = FALLBACK_DEFAULT_ABBREV;
- }
+ if (odb_find_abbrev_len(r->objects, oid, min_len, &len) < 0)
+ len = algo->hexsz;
oid_to_hex_r(hex, oid);
- if (len >= hexsz || !len)
- return hexsz;
-
- mad.repo = r;
- mad.init_len = len;
- mad.cur_len = len;
- mad.hex = hex;
- mad.oid = oid;
-
- find_abbrev_len_packed(&mad);
-
- if (init_object_disambiguation(r, hex, mad.cur_len, algo, &ds) < 0)
- return -1;
-
- ds.fn = repo_extend_abbrev_len;
- ds.always_call_fn = 1;
- ds.cb_data = (void *)&mad;
-
- find_short_object_filename(&ds);
- (void)finish_object_disambiguation(&ds, &oid_ret);
+ hex[len] = 0;
- hex[mad.cur_len] = 0;
- return mad.cur_len;
+ return len;
}
const char *repo_find_unique_abbrev(struct repository *r,
diff --git a/odb.c b/odb.c
index 350e23f3c0..3f94a53df1 100644
--- a/odb.c
+++ b/odb.c
@@ -12,6 +12,7 @@
#include "midx.h"
#include "object-file-convert.h"
#include "object-file.h"
+#include "object-name.h"
#include "odb.h"
#include "packfile.h"
#include "path.h"
@@ -896,20 +897,20 @@ int odb_freshen_object(struct object_database *odb,
return 0;
}
-int odb_for_each_object(struct object_database *odb,
- const struct object_info *request,
- odb_for_each_object_cb cb,
- void *cb_data,
- unsigned flags)
+int odb_for_each_object_ext(struct object_database *odb,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ const struct odb_for_each_object_options *opts)
{
int ret;
odb_prepare_alternates(odb);
for (struct odb_source *source = odb->sources; source; source = source->next) {
- if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)
+ if (opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)
continue;
- ret = odb_source_for_each_object(source, request, cb, cb_data, flags);
+ ret = odb_source_for_each_object(source, request, cb, cb_data, opts);
if (ret)
return ret;
}
@@ -917,6 +918,18 @@ int odb_for_each_object(struct object_database *odb,
return 0;
}
+int odb_for_each_object(struct object_database *odb,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct odb_for_each_object_options opts = {
+ .flags = flags,
+ };
+ return odb_for_each_object_ext(odb, request, cb, cb_data, &opts);
+}
+
int odb_count_objects(struct object_database *odb,
enum odb_count_objects_flags flags,
unsigned long *out)
@@ -952,6 +965,78 @@ out:
return ret;
}
+/*
+ * Return the slot of the most-significant bit set in "val". There are various
+ * ways to do this quickly with fls() or __builtin_clzl(), but speed is
+ * probably not a big deal here.
+ */
+static unsigned msb(unsigned long val)
+{
+ unsigned r = 0;
+ while (val >>= 1)
+ r++;
+ return r;
+}
+
+int odb_find_abbrev_len(struct object_database *odb,
+ const struct object_id *oid,
+ int min_length,
+ unsigned *out)
+{
+ const struct git_hash_algo *algo =
+ oid->algo ? &hash_algos[oid->algo] : odb->repo->hash_algo;
+ const unsigned hexsz = algo->hexsz;
+ unsigned len;
+ int ret;
+
+ if (min_length < 0) {
+ unsigned long count;
+
+ if (odb_count_objects(odb, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0)
+ count = 0;
+
+ /*
+ * Add one because the MSB only tells us the highest bit set,
+ * not including the value of all the _other_ bits (so "15"
+ * is only one off of 2^4, but the MSB is the 3rd bit.
+ */
+ len = msb(count) + 1;
+ /*
+ * We now know we have on the order of 2^len objects, which
+ * expects a collision at 2^(len/2). But we also care about hex
+ * chars, not bits, and there are 4 bits per hex. So all
+ * together we need to divide by 2 and round up.
+ */
+ len = DIV_ROUND_UP(len, 2);
+ /*
+ * For very small repos, we stick with our regular fallback.
+ */
+ if (len < FALLBACK_DEFAULT_ABBREV)
+ len = FALLBACK_DEFAULT_ABBREV;
+ } else {
+ len = min_length;
+ }
+
+ if (len >= hexsz || !len) {
+ *out = hexsz;
+ ret = 0;
+ goto out;
+ }
+
+ odb_prepare_alternates(odb);
+ for (struct odb_source *source = odb->sources; source; source = source->next) {
+ ret = odb_source_find_abbrev_len(source, oid, len, &len);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+ *out = len;
+
+out:
+ return ret;
+}
+
void odb_assert_oid_type(struct object_database *odb,
const struct object_id *oid, enum object_type expect)
{
diff --git a/odb.h b/odb.h
index 9aee260105..984bafca9d 100644
--- a/odb.h
+++ b/odb.h
@@ -482,6 +482,22 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
void *cb_data);
/*
+ * Options that can be passed to `odb_for_each_object()` and its
+ * backend-specific implementations.
+ */
+struct odb_for_each_object_options {
+ /* A bitfield of `odb_for_each_object_flags`. */
+ enum odb_for_each_object_flags flags;
+
+ /*
+ * If set, only iterate through objects whose first `prefix_hex_len`
+ * hex characters matches the given prefix.
+ */
+ const struct object_id *prefix;
+ size_t prefix_hex_len;
+};
+
+/*
* Iterate through all objects contained in the object database. Note that
* objects may be iterated over multiple times in case they are either stored
* in different backends or in case they are stored in multiple sources.
@@ -495,6 +511,13 @@ typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
* Returns 0 on success, a negative error code in case a failure occurred, or
* an arbitrary non-zero error code returned by the callback itself.
*/
+int odb_for_each_object_ext(struct object_database *odb,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ const struct odb_for_each_object_options *opts);
+
+/* Same as `odb_for_each_object_ext()` with `opts.flags` set to the given flags. */
int odb_for_each_object(struct object_database *odb,
const struct object_info *request,
odb_for_each_object_cb cb,
@@ -522,6 +545,22 @@ int odb_count_objects(struct object_database *odb,
enum odb_count_objects_flags flags,
unsigned long *out);
+/*
+ * Given an object ID, find the minimum required length required to make the
+ * object ID unique across the whole object database.
+ *
+ * The `min_len` determines the minimum abbreviated length that'll be returned
+ * by this function. If `min_len < 0`, then the function will set a sensible
+ * default minimum abbreviation length.
+ *
+ * Returns 0 on success, a negative error code otherwise. The computed length
+ * will be assigned to `*out`.
+ */
+int odb_find_abbrev_len(struct object_database *odb,
+ const struct object_id *oid,
+ int min_len,
+ unsigned *out);
+
enum {
/*
* By default, `odb_write_object()` does not actually write anything
diff --git a/odb/source-files.c b/odb/source-files.c
index c08d8993e3..76797569de 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -75,18 +75,18 @@ static int odb_source_files_for_each_object(struct odb_source *source,
const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
- unsigned flags)
+ const struct odb_for_each_object_options *opts)
{
struct odb_source_files *files = odb_source_files_downcast(source);
int ret;
- if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
- ret = odb_source_loose_for_each_object(source, request, cb, cb_data, flags);
+ if (!(opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
+ ret = odb_source_loose_for_each_object(source, request, cb, cb_data, opts);
if (ret)
return ret;
}
- ret = packfile_store_for_each_object(files->packed, request, cb, cb_data, flags);
+ ret = packfile_store_for_each_object(files->packed, request, cb, cb_data, opts);
if (ret)
return ret;
@@ -122,6 +122,30 @@ out:
return ret;
}
+static int odb_source_files_find_abbrev_len(struct odb_source *source,
+ const struct object_id *oid,
+ unsigned min_len,
+ unsigned *out)
+{
+ struct odb_source_files *files = odb_source_files_downcast(source);
+ unsigned len = min_len;
+ int ret;
+
+ ret = packfile_store_find_abbrev_len(files->packed, oid, len, &len);
+ if (ret < 0)
+ goto out;
+
+ ret = odb_source_loose_find_abbrev_len(source, oid, len, &len);
+ if (ret < 0)
+ goto out;
+
+ *out = len;
+ ret = 0;
+
+out:
+ return ret;
+}
+
static int odb_source_files_freshen_object(struct odb_source *source,
const struct object_id *oid)
{
@@ -250,6 +274,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb,
files->base.read_object_stream = odb_source_files_read_object_stream;
files->base.for_each_object = odb_source_files_for_each_object;
files->base.count_objects = odb_source_files_count_objects;
+ files->base.find_abbrev_len = odb_source_files_find_abbrev_len;
files->base.freshen_object = odb_source_files_freshen_object;
files->base.write_object = odb_source_files_write_object;
files->base.write_object_stream = odb_source_files_write_object_stream;
diff --git a/odb/source.h b/odb/source.h
index 96c906e7a1..a9d7d0b96f 100644
--- a/odb/source.h
+++ b/odb/source.h
@@ -140,7 +140,7 @@ struct odb_source {
const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
- unsigned flags);
+ const struct odb_for_each_object_options *opts);
/*
* This callback is expected to count objects in the given object
@@ -158,6 +158,18 @@ struct odb_source {
unsigned long *out);
/*
+ * This callback is expected to find the minimum required length to
+ * make the given object ID unique.
+ *
+ * The callback is expected to return a negative error code in case it
+ * failed, 0 otherwise.
+ */
+ int (*find_abbrev_len)(struct odb_source *source,
+ const struct object_id *oid,
+ unsigned min_length,
+ unsigned *out);
+
+ /*
* This callback is expected to freshen the given object so that its
* last access time is set to the current time. This is used to ensure
* that objects that are recent will not get garbage collected even if
@@ -343,9 +355,9 @@ static inline int odb_source_for_each_object(struct odb_source *source,
const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
- unsigned flags)
+ const struct odb_for_each_object_options *opts)
{
- return source->for_each_object(source, request, cb, cb_data, flags);
+ return source->for_each_object(source, request, cb, cb_data, opts);
}
/*
@@ -361,6 +373,18 @@ static inline int odb_source_count_objects(struct odb_source *source,
}
/*
+ * Determine the minimum required length to make the given object ID unique in
+ * the given source. Returns 0 on success, a negative error code otherwise.
+ */
+static inline int odb_source_find_abbrev_len(struct odb_source *source,
+ const struct object_id *oid,
+ unsigned min_len,
+ unsigned *out)
+{
+ return source->find_abbrev_len(source, oid, min_len, out);
+}
+
+/*
* Freshen an object in the object database by updating its timestamp.
* Returns 1 in case the object has been freshened, 0 in case the object does
* not exist.
diff --git a/oidtree.c b/oidtree.c
index 324de94934..ab9fe7ec7a 100644
--- a/oidtree.c
+++ b/oidtree.c
@@ -6,14 +6,6 @@
#include "oidtree.h"
#include "hash.h"
-struct oidtree_iter_data {
- oidtree_iter fn;
- void *arg;
- size_t *last_nibble_at;
- uint32_t algo;
- uint8_t last_byte;
-};
-
void oidtree_init(struct oidtree *ot)
{
cb_init(&ot->tree);
@@ -54,8 +46,7 @@ void oidtree_insert(struct oidtree *ot, const struct object_id *oid)
cb_insert(&ot->tree, on, sizeof(*oid));
}
-
-int oidtree_contains(struct oidtree *ot, const struct object_id *oid)
+bool oidtree_contains(struct oidtree *ot, const struct object_id *oid)
{
struct object_id k;
size_t klen = sizeof(k);
@@ -69,41 +60,51 @@ int oidtree_contains(struct oidtree *ot, const struct object_id *oid)
klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) <
offsetof(struct object_id, algo));
- return cb_lookup(&ot->tree, (const uint8_t *)&k, klen) ? 1 : 0;
+ return !!cb_lookup(&ot->tree, (const uint8_t *)&k, klen);
}
-static enum cb_next iter(struct cb_node *n, void *arg)
+struct oidtree_each_data {
+ oidtree_each_cb cb;
+ void *cb_data;
+ size_t *last_nibble_at;
+ uint32_t algo;
+ uint8_t last_byte;
+};
+
+static int iter(struct cb_node *n, void *cb_data)
{
- struct oidtree_iter_data *x = arg;
+ struct oidtree_each_data *data = cb_data;
struct object_id k;
/* Copy to provide 4-byte alignment needed by struct object_id. */
memcpy(&k, n->k, sizeof(k));
- if (x->algo != GIT_HASH_UNKNOWN && x->algo != k.algo)
- return CB_CONTINUE;
+ if (data->algo != GIT_HASH_UNKNOWN && data->algo != k.algo)
+ return 0;
- if (x->last_nibble_at) {
- if ((k.hash[*x->last_nibble_at] ^ x->last_byte) & 0xf0)
- return CB_CONTINUE;
+ if (data->last_nibble_at) {
+ if ((k.hash[*data->last_nibble_at] ^ data->last_byte) & 0xf0)
+ return 0;
}
- return x->fn(&k, x->arg);
+ return data->cb(&k, data->cb_data);
}
-void oidtree_each(struct oidtree *ot, const struct object_id *oid,
- size_t oidhexsz, oidtree_iter fn, void *arg)
+int oidtree_each(struct oidtree *ot, const struct object_id *prefix,
+ size_t prefix_hex_len, oidtree_each_cb cb, void *cb_data)
{
- size_t klen = oidhexsz / 2;
- struct oidtree_iter_data x = { 0 };
- assert(oidhexsz <= GIT_MAX_HEXSZ);
+ struct oidtree_each_data data = {
+ .cb = cb,
+ .cb_data = cb_data,
+ .algo = prefix->algo,
+ };
+ size_t klen = prefix_hex_len / 2;
+ assert(prefix_hex_len <= GIT_MAX_HEXSZ);
- x.fn = fn;
- x.arg = arg;
- x.algo = oid->algo;
- if (oidhexsz & 1) {
- x.last_byte = oid->hash[klen];
- x.last_nibble_at = &klen;
+ if (prefix_hex_len & 1) {
+ data.last_byte = prefix->hash[klen];
+ data.last_nibble_at = &klen;
}
- cb_each(&ot->tree, (const uint8_t *)oid, klen, iter, &x);
+
+ return cb_each(&ot->tree, prefix->hash, klen, iter, &data);
}
diff --git a/oidtree.h b/oidtree.h
index 77898f510a..2b7bad2e60 100644
--- a/oidtree.h
+++ b/oidtree.h
@@ -5,18 +5,52 @@
#include "hash.h"
#include "mem-pool.h"
+/*
+ * OID trees are an efficient storage for object IDs that use a critbit tree
+ * internally. Common prefixes are duplicated and object IDs are stored in a
+ * way that allow easy iteration over the objects in lexicographic order. As a
+ * consequence, operations that want to enumerate all object IDs that match a
+ * given prefix can be answered efficiently.
+ *
+ * Note that it is not (yet) possible to store data other than the object IDs
+ * themselves in this tree.
+ */
struct oidtree {
struct cb_tree tree;
struct mem_pool mem_pool;
};
-void oidtree_init(struct oidtree *);
-void oidtree_clear(struct oidtree *);
-void oidtree_insert(struct oidtree *, const struct object_id *);
-int oidtree_contains(struct oidtree *, const struct object_id *);
+/* Initialize the oidtree so that it is ready for use. */
+void oidtree_init(struct oidtree *ot);
-typedef enum cb_next (*oidtree_iter)(const struct object_id *, void *data);
-void oidtree_each(struct oidtree *, const struct object_id *,
- size_t oidhexsz, oidtree_iter, void *data);
+/*
+ * Release all memory associated with the oidtree and reinitialize it for
+ * subsequent use.
+ */
+void oidtree_clear(struct oidtree *ot);
+
+/* Insert the object ID into the tree. */
+void oidtree_insert(struct oidtree *ot, const struct object_id *oid);
+
+/* Check whether the tree contains the given object ID. */
+bool oidtree_contains(struct oidtree *ot, const struct object_id *oid);
+
+/*
+ * Callback function used for `oidtree_each()`. Returning a non-zero exit code
+ * will cause iteration to stop. The exit code will be propagated to the caller
+ * of `oidtree_each()`.
+ */
+typedef int (*oidtree_each_cb)(const struct object_id *oid,
+ void *cb_data);
+
+/*
+ * Iterate through all object IDs in the tree whose prefix matches the given
+ * object ID prefix and invoke the callback function on each of them.
+ *
+ * Returns any non-zero exit code from the provided callback function.
+ */
+int oidtree_each(struct oidtree *ot,
+ const struct object_id *prefix, size_t prefix_hex_len,
+ oidtree_each_cb cb, void *cb_data);
#endif /* OIDTREE_H */
diff --git a/packfile.c b/packfile.c
index d4de9f3ffe..ee9c7ea1d1 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2371,11 +2371,182 @@ static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
}
}
+static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b)
+{
+ do {
+ if (*a != *b)
+ return 0;
+ a++;
+ b++;
+ len -= 2;
+ } while (len > 1);
+ if (len)
+ if ((*a ^ *b) & 0xf0)
+ return 0;
+ return 1;
+}
+
+static int for_each_prefixed_object_in_midx(
+ struct packfile_store *store,
+ struct multi_pack_index *m,
+ const struct odb_for_each_object_options *opts,
+ struct packfile_store_for_each_object_wrapper_data *data)
+{
+ int ret;
+
+ for (; m; m = m->base_midx) {
+ uint32_t num, i, first = 0;
+ int len = opts->prefix_hex_len > m->source->odb->repo->hash_algo->hexsz ?
+ m->source->odb->repo->hash_algo->hexsz : opts->prefix_hex_len;
+
+ if (!m->num_objects)
+ continue;
+
+ num = m->num_objects + m->num_objects_in_base;
+
+ bsearch_one_midx(opts->prefix, m, &first);
+
+ /*
+ * At this point, "first" is the location of the lowest
+ * object with an object name that could match "opts->prefix".
+ * See if we have 0, 1 or more objects that actually match(es).
+ */
+ for (i = first; i < num; i++) {
+ const struct object_id *current = NULL;
+ struct object_id oid;
+
+ current = nth_midxed_object_oid(&oid, m, i);
+
+ if (!match_hash(len, opts->prefix->hash, current->hash))
+ break;
+
+ if (data->request) {
+ struct object_info oi = *data->request;
+
+ ret = packfile_store_read_object_info(store, current,
+ &oi, 0);
+ if (ret)
+ goto out;
+
+ ret = data->cb(&oid, &oi, data->cb_data);
+ if (ret)
+ goto out;
+ } else {
+ ret = data->cb(&oid, NULL, data->cb_data);
+ if (ret)
+ goto out;
+ }
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int for_each_prefixed_object_in_pack(
+ struct packfile_store *store,
+ struct packed_git *p,
+ const struct odb_for_each_object_options *opts,
+ struct packfile_store_for_each_object_wrapper_data *data)
+{
+ uint32_t num, i, first = 0;
+ int len = opts->prefix_hex_len > p->repo->hash_algo->hexsz ?
+ p->repo->hash_algo->hexsz : opts->prefix_hex_len;
+ int ret;
+
+ num = p->num_objects;
+ bsearch_pack(opts->prefix, p, &first);
+
+ /*
+ * At this point, "first" is the location of the lowest object
+ * with an object name that could match "bin_pfx". See if we have
+ * 0, 1 or more objects that actually match(es).
+ */
+ for (i = first; i < num; i++) {
+ struct object_id oid;
+
+ nth_packed_object_id(&oid, p, i);
+ if (!match_hash(len, opts->prefix->hash, oid.hash))
+ break;
+
+ if (data->request) {
+ struct object_info oi = *data->request;
+
+ ret = packfile_store_read_object_info(store, &oid, &oi, 0);
+ if (ret)
+ goto out;
+
+ ret = data->cb(&oid, &oi, data->cb_data);
+ if (ret)
+ goto out;
+ } else {
+ ret = data->cb(&oid, NULL, data->cb_data);
+ if (ret)
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int packfile_store_for_each_prefixed_object(
+ struct packfile_store *store,
+ const struct odb_for_each_object_options *opts,
+ struct packfile_store_for_each_object_wrapper_data *data)
+{
+ struct packfile_list_entry *e;
+ struct multi_pack_index *m;
+ bool pack_errors = false;
+ int ret;
+
+ if (opts->flags)
+ BUG("flags unsupported");
+
+ store->skip_mru_updates = true;
+
+ m = get_multi_pack_index(store->source);
+ if (m) {
+ ret = for_each_prefixed_object_in_midx(store, m, opts, data);
+ if (ret)
+ goto out;
+ }
+
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ if (e->pack->multi_pack_index)
+ continue;
+
+ if (open_pack_index(e->pack)) {
+ pack_errors = true;
+ continue;
+ }
+
+ if (!e->pack->num_objects)
+ continue;
+
+ ret = for_each_prefixed_object_in_pack(store, e->pack, opts, data);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ store->skip_mru_updates = false;
+ if (!ret && pack_errors)
+ ret = -1;
+ return ret;
+}
+
int packfile_store_for_each_object(struct packfile_store *store,
const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
- unsigned flags)
+ const struct odb_for_each_object_options *opts)
{
struct packfile_store_for_each_object_wrapper_data data = {
.store = store,
@@ -2386,20 +2557,23 @@ int packfile_store_for_each_object(struct packfile_store *store,
struct packfile_list_entry *e;
int pack_errors = 0, ret;
+ if (opts->prefix)
+ return packfile_store_for_each_prefixed_object(store, opts, &data);
+
store->skip_mru_updates = true;
for (e = packfile_store_get_packs(store); e; e = e->next) {
struct packed_git *p = e->pack;
- if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
- if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
!p->pack_promisor)
continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
p->pack_keep_in_core)
continue;
- if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ if ((opts->flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
p->pack_keep)
continue;
if (open_pack_index(p)) {
@@ -2408,7 +2582,7 @@ int packfile_store_for_each_object(struct packfile_store *store,
}
ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper,
- &data, flags);
+ &data, opts->flags);
if (ret)
goto out;
}
@@ -2423,6 +2597,117 @@ out:
return ret;
}
+static int extend_abbrev_len(const struct object_id *a,
+ const struct object_id *b,
+ unsigned *out)
+{
+ unsigned len = oid_common_prefix_hexlen(a, b);
+ if (len != hash_algos[a->algo].hexsz && len >= *out)
+ *out = len + 1;
+ return 0;
+}
+
+static void find_abbrev_len_for_midx(struct multi_pack_index *m,
+ const struct object_id *oid,
+ unsigned min_len,
+ unsigned *out)
+{
+ unsigned len = min_len;
+
+ for (; m; m = m->base_midx) {
+ int match = 0;
+ uint32_t num, first = 0;
+ struct object_id found_oid;
+
+ if (!m->num_objects)
+ continue;
+
+ num = m->num_objects + m->num_objects_in_base;
+ match = bsearch_one_midx(oid, m, &first);
+
+ /*
+ * first is now the position in the packfile where we
+ * would insert the object ID if it does not exist (or the
+ * position of the object ID if it does exist). Hence, we
+ * consider a maximum of two objects nearby for the
+ * abbreviation length.
+ */
+
+ if (!match) {
+ if (nth_midxed_object_oid(&found_oid, m, first))
+ extend_abbrev_len(&found_oid, oid, &len);
+ } else if (first < num - 1) {
+ if (nth_midxed_object_oid(&found_oid, m, first + 1))
+ extend_abbrev_len(&found_oid, oid, &len);
+ }
+ if (first > 0) {
+ if (nth_midxed_object_oid(&found_oid, m, first - 1))
+ extend_abbrev_len(&found_oid, oid, &len);
+ }
+ }
+
+ *out = len;
+}
+
+static void find_abbrev_len_for_pack(struct packed_git *p,
+ const struct object_id *oid,
+ unsigned min_len,
+ unsigned *out)
+{
+ int match;
+ uint32_t num, first = 0;
+ struct object_id found_oid;
+ unsigned len = min_len;
+
+ num = p->num_objects;
+ match = bsearch_pack(oid, p, &first);
+
+ /*
+ * first is now the position in the packfile where we would insert
+ * the object ID if it does not exist (or the position of mad->hash if
+ * it does exist). Hence, we consider a maximum of two objects
+ * nearby for the abbreviation length.
+ */
+ if (!match) {
+ if (!nth_packed_object_id(&found_oid, p, first))
+ extend_abbrev_len(&found_oid, oid, &len);
+ } else if (first < num - 1) {
+ if (!nth_packed_object_id(&found_oid, p, first + 1))
+ extend_abbrev_len(&found_oid, oid, &len);
+ }
+ if (first > 0) {
+ if (!nth_packed_object_id(&found_oid, p, first - 1))
+ extend_abbrev_len(&found_oid, oid, &len);
+ }
+
+ *out = len;
+}
+
+int packfile_store_find_abbrev_len(struct packfile_store *store,
+ const struct object_id *oid,
+ unsigned min_len,
+ unsigned *out)
+{
+ struct packfile_list_entry *e;
+ struct multi_pack_index *m;
+
+ m = get_multi_pack_index(store->source);
+ if (m)
+ find_abbrev_len_for_midx(m, oid, min_len, &min_len);
+
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ if (e->pack->multi_pack_index)
+ continue;
+ if (open_pack_index(e->pack) || !e->pack->num_objects)
+ continue;
+
+ find_abbrev_len_for_pack(e->pack, oid, min_len, &min_len);
+ }
+
+ *out = min_len;
+ return 0;
+}
+
struct add_promisor_object_data {
struct repository *repo;
struct oidset *set;
diff --git a/packfile.h b/packfile.h
index a16ec3950d..45b35973f0 100644
--- a/packfile.h
+++ b/packfile.h
@@ -367,7 +367,12 @@ int packfile_store_for_each_object(struct packfile_store *store,
const struct object_info *request,
odb_for_each_object_cb cb,
void *cb_data,
- unsigned flags);
+ const struct odb_for_each_object_options *opts);
+
+int packfile_store_find_abbrev_len(struct packfile_store *store,
+ const struct object_id *oid,
+ unsigned min_len,
+ unsigned *out);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
diff --git a/t/unit-tests/u-oidtree.c b/t/unit-tests/u-oidtree.c
index e6eede2740..d4d05c7dc3 100644
--- a/t/unit-tests/u-oidtree.c
+++ b/t/unit-tests/u-oidtree.c
@@ -24,7 +24,7 @@ static int fill_tree_loc(struct oidtree *ot, const char *hexes[], size_t n)
return 0;
}
-static void check_contains(struct oidtree *ot, const char *hex, int expected)
+static void check_contains(struct oidtree *ot, const char *hex, bool expected)
{
struct object_id oid;
@@ -38,7 +38,7 @@ struct expected_hex_iter {
const char *query;
};
-static enum cb_next check_each_cb(const struct object_id *oid, void *data)
+static int check_each_cb(const struct object_id *oid, void *data)
{
struct expected_hex_iter *hex_iter = data;
struct object_id expected;
@@ -49,7 +49,7 @@ static enum cb_next check_each_cb(const struct object_id *oid, void *data)
&expected);
cl_assert_equal_s(oid_to_hex(oid), oid_to_hex(&expected));
hex_iter->i += 1;
- return CB_CONTINUE;
+ return 0;
}
LAST_ARG_MUST_BE_NULL
@@ -88,12 +88,12 @@ void test_oidtree__cleanup(void)
void test_oidtree__contains(void)
{
FILL_TREE(&ot, "444", "1", "2", "3", "4", "5", "a", "b", "c", "d", "e");
- check_contains(&ot, "44", 0);
- check_contains(&ot, "441", 0);
- check_contains(&ot, "440", 0);
- check_contains(&ot, "444", 1);
- check_contains(&ot, "4440", 1);
- check_contains(&ot, "4444", 0);
+ check_contains(&ot, "44", false);
+ check_contains(&ot, "441", false);
+ check_contains(&ot, "440", false);
+ check_contains(&ot, "444", true);
+ check_contains(&ot, "4440", true);
+ check_contains(&ot, "4444", false);
}
void test_oidtree__each(void)