aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Junio C Hamano <gitster@pobox.com> 2026-03-02 17:06:50 -0800
committer Junio C Hamano <gitster@pobox.com> 2026-03-02 17:06:50 -0800
commit 9eb5b3b999cb89d4a09dcf1012784e74154026de (patch)
tree ec9e98a84acf1ba2ca9db5f777dce32449ffcaff
parent 2cc71917514657b93014134350864f4849edfc83 (diff)
parent 3565faf28c2059c6260d53ac71a303b1c04b0a7b (diff)
download git-9eb5b3b999cb89d4a09dcf1012784e74154026de.tar.xz
Merge branch 'ps/odb-for-each-object'
Revamp object enumeration API around odb.

* ps/odb-for-each-object:
  odb: drop unused `for_each_{loose,packed}_object()` functions
  reachable: convert to use `odb_for_each_object()`
  builtin/pack-objects: use `packfile_store_for_each_object()`
  odb: introduce mtime fields for object info requests
  treewide: drop uses of `for_each_{loose,packed}_object()`
  treewide: enumerate promisor objects via `odb_for_each_object()`
  builtin/fsck: refactor to use `odb_for_each_object()`
  odb: introduce `odb_for_each_object()`
  packfile: introduce function to iterate through objects
  packfile: extract function to iterate through objects of a store
  object-file: introduce function to iterate through objects
  object-file: extract function to read object info from path
  odb: fix flags parameter to be unsigned
  odb: rename `FOR_EACH_OBJECT_*` flags
-rw-r--r-- builtin/cat-file.c 36
-rw-r--r-- builtin/fsck.c 57
-rw-r--r-- builtin/pack-objects.c 48
-rw-r--r-- commit-graph.c 46
-rw-r--r-- object-file.c 125
-rw-r--r-- object-file.h 22
-rw-r--r-- odb.c 31
-rw-r--r-- odb.h 58
-rw-r--r-- packfile.c 184
-rw-r--r-- packfile.h 19
-rw-r--r-- reachable.c 129
-rw-r--r-- repack-promisor.c 8
-rw-r--r-- revision.c 10
13 files changed, 462 insertions, 311 deletions
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index df8e87a81f..53ffe80c79 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -806,11 +806,14 @@ struct for_each_object_payload {
void *payload;
};
-static int batch_one_object_loose(const struct object_id *oid,
- const char *path UNUSED,
- void *_payload)
+static int batch_one_object_oi(const struct object_id *oid,
+ struct object_info *oi,
+ void *_payload)
{
struct for_each_object_payload *payload = _payload;
+ if (oi && oi->whence == OI_PACKED)
+ return payload->callback(oid, oi->u.packed.pack, oi->u.packed.offset,
+ payload->payload);
return payload->callback(oid, NULL, 0, payload->payload);
}
@@ -846,8 +849,21 @@ static void batch_each_object(struct batch_options *opt,
.payload = _payload,
};
struct bitmap_index *bitmap = NULL;
+ struct odb_source *source;
- for_each_loose_object(the_repository->objects, batch_one_object_loose, &payload, 0);
+ /*
+ * TODO: we still need to tap into implementation details of the object
+ * database sources. Ideally, we should extend `odb_for_each_object()`
+ * to handle object filters itself so that we can move the filtering
+ * logic into the individual sources.
+ */
+ odb_prepare_alternates(the_repository->objects);
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi,
+ &payload, flags);
+ if (ret)
+ break;
+ }
if (opt->objects_filter.choice != LOFC_DISABLED &&
(bitmap = prepare_bitmap_git(the_repository)) &&
@@ -863,8 +879,14 @@ static void batch_each_object(struct batch_options *opt,
&payload, flags);
}
} else {
- for_each_packed_object(the_repository, batch_one_object_packed,
- &payload, flags);
+ struct object_info oi = { 0 };
+
+ for (source = the_repository->objects->sources; source; source = source->next) {
+ int ret = packfile_store_for_each_object(source->packfiles, &oi,
+ batch_one_object_oi, &payload, flags);
+ if (ret)
+ break;
+ }
}
free_bitmap_index(bitmap);
@@ -924,7 +946,7 @@ static int batch_objects(struct batch_options *opt)
cb.seen = &seen;
batch_each_object(opt, batch_unordered_object,
- FOR_EACH_OBJECT_PACK_ORDER, &cb);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER, &cb);
oidset_clear(&seen);
} else {
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 0512f78a87..80e298049f 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -219,15 +219,17 @@ static int mark_used(struct object *obj, enum object_type type UNUSED,
return 0;
}
-static void mark_unreachable_referents(const struct object_id *oid)
+static int mark_unreachable_referents(const struct object_id *oid,
+ struct object_info *oi UNUSED,
+ void *data UNUSED)
{
struct fsck_options options = FSCK_OPTIONS_DEFAULT;
struct object *obj = lookup_object(the_repository, oid);
if (!obj || !(obj->flags & HAS_OBJ))
- return; /* not part of our original set */
+ return 0; /* not part of our original set */
if (obj->flags & REACHABLE)
- return; /* reachable objects already traversed */
+ return 0; /* reachable objects already traversed */
/*
* Avoid passing OBJ_NONE to fsck_walk, which will parse the object
@@ -244,22 +246,7 @@ static void mark_unreachable_referents(const struct object_id *oid)
fsck_walk(obj, NULL, &options);
if (obj->type == OBJ_TREE)
free_tree_buffer((struct tree *)obj);
-}
-static int mark_loose_unreachable_referents(const struct object_id *oid,
- const char *path UNUSED,
- void *data UNUSED)
-{
- mark_unreachable_referents(oid);
- return 0;
-}
-
-static int mark_packed_unreachable_referents(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data UNUSED)
-{
- mark_unreachable_referents(oid);
return 0;
}
@@ -395,12 +382,8 @@ static void check_connectivity(void)
* and ignore any that weren't present in our earlier
* traversal.
*/
- for_each_loose_object(the_repository->objects,
- mark_loose_unreachable_referents, NULL, 0);
- for_each_packed_object(the_repository,
- mark_packed_unreachable_referents,
- NULL,
- 0);
+ odb_for_each_object(the_repository->objects, NULL,
+ mark_unreachable_referents, NULL, 0);
}
/* Look up all the requirements, warn about missing objects.. */
@@ -900,26 +883,12 @@ static void fsck_index(struct index_state *istate, const char *index_path,
fsck_resolve_undo(istate, index_path);
}
-static void mark_object_for_connectivity(const struct object_id *oid)
+static int mark_object_for_connectivity(const struct object_id *oid,
+ struct object_info *oi UNUSED,
+ void *cb_data UNUSED)
{
struct object *obj = lookup_unknown_object(the_repository, oid);
obj->flags |= HAS_OBJ;
-}
-
-static int mark_loose_for_connectivity(const struct object_id *oid,
- const char *path UNUSED,
- void *data UNUSED)
-{
- mark_object_for_connectivity(oid);
- return 0;
-}
-
-static int mark_packed_for_connectivity(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
- void *data UNUSED)
-{
- mark_object_for_connectivity(oid);
return 0;
}
@@ -1068,10 +1037,8 @@ int cmd_fsck(int argc,
odb_reprepare(the_repository->objects);
if (connectivity_only) {
- for_each_loose_object(the_repository->objects,
- mark_loose_for_connectivity, NULL, 0);
- for_each_packed_object(the_repository,
- mark_packed_for_connectivity, NULL, 0);
+ odb_for_each_object(the_repository->objects, NULL,
+ mark_object_for_connectivity, NULL, 0);
} else {
odb_prepare_alternates(the_repository->objects);
for (source = the_repository->objects->sources; source; source = source->next)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 433d77cf27..c1ee4d5ed7 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3912,7 +3912,7 @@ static void read_packs_list_from_stdin(struct rev_info *revs)
for_each_object_in_pack(p,
add_object_entry_from_pack,
revs,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
strbuf_release(&buf);
@@ -4325,25 +4325,12 @@ static void show_edge(struct commit *commit)
}
static int add_object_in_unpacked_pack(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos,
+ struct object_info *oi,
void *data UNUSED)
{
if (cruft) {
- off_t offset;
- time_t mtime;
-
- if (pack->is_cruft) {
- if (load_pack_mtimes(pack) < 0)
- die(_("could not load cruft pack .mtimes"));
- mtime = nth_packed_mtime(pack, pos);
- } else {
- mtime = pack->mtime;
- }
- offset = nth_packed_object_offset(pack, pos);
-
- add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
- NULL, mtime);
+ add_cruft_object_entry(oid, OBJ_NONE, oi->u.packed.pack,
+ oi->u.packed.offset, NULL, *oi->mtimep);
} else {
add_object_entry(oid, OBJ_NONE, "", 0);
}
@@ -4352,14 +4339,25 @@ static int add_object_in_unpacked_pack(const struct object_id *oid,
static void add_objects_in_unpacked_packs(void)
{
- if (for_each_packed_object(to_pack.repo,
- add_object_in_unpacked_pack,
- NULL,
- FOR_EACH_OBJECT_PACK_ORDER |
- FOR_EACH_OBJECT_LOCAL_ONLY |
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
- die(_("cannot open pack index"));
+ struct odb_source *source;
+ time_t mtime;
+ struct object_info oi = {
+ .mtimep = &mtime,
+ };
+
+ odb_prepare_alternates(to_pack.repo->objects);
+ for (source = to_pack.repo->objects->sources; source; source = source->next) {
+ if (!source->local)
+ continue;
+
+ if (packfile_store_for_each_object(source->packfiles, &oi,
+ add_object_in_unpacked_pack, NULL,
+ ODB_FOR_EACH_OBJECT_PACK_ORDER |
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY |
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
+ die(_("cannot open pack index"));
+ }
}
static int add_loose_object(const struct object_id *oid, const char *path,
diff --git a/commit-graph.c b/commit-graph.c
index 1fcceb3920..d250a729b1 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1479,30 +1479,38 @@ static int write_graph_chunk_bloom_data(struct hashfile *f,
return 0;
}
+static int add_packed_commits_oi(const struct object_id *oid,
+ struct object_info *oi,
+ void *data)
+{
+ struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
+
+ if (ctx->progress)
+ display_progress(ctx->progress, ++ctx->progress_done);
+
+ if (*oi->typep != OBJ_COMMIT)
+ return 0;
+
+ oid_array_append(&ctx->oids, oid);
+ set_commit_pos(ctx->r, oid);
+
+ return 0;
+}
+
static int add_packed_commits(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *data)
{
- struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
enum object_type type;
off_t offset = nth_packed_object_offset(pack, pos);
struct object_info oi = OBJECT_INFO_INIT;
- if (ctx->progress)
- display_progress(ctx->progress, ++ctx->progress_done);
-
oi.typep = &type;
if (packed_object_info(pack, offset, &oi) < 0)
die(_("unable to get type of object %s"), oid_to_hex(oid));
- if (type != OBJ_COMMIT)
- return 0;
-
- oid_array_append(&ctx->oids, oid);
- set_commit_pos(ctx->r, oid);
-
- return 0;
+ return add_packed_commits_oi(oid, &oi, data);
}
static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit)
@@ -1927,7 +1935,7 @@ static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
goto cleanup;
}
for_each_object_in_pack(p, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+ ODB_FOR_EACH_OBJECT_PACK_ORDER);
close_pack(p);
free(p);
}
@@ -1959,13 +1967,23 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
{
+ struct odb_source *source;
+ enum object_type type;
+ struct object_info oi = {
+ .typep = &type,
+ };
+
if (ctx->report_progress)
ctx->progress = start_delayed_progress(
ctx->r,
_("Finding commits for commit graph among packed objects"),
ctx->approx_nr_objects);
- for_each_packed_object(ctx->r, add_packed_commits, ctx,
- FOR_EACH_OBJECT_PACK_ORDER);
+
+ odb_prepare_alternates(ctx->r->objects);
+ for (source = ctx->r->objects->sources; source; source = source->next)
+ packfile_store_for_each_object(source->packfiles, &oi, add_packed_commits_oi,
+ ctx, ODB_FOR_EACH_OBJECT_PACK_ORDER);
+
if (ctx->progress_done < ctx->approx_nr_objects)
display_progress(ctx->progress, ctx->approx_nr_objects);
stop_progress(&ctx->progress);
diff --git a/object-file.c b/object-file.c
index 1b62996ef0..8d1d8c778e 100644
--- a/object-file.c
+++ b/object-file.c
@@ -165,30 +165,13 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
}
/*
- * Find "oid" as a loose object in given source.
- * Returns 0 on success, negative on failure.
+ * Find "oid" as a loose object in given source, open the object and return its
+ * file descriptor. Returns the file descriptor on success, negative on failure.
*
* The "path" out-parameter will give the path of the object we found (if any).
* Note that it may point to static storage and is only valid until another
* call to open_loose_object().
*/
-static int stat_loose_object(struct odb_source_loose *loose,
- const struct object_id *oid,
- struct stat *st, const char **path)
-{
- static struct strbuf buf = STRBUF_INIT;
-
- *path = odb_loose_path(loose->source, &buf, oid);
- if (!lstat(*path, st))
- return 0;
-
- return -1;
-}
-
-/*
- * Like stat_loose_object(), but actually open the object and return the
- * descriptor. See the caveats on the "path" parameter above.
- */
static int open_loose_object(struct odb_source_loose *loose,
const struct object_id *oid, const char **path)
{
@@ -412,19 +395,21 @@ static int parse_loose_header(const char *hdr, struct object_info *oi)
return 0;
}
-int odb_source_loose_read_object_info(struct odb_source *source,
+static int read_object_info_from_path(struct odb_source *source,
+ const char *path,
const struct object_id *oid,
- struct object_info *oi, int flags)
+ struct object_info *oi,
+ unsigned flags)
{
int ret;
int fd;
unsigned long mapsize;
- const char *path;
void *map = NULL;
git_zstream stream, *stream_to_end = NULL;
char hdr[MAX_HEADER_LEN];
unsigned long size_scratch;
enum object_type type_scratch;
+ struct stat st;
/*
* If we don't care about type or size, then we don't
@@ -437,24 +422,28 @@ int odb_source_loose_read_object_info(struct odb_source *source,
if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) {
struct stat st;
- if ((!oi || !oi->disk_sizep) && (flags & OBJECT_INFO_QUICK)) {
+ if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) {
ret = quick_has_loose(source->loose, oid) ? 0 : -1;
goto out;
}
- if (stat_loose_object(source->loose, oid, &st, &path) < 0) {
+ if (lstat(path, &st) < 0) {
ret = -1;
goto out;
}
- if (oi && oi->disk_sizep)
- *oi->disk_sizep = st.st_size;
+ if (oi) {
+ if (oi->disk_sizep)
+ *oi->disk_sizep = st.st_size;
+ if (oi->mtimep)
+ *oi->mtimep = st.st_mtime;
+ }
ret = 0;
goto out;
}
- fd = open_loose_object(source->loose, oid, &path);
+ fd = git_open(path);
if (fd < 0) {
if (errno != ENOENT)
error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
@@ -462,7 +451,21 @@ int odb_source_loose_read_object_info(struct odb_source *source,
goto out;
}
- map = map_fd(fd, path, &mapsize);
+ if (fstat(fd, &st)) {
+ close(fd);
+ ret = -1;
+ goto out;
+ }
+
+ mapsize = xsize_t(st.st_size);
+ if (!mapsize) {
+ close(fd);
+ ret = error(_("object file %s is empty"), path);
+ goto out;
+ }
+
+ map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0);
+ close(fd);
if (!map) {
ret = -1;
goto out;
@@ -470,6 +473,8 @@ int odb_source_loose_read_object_info(struct odb_source *source,
if (oi->disk_sizep)
*oi->disk_sizep = mapsize;
+ if (oi->mtimep)
+ *oi->mtimep = st.st_mtime;
stream_to_end = &stream;
@@ -533,6 +538,16 @@ out:
return ret;
}
+int odb_source_loose_read_object_info(struct odb_source *source,
+ const struct object_id *oid,
+ struct object_info *oi,
+ unsigned flags)
+{
+ static struct strbuf buf = STRBUF_INIT;
+ odb_loose_path(source, &buf, oid);
+ return read_object_info_from_path(source, buf.buf, oid, oi, flags);
+}
+
static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
const void *buf, unsigned long len,
struct object_id *oid,
@@ -1792,24 +1807,52 @@ int for_each_loose_file_in_source(struct odb_source *source,
return r;
}
-int for_each_loose_object(struct object_database *odb,
- each_loose_object_fn cb, void *data,
- enum for_each_object_flags flags)
-{
+struct for_each_object_wrapper_data {
struct odb_source *source;
+ const struct object_info *request;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
- odb_prepare_alternates(odb);
- for (source = odb->sources; source; source = source->next) {
- int r = for_each_loose_file_in_source(source, cb, NULL,
- NULL, data);
- if (r)
- return r;
+static int for_each_object_wrapper_cb(const struct object_id *oid,
+ const char *path,
+ void *cb_data)
+{
+ struct for_each_object_wrapper_data *data = cb_data;
- if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
- break;
+ if (data->request) {
+ struct object_info oi = *data->request;
+
+ if (read_object_info_from_path(data->source, path, oid, &oi, 0) < 0)
+ return -1;
+
+ return data->cb(oid, &oi, data->cb_data);
+ } else {
+ return data->cb(oid, NULL, data->cb_data);
}
+}
- return 0;
+int odb_source_loose_for_each_object(struct odb_source *source,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct for_each_object_wrapper_data data = {
+ .source = source,
+ .request = request,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
+
+ /* There are no loose promisor objects, so we can return immediately. */
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY))
+ return 0;
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local)
+ return 0;
+
+ return for_each_loose_file_in_source(source, for_each_object_wrapper_cb,
+ NULL, NULL, &data);
}
static int append_loose_object(const struct object_id *oid,
diff --git a/object-file.h b/object-file.h
index a62d0de394..8df10bbb15 100644
--- a/object-file.h
+++ b/object-file.h
@@ -47,7 +47,8 @@ void odb_source_loose_reprepare(struct odb_source *source);
int odb_source_loose_read_object_info(struct odb_source *source,
const struct object_id *oid,
- struct object_info *oi, int flags);
+ struct object_info *oi,
+ unsigned flags);
int odb_source_loose_read_object_stream(struct odb_read_stream **out,
struct odb_source *source,
@@ -126,16 +127,17 @@ int for_each_loose_file_in_source(struct odb_source *source,
void *data);
/*
- * Iterate over all accessible loose objects without respect to
- * reachability. By default, this includes both local and alternate objects.
- * The order in which objects are visited is unspecified.
- *
- * Any flags specific to packs are ignored.
+ * Iterate through all loose objects in the given object database source and
+ * invoke the callback function for each of them. If an object info request is
+ * given, then the object info will be read for every individual object and
+ * passed to the callback as if `odb_source_loose_read_object_info()` was
+ * called for the object.
*/
-int for_each_loose_object(struct object_database *odb,
- each_loose_object_fn, void *,
- enum for_each_object_flags flags);
-
+int odb_source_loose_for_each_object(struct odb_source *source,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
/**
* format_object_header() is a thin wrapper around xsnprintf() that
diff --git a/odb.c b/odb.c
index 1679cc0465..025cd76520 100644
--- a/odb.c
+++ b/odb.c
@@ -702,6 +702,8 @@ static int do_oid_object_info_extended(struct object_database *odb,
oidclr(oi->delta_base_oid, odb->repo->hash_algo);
if (oi->contentp)
*oi->contentp = xmemdupz(co->buf, co->size);
+ if (oi->mtimep)
+ *oi->mtimep = 0;
oi->whence = OI_CACHED;
}
return 0;
@@ -995,6 +997,35 @@ int odb_freshen_object(struct object_database *odb,
return 0;
}
+int odb_for_each_object(struct object_database *odb,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ int ret;
+
+ odb_prepare_alternates(odb);
+ for (struct odb_source *source = odb->sources; source; source = source->next) {
+ if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local)
+ continue;
+
+ if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) {
+ ret = odb_source_loose_for_each_object(source, request,
+ cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
+
+ ret = packfile_store_for_each_object(source->packfiles, request,
+ cb, cb_data, flags);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
void odb_assert_oid_type(struct object_database *odb,
const struct object_id *oid, enum object_type expect)
{
diff --git a/odb.h b/odb.h
index 83d3a37805..2e643de2ee 100644
--- a/odb.h
+++ b/odb.h
@@ -335,6 +335,19 @@ struct object_info {
struct object_id *delta_base_oid;
void **contentp;
+ /*
+ * The time the given looked-up object was last modified.
+ *
+ * Note: the mtime may be ambiguous in case the object exists multiple
+ * times in the object database. It is thus _not_ recommended to use
+ * this field outside of contexts where you would read every instance
+ * of the object, like for example with `odb_for_each_object()`. As it
+ * is impossible to say at the ODB level what the intent of the caller
+ * is (e.g. whether to find the oldest or newest object), it is the
+ * responsibility of the caller to disambiguate the mtimes.
+ */
+ time_t *mtimep;
+
/* Response */
enum {
OI_CACHED,
@@ -459,26 +472,59 @@ static inline void obj_read_unlock(void)
if(obj_read_use_lock)
pthread_mutex_unlock(&obj_read_mutex);
}
+
/* Flags for odb_for_each_object() and related object iteration functions. */
-enum for_each_object_flags {
+enum odb_for_each_object_flags {
/* Iterate only over local objects, not alternates. */
- FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
/* Only iterate over packs obtained from the promisor remote. */
- FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
/*
* Visit objects within a pack in packfile order rather than .idx order
*/
- FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
+ ODB_FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
/* Only iterate over packs that are not marked as kept in-core. */
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
/* Only iterate over packs that do not have .keep files. */
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
};
+/*
+ * A callback function that can be used to iterate through objects. If given,
+ * the optional `oi` parameter will be populated as if you had called
+ * `odb_read_object_info()`.
+ *
+ * Returning a non-zero error code will cause iteration to abort. The error
+ * code will be propagated.
+ */
+typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data);
+
+/*
+ * Iterate through all objects contained in the object database. Note that
+ * objects may be iterated over multiple times in case they are either stored
+ * in different backends or in case they are stored in multiple sources.
+ * If an object info request is given, then the object info will be read and
+ * passed to the callback as if `odb_read_object_info()` was called for the
+ * object.
+ *
+ * Returning a non-zero error code from the callback function will cause
+ * iteration to abort. The error code will be propagated.
+ *
+ * Returns 0 on success, a negative error code in case a failure occurred, or
+ * an arbitrary non-zero error code returned by the callback itself.
+ */
+int odb_for_each_object(struct object_database *odb,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
+
enum {
/*
* By default, `odb_write_object()` does not actually write anything
diff --git a/packfile.c b/packfile.c
index 402c3b5dc7..57fbf51876 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1578,13 +1578,14 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
hashmap_add(&delta_base_cache, &ent->ent);
}
-int packed_object_info(struct packed_git *p,
- off_t obj_offset, struct object_info *oi)
+static int packed_object_info_with_index_pos(struct packed_git *p, off_t obj_offset,
+ uint32_t *maybe_index_pos, struct object_info *oi)
{
struct pack_window *w_curs = NULL;
unsigned long size;
off_t curpos = obj_offset;
enum object_type type = OBJ_NONE;
+ uint32_t pack_pos;
int ret;
/*
@@ -1619,16 +1620,35 @@ int packed_object_info(struct packed_git *p,
}
}
- if (oi->disk_sizep) {
- uint32_t pos;
- if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
+ if (oi->disk_sizep || (oi->mtimep && p->is_cruft)) {
+ if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {
error("could not find object at offset %"PRIuMAX" "
"in pack %s", (uintmax_t)obj_offset, p->pack_name);
ret = -1;
goto out;
}
+ }
+
+ if (oi->disk_sizep)
+ *oi->disk_sizep = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;
+
+ if (oi->mtimep) {
+ if (p->is_cruft) {
+ uint32_t index_pos;
+
+ if (load_pack_mtimes(p) < 0)
+ die(_("could not load .mtimes for cruft pack '%s'"),
+ pack_basename(p));
- *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;
+ if (maybe_index_pos)
+ index_pos = *maybe_index_pos;
+ else
+ index_pos = pack_pos_to_index(p, pack_pos);
+
+ *oi->mtimep = nth_packed_mtime(p, index_pos);
+ } else {
+ *oi->mtimep = p->mtime;
+ }
}
if (oi->typep) {
@@ -1681,6 +1701,12 @@ out:
return ret;
}
+int packed_object_info(struct packed_git *p, off_t obj_offset,
+ struct object_info *oi)
+{
+ return packed_object_info_with_index_pos(p, obj_offset, NULL, oi);
+}
+
static void *unpack_compressed_entry(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos,
@@ -2259,12 +2285,12 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid,
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn cb, void *data,
- enum for_each_object_flags flags)
+ unsigned flags)
{
uint32_t i;
int r = 0;
- if (flags & FOR_EACH_OBJECT_PACK_ORDER) {
+ if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER) {
if (load_pack_revindex(p->repo, p))
return -1;
}
@@ -2285,7 +2311,7 @@ int for_each_object_in_pack(struct packed_git *p,
* - in pack-order, it is pack position, which we must
* convert to an index position in order to get the oid.
*/
- if (flags & FOR_EACH_OBJECT_PACK_ORDER)
+ if (flags & ODB_FOR_EACH_OBJECT_PACK_ORDER)
index_pos = pack_pos_to_index(p, i);
else
index_pos = i;
@@ -2301,75 +2327,114 @@ int for_each_object_in_pack(struct packed_git *p,
return r;
}
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum for_each_object_flags flags)
+struct packfile_store_for_each_object_wrapper_data {
+ struct packfile_store *store;
+ const struct object_info *request;
+ odb_for_each_object_cb cb;
+ void *cb_data;
+};
+
+static int packfile_store_for_each_object_wrapper(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t index_pos,
+ void *cb_data)
{
- struct odb_source *source;
- int r = 0;
- int pack_errors = 0;
+ struct packfile_store_for_each_object_wrapper_data *data = cb_data;
- odb_prepare_alternates(repo->objects);
+ if (data->request) {
+ off_t offset = nth_packed_object_offset(pack, index_pos);
+ struct object_info oi = *data->request;
- for (source = repo->objects->sources; source; source = source->next) {
- struct packfile_list_entry *e;
+ if (packed_object_info_with_index_pos(pack, offset,
+ &index_pos, &oi) < 0) {
+ mark_bad_packed_object(pack, oid);
+ return -1;
+ }
- source->packfiles->skip_mru_updates = true;
+ return data->cb(oid, &oi, data->cb_data);
+ } else {
+ return data->cb(oid, NULL, data->cb_data);
+ }
+}
- for (e = packfile_store_get_packs(source->packfiles); e; e = e->next) {
- struct packed_git *p = e->pack;
+int packfile_store_for_each_object(struct packfile_store *store,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags)
+{
+ struct packfile_store_for_each_object_wrapper_data data = {
+ .store = store,
+ .request = request,
+ .cb = cb,
+ .cb_data = cb_data,
+ };
+ struct packfile_list_entry *e;
+ int pack_errors = 0, ret;
- if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
- continue;
- if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&
- !p->pack_promisor)
- continue;
- if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
- p->pack_keep_in_core)
- continue;
- if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
- p->pack_keep)
- continue;
- if (open_pack_index(p)) {
- pack_errors = 1;
- continue;
- }
+ store->skip_mru_updates = true;
- r = for_each_object_in_pack(p, cb, data, flags);
- if (r)
- break;
- }
+ for (e = packfile_store_get_packs(store); e; e = e->next) {
+ struct packed_git *p = e->pack;
- source->packfiles->skip_mru_updates = false;
+ if ((flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY) &&
+ !p->pack_promisor)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&
+ p->pack_keep_in_core)
+ continue;
+ if ((flags & ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&
+ p->pack_keep)
+ continue;
+ if (open_pack_index(p)) {
+ pack_errors = 1;
+ continue;
+ }
- if (r)
- break;
+ ret = for_each_object_in_pack(p, packfile_store_for_each_object_wrapper,
+ &data, flags);
+ if (ret)
+ goto out;
}
- return r ? r : pack_errors;
+ ret = 0;
+
+out:
+ store->skip_mru_updates = false;
+
+ if (!ret && pack_errors)
+ ret = -1;
+ return ret;
}
+struct add_promisor_object_data {
+ struct repository *repo;
+ struct oidset *set;
+};
+
static int add_promisor_object(const struct object_id *oid,
- struct packed_git *pack,
- uint32_t pos UNUSED,
- void *set_)
+ struct object_info *oi UNUSED,
+ void *cb_data)
{
- struct oidset *set = set_;
+ struct add_promisor_object_data *data = cb_data;
struct object *obj;
int we_parsed_object;
- obj = lookup_object(pack->repo, oid);
+ obj = lookup_object(data->repo, oid);
if (obj && obj->parsed) {
we_parsed_object = 0;
} else {
we_parsed_object = 1;
- obj = parse_object_with_flags(pack->repo, oid,
+ obj = parse_object_with_flags(data->repo, oid,
PARSE_OBJECT_SKIP_HASH_CHECK);
}
if (!obj)
return 1;
- oidset_insert(set, oid);
+ oidset_insert(data->set, oid);
/*
* If this is a tree, commit, or tag, the objects it refers
@@ -2387,19 +2452,19 @@ static int add_promisor_object(const struct object_id *oid,
*/
return 0;
while (tree_entry_gently(&desc, &entry))
- oidset_insert(set, &entry.oid);
+ oidset_insert(data->set, &entry.oid);
if (we_parsed_object)
free_tree_buffer(tree);
} else if (obj->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *) obj;
struct commit_list *parents = commit->parents;
- oidset_insert(set, get_commit_tree_oid(commit));
+ oidset_insert(data->set, get_commit_tree_oid(commit));
for (; parents; parents = parents->next)
- oidset_insert(set, &parents->item->object.oid);
+ oidset_insert(data->set, &parents->item->object.oid);
} else if (obj->type == OBJ_TAG) {
struct tag *tag = (struct tag *) obj;
- oidset_insert(set, get_tagged_oid(tag));
+ oidset_insert(data->set, get_tagged_oid(tag));
}
return 0;
}
@@ -2411,10 +2476,13 @@ int is_promisor_object(struct repository *r, const struct object_id *oid)
if (!promisor_objects_prepared) {
if (repo_has_promisor_remote(r)) {
- for_each_packed_object(r, add_promisor_object,
- &promisor_objects,
- FOR_EACH_OBJECT_PROMISOR_ONLY |
- FOR_EACH_OBJECT_PACK_ORDER);
+ struct add_promisor_object_data data = {
+ .repo = r,
+ .set = &promisor_objects,
+ };
+
+ odb_for_each_object(r->objects, NULL, add_promisor_object, &data,
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER);
}
promisor_objects_prepared = 1;
}
diff --git a/packfile.h b/packfile.h
index acc5c55ad5..1a1b720764 100644
--- a/packfile.h
+++ b/packfile.h
@@ -339,9 +339,22 @@ typedef int each_packed_object_fn(const struct object_id *oid,
void *data);
int for_each_object_in_pack(struct packed_git *p,
each_packed_object_fn, void *data,
- enum for_each_object_flags flags);
-int for_each_packed_object(struct repository *repo, each_packed_object_fn cb,
- void *data, enum for_each_object_flags flags);
+ unsigned flags);
+
+/*
+ * Iterate through all packed objects in the given packfile store and invoke
+ * the callback function for each of them. If an object info request is given,
+ * then the object info will be read for every individual object and passed to
+ * the callback as if `packfile_store_read_object_info()` was called for the
+ * object.
+ *
+ * The flags parameter is a combination of `odb_for_each_object_flags`.
+ */
+int packfile_store_for_each_object(struct packfile_store *store,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ unsigned flags);
/* A hook to report invalid files in pack directory */
#define PACKDIR_FILE_PACK 1
diff --git a/reachable.c b/reachable.c
index 4b532039d5..101cfc2727 100644
--- a/reachable.c
+++ b/reachable.c
@@ -191,30 +191,27 @@ static int obj_is_recent(const struct object_id *oid, timestamp_t mtime,
return oidset_contains(&data->extra_recent_oids, oid);
}
-static void add_recent_object(const struct object_id *oid,
- struct packed_git *pack,
- off_t offset,
- timestamp_t mtime,
- struct recent_data *data)
+static int want_recent_object(struct recent_data *data,
+ const struct object_id *oid)
{
- struct object *obj;
- enum object_type type;
+ if (data->ignore_in_core_kept_packs &&
+ has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE))
+ return 0;
+ return 1;
+}
- if (!obj_is_recent(oid, mtime, data))
- return;
+static int add_recent_object(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data)
+{
+ struct recent_data *data = cb_data;
+ struct object *obj;
- /*
- * We do not want to call parse_object here, because
- * inflating blobs and trees could be very expensive.
- * However, we do need to know the correct type for
- * later processing, and the revision machinery expects
- * commits and tags to have been parsed.
- */
- type = odb_read_object_info(the_repository->objects, oid, NULL);
- if (type < 0)
- die("unable to get object info for %s", oid_to_hex(oid));
+ if (!want_recent_object(data, oid) ||
+ !obj_is_recent(oid, *oi->mtimep, data))
+ return 0;
- switch (type) {
+ switch (*oi->typep) {
case OBJ_TAG:
case OBJ_COMMIT:
obj = parse_object_or_die(the_repository, oid, NULL);
@@ -227,77 +224,22 @@ static void add_recent_object(const struct object_id *oid,
break;
default:
die("unknown object type for %s: %s",
- oid_to_hex(oid), type_name(type));
+ oid_to_hex(oid), type_name(*oi->typep));
}
if (!obj)
die("unable to lookup %s", oid_to_hex(oid));
-
- add_pending_object(data->revs, obj, "");
- if (data->cb)
- data->cb(obj, pack, offset, mtime);
-}
-
-static int want_recent_object(struct recent_data *data,
- const struct object_id *oid)
-{
- if (data->ignore_in_core_kept_packs &&
- has_object_kept_pack(data->revs->repo, oid, KEPT_PACK_IN_CORE))
+ if (obj->flags & SEEN)
return 0;
- return 1;
-}
-static int add_recent_loose(const struct object_id *oid,
- const char *path, void *data)
-{
- struct stat st;
- struct object *obj;
-
- if (!want_recent_object(data, oid))
- return 0;
-
- obj = lookup_object(the_repository, oid);
-
- if (obj && obj->flags & SEEN)
- return 0;
-
- if (stat(path, &st) < 0) {
- /*
- * It's OK if an object went away during our iteration; this
- * could be due to a simultaneous repack. But anything else
- * we should abort, since we might then fail to mark objects
- * which should not be pruned.
- */
- if (errno == ENOENT)
- return 0;
- return error_errno("unable to stat %s", oid_to_hex(oid));
+ add_pending_object(data->revs, obj, "");
+ if (data->cb) {
+ if (oi->whence == OI_PACKED)
+ data->cb(obj, oi->u.packed.pack, oi->u.packed.offset, *oi->mtimep);
+ else
+ data->cb(obj, NULL, 0, *oi->mtimep);
}
- add_recent_object(oid, NULL, 0, st.st_mtime, data);
- return 0;
-}
-
-static int add_recent_packed(const struct object_id *oid,
- struct packed_git *p,
- uint32_t pos,
- void *data)
-{
- struct object *obj;
- timestamp_t mtime = p->mtime;
-
- if (!want_recent_object(data, oid))
- return 0;
-
- obj = lookup_object(the_repository, oid);
-
- if (obj && obj->flags & SEEN)
- return 0;
- if (p->is_cruft) {
- if (load_pack_mtimes(p) < 0)
- die(_("could not load cruft pack .mtimes"));
- mtime = nth_packed_mtime(p, pos);
- }
- add_recent_object(oid, p, nth_packed_object_offset(p, pos), mtime, data);
return 0;
}
@@ -307,7 +249,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
int ignore_in_core_kept_packs)
{
struct recent_data data;
- enum for_each_object_flags flags;
+ unsigned flags;
+ enum object_type type;
+ time_t mtime;
+ struct object_info oi = {
+ .mtimep = &mtime,
+ .typep = &type,
+ };
int r;
data.revs = revs;
@@ -318,16 +266,13 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
oidset_init(&data.extra_recent_oids, 0);
data.extra_recent_oids_loaded = 0;
- r = for_each_loose_object(the_repository->objects, add_recent_loose, &data,
- FOR_EACH_OBJECT_LOCAL_ONLY);
- if (r)
- goto done;
-
- flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
+ flags = ODB_FOR_EACH_OBJECT_LOCAL_ONLY | ODB_FOR_EACH_OBJECT_PACK_ORDER;
if (ignore_in_core_kept_packs)
- flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
+ flags |= ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
- r = for_each_packed_object(revs->repo, add_recent_packed, &data, flags);
+ r = odb_for_each_object(revs->repo->objects, &oi, add_recent_object, &data, flags);
+ if (r)
+ goto done;
done:
oidset_clear(&data.extra_recent_oids);
diff --git a/repack-promisor.c b/repack-promisor.c
index 73af57bce3..90318ce150 100644
--- a/repack-promisor.c
+++ b/repack-promisor.c
@@ -17,8 +17,8 @@ struct write_oid_context {
* necessary.
*/
static int write_oid(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED, void *data)
+ struct object_info *oi UNUSED,
+ void *data)
{
struct write_oid_context *ctx = data;
struct child_process *cmd = ctx->cmd;
@@ -98,8 +98,8 @@ void repack_promisor_objects(struct repository *repo,
*/
ctx.cmd = &cmd;
ctx.algop = repo->hash_algo;
- for_each_packed_object(repo, write_oid, &ctx,
- FOR_EACH_OBJECT_PROMISOR_ONLY);
+ odb_for_each_object(repo->objects, NULL, write_oid, &ctx,
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (cmd.in == -1) {
/* No packed objects; cmd was never started */
diff --git a/revision.c b/revision.c
index 047ff7e458..ddddbe4993 100644
--- a/revision.c
+++ b/revision.c
@@ -3632,8 +3632,7 @@ void reset_revision_walk(void)
}
static int mark_uninteresting(const struct object_id *oid,
- struct packed_git *pack UNUSED,
- uint32_t pos UNUSED,
+ struct object_info *oi UNUSED,
void *cb)
{
struct rev_info *revs = cb;
@@ -3942,10 +3941,9 @@ int prepare_revision_walk(struct rev_info *revs)
(revs->limited && limiting_can_increase_treesame(revs)))
revs->treesame.name = "treesame";
- if (revs->exclude_promisor_objects) {
- for_each_packed_object(revs->repo, mark_uninteresting, revs,
- FOR_EACH_OBJECT_PROMISOR_ONLY);
- }
+ if (revs->exclude_promisor_objects)
+ odb_for_each_object(revs->repo->objects, NULL, mark_uninteresting,
+ revs, ODB_FOR_EACH_OBJECT_PROMISOR_ONLY);
if (!revs->reflog_info)
prepare_to_use_bloom_filter(revs);