From ba1c21d34346e5979f9308806274bfcda4949ad4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 5 Mar 2026 15:19:41 +0100 Subject: odb: split `struct odb_source` into separate header Subsequent commits will expand the `struct odb_source` to become a generic interface for accessing an object database source. As part of these refactorings we'll add a set of function pointers that will significantly expand the structure overall. Prepare for this by splitting out the `struct odb_source` into a separate header. This keeps the high-level object database interface detached from the low-level object database sources. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.h | 45 +-------------------------------------------- 1 file changed, 1 insertion(+), 44 deletions(-) (limited to 'odb.h') diff --git a/odb.h b/odb.h index 68b8ec2289..e13b5b7c44 100644 --- a/odb.h +++ b/odb.h @@ -3,6 +3,7 @@ #include "hashmap.h" #include "object.h" +#include "odb/source.h" #include "oidset.h" #include "oidmap.h" #include "string-list.h" @@ -30,50 +31,6 @@ extern int fetch_if_missing; */ char *compute_alternate_path(const char *path, struct strbuf *err); -/* - * The source is the part of the object database that stores the actual - * objects. It thus encapsulates the logic to read and write the specific - * on-disk format. An object database can have multiple sources: - * - * - The primary source, which is typically located in "$GIT_DIR/objects". - * This is where new objects are usually written to. - * - * - Alternate sources, which are configured via "objects/info/alternates" or - * via the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable. These - * alternate sources are only used to read objects. - */ -struct odb_source { - struct odb_source *next; - - /* Object database that owns this object source. */ - struct object_database *odb; - - /* Private state for loose objects. 
*/ - struct odb_source_loose *loose; - - /* Should only be accessed directly by packfile.c and midx.c. */ - struct packfile_store *packfiles; - - /* - * Figure out whether this is the local source of the owning - * repository, which would typically be its ".git/objects" directory. - * This local object directory is usually where objects would be - * written to. - */ - bool local; - - /* - * This object store is ephemeral, so there is no need to fsync. - */ - int will_destroy; - - /* - * Path to the source. If this is a relative path, it is relative to - * the current working directory. - */ - char *path; -}; - struct packed_git; struct packfile_store; struct cached_object_entry; -- cgit v1.3 From 5946a564cddc0bf471f27ae4c3fe205441e3ef65 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 5 Mar 2026 15:19:49 +0100 Subject: odb/source: make `read_object_info()` function pluggable Introduce a new callback function in `struct odb_source` to make the function pluggable. Note that this function is a bit less straight-forward to convert compared to the other functions. The reason here is that the logic to read an object is: 1. We try to read the object. If it exists we return it. 2. If the object does not exist we reprepare the object database source. 3. We then try reading the object info a second time in case the reprepare caused it to appear. The second read is only supposed to happen for the packfile store though, as reading loose objects is not impacted by repreparing the object database. Ideally, we'd just move this whole logic into the ODB source. But that's not easily possible because we try to avoid the reprepare unless really required, which is after we have found out that no other ODB source contains the object, either. So the logic spans across multiple ODB sources, and consequently we cannot move it into an individual source. 
Instead, introduce a new flag `OBJECT_INFO_SECOND_READ` that tells the backend that we already tried to look up the object once, and that this time around the ODB source should try to find any new objects that may have surfaced due to an on-disk change. With this flag, the "files" backend can trivially skip trying to re-read the object as a loose object. Furthermore, as we know that we only try the second read via the packfile store, we can skip repreparing loose objects and only reprepare the packfile store. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 10 +++++++ odb.c | 22 +++++++-------- odb.h | 24 ----------------- odb/source-files.c | 15 +++++++++++ odb/source.h | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ packfile.c | 10 ++++++- 6 files changed, 122 insertions(+), 37 deletions(-) (limited to 'odb.h') diff --git a/object-file.c b/object-file.c index 7ef8291a48..eefde72c7d 100644 --- a/object-file.c +++ b/object-file.c @@ -546,6 +546,16 @@ int odb_source_loose_read_object_info(struct odb_source *source, enum object_info_flags flags) { static struct strbuf buf = STRBUF_INIT; + + /* + * The second read shouldn't cause new loose objects to show up, unless + * there was a race condition with a secondary process. We don't care + * about this case though, so we simply skip reading loose objects a + * second time. + */ + if (flags & OBJECT_INFO_SECOND_READ) + return -1; + odb_loose_path(source, &buf, oid); return read_object_info_from_path(source, buf.buf, oid, oi, flags); } diff --git a/odb.c b/odb.c index f7487eb0df..c0b8cd062b 100644 --- a/odb.c +++ b/odb.c @@ -688,22 +688,20 @@ static int do_oid_object_info_extended(struct object_database *odb, while (1) { struct odb_source *source; - /* Most likely it's a loose object. 
*/ - for (source = odb->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - if (!packfile_store_read_object_info(files->packed, real, oi, flags) || - !odb_source_loose_read_object_info(source, real, oi, flags)) + for (source = odb->sources; source; source = source->next) + if (!odb_source_read_object_info(source, real, oi, flags)) return 0; - } - /* Not a loose object; someone else may have just packed it. */ + /* + * When the object hasn't been found we try a second read and + * tell the sources so. This may cause them to invalidate + * caches or reload on-disk state. + */ if (!(flags & OBJECT_INFO_QUICK)) { - odb_reprepare(odb->repo->objects); - for (source = odb->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - if (!packfile_store_read_object_info(files->packed, real, oi, flags)) + for (source = odb->sources; source; source = source->next) + if (!odb_source_read_object_info(source, real, oi, + flags | OBJECT_INFO_SECOND_READ)) return 0; - } } /* diff --git a/odb.h b/odb.h index e13b5b7c44..70ffb033f9 100644 --- a/odb.h +++ b/odb.h @@ -339,30 +339,6 @@ struct object_info { */ #define OBJECT_INFO_INIT { 0 } -/* Flags that can be passed to `odb_read_object_info_extended()`. */ -enum object_info_flags { - /* Invoke lookup_replace_object() on the given hash. */ - OBJECT_INFO_LOOKUP_REPLACE = (1 << 0), - - /* Do not reprepare object sources when the first lookup has failed. */ - OBJECT_INFO_QUICK = (1 << 1), - - /* - * Do not attempt to fetch the object if missing (even if fetch_is_missing is - * nonzero). - */ - OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 2), - - /* Die if object corruption (not just an object being missing) was detected. */ - OBJECT_INFO_DIE_IF_CORRUPT = (1 << 3), - - /* - * This is meant for bulk prefetching of missing blobs in a partial - * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. 
- */ - OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK), -}; - /* * Read object info from the object database and populate the `object_info` * structure. Returns 0 on success, a negative error code otherwise. diff --git a/odb/source-files.c b/odb/source-files.c index 20a24f524a..f2969a1214 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -41,6 +41,20 @@ static void odb_source_files_reprepare(struct odb_source *source) packfile_store_reprepare(files->packed); } +static int odb_source_files_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + + if (!packfile_store_read_object_info(files->packed, oid, oi, flags) || + !odb_source_loose_read_object_info(source, oid, oi, flags)) + return 0; + + return -1; +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -55,6 +69,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.free = odb_source_files_free; files->base.close = odb_source_files_close; files->base.reprepare = odb_source_files_reprepare; + files->base.read_object_info = odb_source_files_read_object_info; /* * Ideally, we would only ever store absolute paths in the source. This diff --git a/odb/source.h b/odb/source.h index 0e6c6abdb1..150becafe6 100644 --- a/odb/source.h +++ b/odb/source.h @@ -12,6 +12,45 @@ enum odb_source_type { ODB_SOURCE_FILES, }; +/* Flags that can be passed to `odb_read_object_info_extended()`. */ +enum object_info_flags { + /* Invoke lookup_replace_object() on the given hash. */ + OBJECT_INFO_LOOKUP_REPLACE = (1 << 0), + + /* Do not reprepare object sources when the first lookup has failed. */ + OBJECT_INFO_QUICK = (1 << 1), + + /* + * Do not attempt to fetch the object if missing (even if fetch_if_missing is + * nonzero). 
+ */ + OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 2), + + /* Die if object corruption (not just an object being missing) was detected. */ + OBJECT_INFO_DIE_IF_CORRUPT = (1 << 3), + + /* + * We have already tried reading the object, but it couldn't be found + * via any of the attached sources, and are now doing a second read. + * This second read asks the individual sources to also evaluate + * whether any on-disk state may have changed that may have caused the + * object to appear. + * + * This flag is for internal use, only. The second read only occurs + * when `OBJECT_INFO_QUICK` was not passed. + */ + OBJECT_INFO_SECOND_READ = (1 << 4), + + /* + * This is meant for bulk prefetching of missing blobs in a partial + * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. + */ + OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK), +}; + +struct object_id; +struct object_info; + /* * The source is the part of the object database that stores the actual * objects. It thus encapsulates the logic to read and write the specific @@ -72,6 +111,33 @@ struct odb_source { * example just been repacked so that new objects will become visible. */ void (*reprepare)(struct odb_source *source); + + /* + * This callback is expected to read object information from the object + * database source. The object info will be partially populated with + * pointers for each bit of information that was requested by the + * caller. + * + * The flags field is a combination of `OBJECT_INFO` flags. Only the + * following fields need to be handled by the backend: + * + * - `OBJECT_INFO_QUICK` indicates it is fine to use caches without + * re-verifying the data. + * + * - `OBJECT_INFO_SECOND_READ` indicates that the initial object + * lookup has failed and that the object sources should check + * whether any of its on-disk state has changed that may have + * caused the object to appear. 
Sources are free to ignore the + * second read in case they know that the first read would have + * already surfaced the object without reloading any on-disk state. + * + * The callback is expected to return a negative error code in case + * reading the object has failed, 0 otherwise. + */ + int (*read_object_info)(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags); }; /* @@ -131,4 +197,16 @@ static inline void odb_source_reprepare(struct odb_source *source) source->reprepare(source); } +/* + * Read an object from the object database source identified by its object ID. + * Returns 0 on success, a negative error code otherwise. + */ +static inline int odb_source_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags) +{ + return source->read_object_info(source, oid, oi, flags); +} + #endif diff --git a/packfile.c b/packfile.c index da1c0dfa39..71db10e7c6 100644 --- a/packfile.c +++ b/packfile.c @@ -2181,11 +2181,19 @@ int packfile_store_freshen_object(struct packfile_store *store, int packfile_store_read_object_info(struct packfile_store *store, const struct object_id *oid, struct object_info *oi, - enum object_info_flags flags UNUSED) + enum object_info_flags flags) { struct pack_entry e; int ret; + /* + * In case the first read didn't surface the object, we have to reload + * packfiles. This may cause us to discover new packfiles that have + * been added since the last time we have prepared the packfile store. 
+ */ + if (flags & OBJECT_INFO_SECOND_READ) + packfile_store_reprepare(store); + if (!find_pack_entry(store, oid, &e)) return 1; -- cgit v1.3 From fdefdc2e6979e9e8cb28b34c458f42b44f217bf0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 5 Mar 2026 15:19:51 +0100 Subject: odb/source: make `for_each_object()` function pluggable Introduce a new callback function in `struct odb_source` to make the function pluggable. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 12 +--------- odb.h | 12 ---------- odb/source-files.c | 23 +++++++++++++++++++ odb/source.h | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 23 deletions(-) (limited to 'odb.h') diff --git a/odb.c b/odb.c index c0b8cd062b..494a3273cf 100644 --- a/odb.c +++ b/odb.c @@ -984,20 +984,10 @@ int odb_for_each_object(struct object_database *odb, odb_prepare_alternates(odb); for (struct odb_source *source = odb->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - if (flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY && !source->local) continue; - if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { - ret = odb_source_loose_for_each_object(source, request, - cb, cb_data, flags); - if (ret) - return ret; - } - - ret = packfile_store_for_each_object(files->packed, request, - cb, cb_data, flags); + ret = odb_source_for_each_object(source, request, cb, cb_data, flags); if (ret) return ret; } diff --git a/odb.h b/odb.h index 70ffb033f9..692d9029ef 100644 --- a/odb.h +++ b/odb.h @@ -432,18 +432,6 @@ enum odb_for_each_object_flags { ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; -/* - * A callback function that can be used to iterate through objects. If given, - * the optional `oi` parameter will be populated the same as if you would call - * `odb_read_object_info()`. - * - * Returning a non-zero error code will cause iteration to abort. The error - * code will be propagated. 
- */ -typedef int (*odb_for_each_object_cb)(const struct object_id *oid, - struct object_info *oi, - void *cb_data); - /* * Iterate through all objects contained in the object database. Note that * objects may be iterated over multiple times in case they are either stored diff --git a/odb/source-files.c b/odb/source-files.c index b50a1f5492..d8ef1d8237 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -66,6 +66,28 @@ static int odb_source_files_read_object_stream(struct odb_read_stream **out, return -1; } +static int odb_source_files_for_each_object(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + int ret; + + if (!(flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { + ret = odb_source_loose_for_each_object(source, request, cb, cb_data, flags); + if (ret) + return ret; + } + + ret = packfile_store_for_each_object(files->packed, request, cb, cb_data, flags); + if (ret) + return ret; + + return 0; +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -82,6 +104,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.reprepare = odb_source_files_reprepare; files->base.read_object_info = odb_source_files_read_object_info; files->base.read_object_stream = odb_source_files_read_object_stream; + files->base.for_each_object = odb_source_files_for_each_object; /* * Ideally, we would only ever store absolute paths in the source. This diff --git a/odb/source.h b/odb/source.h index 4397cada27..be56995389 100644 --- a/odb/source.h +++ b/odb/source.h @@ -52,6 +52,18 @@ struct object_id; struct object_info; struct odb_read_stream; +/* + * A callback function that can be used to iterate through objects. If given, + * the optional `oi` parameter will be populated the same as if you would call + * `odb_read_object_info()`. 
+ * + * Returning a non-zero error code will cause iteration to abort. The error + * code will be propagated. + */ +typedef int (*odb_for_each_object_cb)(const struct object_id *oid, + struct object_info *oi, + void *cb_data); + /* * The source is the part of the object database that stores the actual * objects. It thus encapsulates the logic to read and write the specific @@ -150,6 +162,30 @@ struct odb_source { int (*read_object_stream)(struct odb_read_stream **out, struct odb_source *source, const struct object_id *oid); + + /* + * This callback is expected to iterate over all objects stored in this + * source and invoke the callback function for each of them. It is + * valid to yield the same object multiple times. A non-zero exit code + * from the object callback shall abort iteration. + * + * The optional `request` structure should serve as a template for + * looking up object info for every individual iterated object. It + * should not be modified directly and should instead be copied into a + * separate `struct object_info` that gets passed to the callback. If + * the caller passes a `NULL` pointer then the object itself shall not + * be read. + * + * The callback is expected to return a negative error code in case the + * iteration has failed to read all objects, 0 otherwise. When the + * callback function returns a non-zero error code then that error code + * should be returned. + */ + int (*for_each_object)(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags); }; /* @@ -232,4 +268,33 @@ static inline int odb_source_read_object_stream(struct odb_read_stream **out, return source->read_object_stream(out, source, oid); } +/* + * Iterate through all objects contained in the given source and invoke the + * callback function for each of them. Returning a non-zero code from the + * callback function aborts iteration. There is no guarantee that objects + * are only iterated over once. 
+ * + * The optional `request` structure serves as a template for retrieving the + * object info for each individual iterated object and will be populated as if + * `odb_source_read_object_info()` was called on the object. It will not be + * modified, the callback will instead be invoked with a separate `struct + * object_info` for every object. Object info will not be read when passing a + * `NULL` pointer. + * + * The `flags` argument is a bitfield of `ODB_FOR_EACH_OBJECT_*` flags. Not all flags may + * apply to a specific backend, so whether or not they are honored is defined + * by the implementation. + * + * Returns 0 when all objects have been iterated over, a negative error code in + * case iteration has failed, or a non-zero value returned from the callback. + */ +static inline int odb_source_for_each_object(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + unsigned flags) +{ + return source->for_each_object(source, request, cb, cb_data, flags); +} + #endif -- cgit v1.3 From 7ae23630c3ed012180edc88f0a9615a0d570a77c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Thu, 5 Mar 2026 15:19:55 +0100 Subject: odb/source: make `read_alternates()` function pluggable Introduce a new callback function in `struct odb_source` to make the function pluggable. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 26 ++++---------------------- odb.h | 5 +++++ odb/source-files.c | 22 ++++++++++++++++++++++ odb/source.h | 28 ++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 22 deletions(-) (limited to 'odb.h') diff --git a/odb.c b/odb.c index f439de9db2..d9424cdfd0 100644 --- a/odb.c +++ b/odb.c @@ -131,10 +131,10 @@ out: return usable; } -static void parse_alternates(const char *string, - int sep, - const char *relative_base, - struct strvec *out) +void parse_alternates(const char *string, + int sep, + const char *relative_base, + struct strvec *out) { struct strbuf pathbuf = STRBUF_INIT; struct strbuf buf = STRBUF_INIT; @@ -198,24 +198,6 @@ static void parse_alternates(const char *string, strbuf_release(&buf); } -static void odb_source_read_alternates(struct odb_source *source, - struct strvec *out) -{ - struct strbuf buf = STRBUF_INIT; - char *path; - - path = xstrfmt("%s/info/alternates", source->path); - if (strbuf_read_file(&buf, path, 1024) < 0) { - warn_on_fopen_errors(path); - free(path); - return; - } - parse_alternates(buf.buf, '\n', source->path, out); - - strbuf_release(&buf); - free(path); -} - static struct odb_source *odb_add_alternate_recursively(struct object_database *odb, const char *source, int depth) diff --git a/odb.h b/odb.h index 692d9029ef..86e0365c24 100644 --- a/odb.h +++ b/odb.h @@ -500,4 +500,9 @@ int odb_write_object_stream(struct object_database *odb, struct odb_write_stream *stream, size_t len, struct object_id *oid); +void parse_alternates(const char *string, + int sep, + const char *relative_base, + struct strvec *out); + #endif /* ODB_H */ diff --git a/odb/source-files.c b/odb/source-files.c index b8844f11b7..199c55cfa4 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -2,9 +2,11 @@ #include "abspath.h" #include "chdir-notify.h" #include "object-file.h" +#include "odb.h" #include "odb/source.h" #include "odb/source-files.h" #include 
"packfile.h" +#include "strbuf.h" static void odb_source_files_reparent(const char *name UNUSED, const char *old_cwd, @@ -117,6 +119,25 @@ static int odb_source_files_write_object_stream(struct odb_source *source, return odb_source_loose_write_stream(source, stream, len, oid); } +static int odb_source_files_read_alternates(struct odb_source *source, + struct strvec *out) +{ + struct strbuf buf = STRBUF_INIT; + char *path; + + path = xstrfmt("%s/info/alternates", source->path); + if (strbuf_read_file(&buf, path, 1024) < 0) { + warn_on_fopen_errors(path); + free(path); + return 0; + } + parse_alternates(buf.buf, '\n', source->path, out); + + strbuf_release(&buf); + free(path); + return 0; +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -137,6 +158,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.freshen_object = odb_source_files_freshen_object; files->base.write_object = odb_source_files_write_object; files->base.write_object_stream = odb_source_files_write_object_stream; + files->base.read_alternates = odb_source_files_read_alternates; /* * Ideally, we would only ever store absolute paths in the source. This diff --git a/odb/source.h b/odb/source.h index 6c8bec1912..fbdddcb2eb 100644 --- a/odb/source.h +++ b/odb/source.h @@ -54,6 +54,7 @@ struct object_id; struct object_info; struct odb_read_stream; struct odb_write_stream; +struct strvec; /* * A callback function that can be used to iterate through objects. If given, @@ -231,6 +232,19 @@ struct odb_source { int (*write_object_stream)(struct odb_source *source, struct odb_write_stream *stream, size_t len, struct object_id *oid); + + /* + * This callback is expected to read the list of alternate object + * database sources connected to it and write them into the `strvec`. + * + * The result is expected to be paths to the alternates. All paths must + * be resolved to absolute paths. 
+ * + * The callback is expected to return 0 on success, a negative error + * code otherwise. + */ + int (*read_alternates)(struct odb_source *source, + struct strvec *out); }; /* @@ -384,4 +398,18 @@ static inline int odb_source_write_object_stream(struct odb_source *source, return source->write_object_stream(source, stream, len, oid); } +/* + * Read the list of alternative object database sources from the given backend + * and populate the `strvec` with them. The listing is not recursive -- that + * is, if any of the yielded alternate sources has alternates itself, those + * will not be yielded as part of this function call. + * + * Return 0 on success, a negative error code otherwise. + */ +static inline int odb_source_read_alternates(struct odb_source *source, + struct strvec *out) +{ + return source->read_alternates(source, out); +} + #endif -- cgit v1.3