From 7a4bd1b836c7437dfb8ff3650096750b98a6b3e4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Jan 2026 10:00:42 +0100 Subject: packfile: always declare object info to be OI_PACKED When reading object info via a packfile we yield one of two types: - The object can either be OI_PACKED, which is what a caller would typically expect. - Or it can be OI_DBCACHED if it is stored in the delta base cache. The latter really is an implementation detail though, and callers typically don't care at all about the difference. Furthermore, the information whether or not it is part of the delta base cache can already be derived via the `is_delta` field, so the fact that we discern between OI_PACKED and OI_DBCACHED only further complicates the interface. There aren't all that many callers that care about the `whence` field in the first place. In fact, there's only three: - `packfile_store_read_object_info()` checks for `whence == OI_PACKED` and then populates the packfile information of the object info structure. We now start to do this also for deltified objects, which gives its callers strictly more information. - `repack_local_links()` wants to determine whether the object is part of a promisor pack and checks for `whence == OI_PACKED`. If so, it verifies that the packfile is a promisor pack. It's arguably wrong to declare that an object is not part of a promisor pack only because it is stored in the delta base cache. - `is_not_in_promisor_pack_obj()` does the same, but checks that a specific object is _not_ part of a promisor pack. The same reasoning as above applies. Drop the OI_DBCACHED enum completely. None of the callers seem to care about the distinction. Note that this also fixes a segfault introduced in 8c1b84bc97 (streaming: move logic to read packed objects streams into backend, 2025-11-23), which refactors how we stream packed objects. The intent is to only read packed objects in case they are stored non-deltified as we'd otherwise have to deflate them first. But the check for whether or not the object is stored as a delta was unconditionally done via `oi.u.packed.is_delta`, which is only valid in case `oi.whence` is `OI_PACKED`. But under some circumstances we got `OI_DBCACHED` here, which means that none of the `oi.u.packed` fields were initialized at all. Consequently, we assumed the object was not stored as a delta, and then try to read the object from `oi.u.packed.pack`, which is a `NULL` pointer and thus causes a segfault. Add a test case for this issue so that this cannot regress in the future anymore. Reported-by: Matt Smiley Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'packfile.c') diff --git a/packfile.c b/packfile.c index 08a0863fc3..b0c6665c87 100644 --- a/packfile.c +++ b/packfile.c @@ -1656,8 +1656,7 @@ int packed_object_info(struct repository *r, struct packed_git *p, oidclr(oi->delta_base_oid, p->repo->hash_algo); } - oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED : - OI_PACKED; + oi->whence = OI_PACKED; out: unuse_pack(&w_curs); -- cgit v1.3 From 27d9486cbc37a44565e4a97a84089c85741d4cd8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Jan 2026 10:00:43 +0100 Subject: packfile: extend `is_delta` field to allow for "unknown" state The `struct object_info::u::packed::is_delta` field determines whether or not a specific object is stored as a delta. It only stores whether or not the object is stored as delta, so it is treated as a boolean value. This boolean is insufficient though: when reading a packed object via `packfile_store_read_object_info()` we know to skip parsing the actual object when the user didn't request any object-specific data. In that case we won't read the object itself, but will only look up its position in the packfile. Consequently, we do not know whether it is a delta or not. This isn't really an issue right now, as the check for an empty request is broken. But a subsequent commit will fix it, and once we do we will have the need to also represent an "unknown" delta state. Prepare for this change by introducing a new enum that encodes the object type. We don't use the "unknown" state just yet, but will start to do so in a subsequent commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.h | 7 ++++++- packfile.c | 17 ++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'packfile.c') diff --git a/odb.h b/odb.h index 73b0b87ad5..afae5e5c01 100644 --- a/odb.h +++ b/odb.h @@ -343,7 +343,12 @@ struct object_info { struct { struct packed_git *pack; off_t offset; - unsigned int is_delta; + enum packed_object_type { + PACKED_OBJECT_TYPE_UNKNOWN, + PACKED_OBJECT_TYPE_FULL, + PACKED_OBJECT_TYPE_OFS_DELTA, + PACKED_OBJECT_TYPE_REF_DELTA, + } type; } packed; } u; }; diff --git a/packfile.c b/packfile.c index b0c6665c87..cc797b2b6a 100644 --- a/packfile.c +++ b/packfile.c @@ -2159,8 +2159,18 @@ int packfile_store_read_object_info(struct packfile_store *store, if (oi->whence == OI_PACKED) { oi->u.packed.offset = e.offset; oi->u.packed.pack = e.p; - oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || - rtype == OBJ_OFS_DELTA); + + switch (rtype) { + case OBJ_REF_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA; + break; + case OBJ_OFS_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_OFS_DELTA; + break; + default: + oi->u.packed.type = PACKED_OBJECT_TYPE_FULL; + break; + } } return 0; @@ -2531,7 +2541,8 @@ int packfile_store_read_object_stream(struct odb_read_stream **out, oi.sizep = &size; if (packfile_store_read_object_info(store, oid, &oi, 0) || - oi.u.packed.is_delta || + oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA || + oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA || repo_settings_get_big_file_threshold(store->odb->repo) >= size) return -1; -- cgit v1.3 From 57c168dc3824f1aaad553f90f55fdc86b75de561 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Jan 2026 10:00:44 +0100 Subject: packfile: always populate pack-specific info when reading object info When reading object information via `packed_object_info()` we may not populate the object info's packfile-specific fields. This leads to inconsistent object info depending on whether the info was populated via `packfile_store_read_object_info()` or `packed_object_info()`. Fix this inconsistency so that we can always assume the pack info to be populated when reading object info from a pack. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'packfile.c') diff --git a/packfile.c b/packfile.c index cc797b2b6a..f7c33a2f77 100644 --- a/packfile.c +++ b/packfile.c @@ -1657,6 +1657,20 @@ int packed_object_info(struct repository *r, struct packed_git *p, } oi->whence = OI_PACKED; + oi->u.packed.offset = obj_offset; + oi->u.packed.pack = p; + + switch (type) { + case OBJ_REF_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA; + break; + case OBJ_OFS_DELTA: + oi->u.packed.type = PACKED_OBJECT_TYPE_OFS_DELTA; + break; + default: + oi->u.packed.type = PACKED_OBJECT_TYPE_FULL; + break; + } out: unuse_pack(&w_curs); @@ -2156,23 +2170,6 @@ int packfile_store_read_object_info(struct packfile_store *store, return -1; } - if (oi->whence == OI_PACKED) { - oi->u.packed.offset = e.offset; - oi->u.packed.pack = e.p; - - switch (rtype) { - case OBJ_REF_DELTA: - oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA; - break; - case OBJ_OFS_DELTA: - oi->u.packed.type = PACKED_OBJECT_TYPE_OFS_DELTA; - break; - default: - oi->u.packed.type = PACKED_OBJECT_TYPE_FULL; - break; - } - } - return 0; } -- cgit v1.3 From 8908c303da91400e8d9643da6fc697a081ac7374 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Jan 2026 10:00:45 +0100 Subject: packfile: disentangle return value of `packed_object_info()` The `packed_object_info()` function returns the type of the packed object. While we use an `enum object_type` to store the return value, this type is not to be confused with the actual object type. It _may_ contain the object type, but it may just as well encode that the given packed object is stored as a delta. We have removed the only caller that relied on this returned object type in the preceding commit, so let's simplify semantics and return either 0 on success or a negative error code otherwise. This unblocks a small optimization where we can skip reading the object type altogether. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 21 ++++++++++++--------- packfile.h | 4 ++++ 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'packfile.c') diff --git a/packfile.c b/packfile.c index f7c33a2f77..8c6ef45a67 100644 --- a/packfile.c +++ b/packfile.c @@ -1587,6 +1587,7 @@ int packed_object_info(struct repository *r, struct packed_git *p, unsigned long size; off_t curpos = obj_offset; enum object_type type; + int ret; /* * We always get the representation type, but only convert it to @@ -1607,12 +1608,12 @@ int packed_object_info(struct repository *r, struct packed_git *p, off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos, type, obj_offset); if (!base_offset) { - type = OBJ_BAD; + ret = -1; goto out; } *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos); if (*oi->sizep == 0) { - type = OBJ_BAD; + ret = -1; goto out; } } else { @@ -1625,7 +1626,7 @@ int packed_object_info(struct repository *r, struct packed_git *p, if (offset_to_pack_pos(p, obj_offset, &pos) < 0) { error("could not find object at offset %"PRIuMAX" " "in pack %s", (uintmax_t)obj_offset, p->pack_name); - type = OBJ_BAD; + ret = -1; goto out; } @@ -1639,7 +1640,7 @@ int packed_object_info(struct repository *r, struct packed_git *p, if (oi->typep) *oi->typep = ptot; if (ptot < 0) { - type = OBJ_BAD; + ret = -1; goto out; } } @@ -1649,7 +1650,7 @@ int packed_object_info(struct repository *r, struct packed_git *p, if (get_delta_base_oid(p, &w_curs, curpos, oi->delta_base_oid, type, obj_offset) < 0) { - type = OBJ_BAD; + ret = -1; goto out; } } else @@ -1672,9 +1673,11 @@ int packed_object_info(struct repository *r, struct packed_git *p, break; } + ret = 0; + out: unuse_pack(&w_curs); - return type; + return ret; } static void *unpack_compressed_entry(struct packed_git *p, @@ -2152,7 +2155,7 @@ int packfile_store_read_object_info(struct packfile_store *store, unsigned flags UNUSED) { struct pack_entry e; - int rtype; + int ret; if (!find_pack_entry(store->odb->repo, oid, &e)) return 1; @@ -2164,8 +2167,8 @@ int packfile_store_read_object_info(struct packfile_store *store, if (!oi) return 0; - rtype = packed_object_info(store->odb->repo, e.p, e.offset, oi); - if (rtype < 0) { + ret = packed_object_info(store->odb->repo, e.p, e.offset, oi); + if (ret < 0) { mark_bad_packed_object(e.p, oid); return -1; } diff --git a/packfile.h b/packfile.h index 59d162a3f4..d7cce582af 100644 --- a/packfile.h +++ b/packfile.h @@ -378,6 +378,10 @@ void release_pack_memory(size_t); /* global flag to enable extra checks when accessing packed objects */ extern int do_check_packed_object_crc; +/* + * Look up the object info for a specific offset in the packfile. + * Returns zero on success, a negative error code otherwise. + */ int packed_object_info(struct repository *r, struct packed_git *pack, off_t offset, struct object_info *); -- cgit v1.3 From 5ff29698e077e712740691a1d15e8320115ebdbf Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Jan 2026 10:00:46 +0100 Subject: packfile: skip unpacking object header for disk size requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While most of the object info requests for a packed object require us to unpack its headers, reading its disk size doesn't. We still unpack the object header in that case though, which is unnecessary work. Skip reading the header if only the disk size is requested. This leads to a small speedup when reading disk size, only. The following benchmark was done in the Git repository: Benchmark 1: ./git rev-list --disk-usage HEAD (rev = HEAD~) Time (mean ± σ): 105.2 ms ± 0.6 ms [User: 91.4 ms, System: 13.3 ms] Range (min … max): 103.7 ms … 106.0 ms 27 runs Benchmark 2: ./git rev-list --disk-usage HEAD (rev = HEAD) Time (mean ± σ): 96.7 ms ± 0.4 ms [User: 86.2 ms, System: 10.0 ms] Range (min … max): 96.2 ms … 98.1 ms 30 runs Summary ./git rev-list --disk-usage HEAD (rev = HEAD) ran 1.09 ± 0.01 times faster than ./git rev-list --disk-usage HEAD (rev = HEAD~) Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- packfile.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'packfile.c') diff --git a/packfile.c b/packfile.c index 8c6ef45a67..a2ba237ce7 100644 --- a/packfile.c +++ b/packfile.c @@ -1586,7 +1586,7 @@ int packed_object_info(struct repository *r, struct packed_git *p, struct pack_window *w_curs = NULL; unsigned long size; off_t curpos = obj_offset; - enum object_type type; + enum object_type type = OBJ_NONE; int ret; /* @@ -1598,7 +1598,7 @@ int packed_object_info(struct repository *r, struct packed_git *p, &type); if (!*oi->contentp) type = OBJ_BAD; - } else { + } else if (oi->sizep || oi->typep || oi->delta_base_oid) { type = unpack_object_header(p, &w_curs, &curpos, &size); } @@ -1662,6 +1662,9 @@ int packed_object_info(struct repository *r, struct packed_git *p, oi->u.packed.pack = p; switch (type) { + case OBJ_NONE: + oi->u.packed.type = PACKED_OBJECT_TYPE_UNKNOWN; + break; case OBJ_REF_DELTA: oi->u.packed.type = PACKED_OBJECT_TYPE_REF_DELTA; break; -- cgit v1.3 From 12d3b58b5552750f351ded7166b347446d9543f3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 12 Jan 2026 10:00:47 +0100 Subject: packfile: drop repository parameter from `packed_object_info()` The function `packed_object_info()` takes a packfile and offset and returns the object info for the corresponding object. Despite these two parameters though it also takes a repository pointer. This is redundant information though, as `struct packed_git` already has a repository pointer that is always populated. Drop the redundant parameter. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 3 +-- builtin/pack-objects.c | 4 ++-- commit-graph.c | 2 +- pack-bitmap.c | 3 +-- packfile.c | 8 ++++---- packfile.h | 3 +-- 6 files changed, 10 insertions(+), 13 deletions(-) (limited to 'packfile.c') diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 505ddaa12f..2ad712e9f8 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -487,8 +487,7 @@ static void batch_object_write(const char *obj_name, data->info.sizep = &data->size; if (pack) - ret = packed_object_info(the_repository, pack, - offset, &data->info); + ret = packed_object_info(pack, offset, &data->info); else ret = odb_read_object_info_extended(the_repository->objects, &data->oid, &data->info, diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 1ce8d6ee21..85762f8c4f 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2411,7 +2411,7 @@ static void drop_reused_delta(struct object_entry *entry) oi.sizep = &size; oi.typep = &type; - if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) { + if (packed_object_info(IN_PACK(entry), entry->in_pack_offset, &oi) < 0) { /* * We failed to get the info from this pack for some reason; * fall back to odb_read_object_info, which may find another copy. @@ -3748,7 +3748,7 @@ static int add_object_entry_from_pack(const struct object_id *oid, struct object_info oi = OBJECT_INFO_INIT; oi.typep = &type; - if (packed_object_info(the_repository, p, ofs, &oi) < 0) { + if (packed_object_info(p, ofs, &oi) < 0) { die(_("could not get type of object %s in pack %s"), oid_to_hex(oid), p->pack_name); } else if (type == OBJ_COMMIT) { diff --git a/commit-graph.c b/commit-graph.c index 80be2ff2c3..f572670bd0 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1499,7 +1499,7 @@ static int add_packed_commits(const struct object_id *oid, display_progress(ctx->progress, ++ctx->progress_done); oi.typep = &type; - if (packed_object_info(ctx->r, pack, offset, &oi) < 0) + if (packed_object_info(pack, offset, &oi) < 0) die(_("unable to get type of object %s"), oid_to_hex(oid)); if (type != OBJ_COMMIT) diff --git a/pack-bitmap.c b/pack-bitmap.c index 8ca79725b1..972203f12b 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1876,8 +1876,7 @@ static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, ofs = pack_pos_to_offset(pack, pos); } - if (packed_object_info(bitmap_repo(bitmap_git), pack, ofs, - &oi) < 0) { + if (packed_object_info(pack, ofs, &oi) < 0) { struct object_id oid; nth_bitmap_object_oid(bitmap_git, &oid, pack_pos_to_index(pack, pos)); diff --git a/packfile.c b/packfile.c index a2ba237ce7..39899aec49 100644 --- a/packfile.c +++ b/packfile.c @@ -1580,7 +1580,7 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, hashmap_add(&delta_base_cache, &ent->ent); } -int packed_object_info(struct repository *r, struct packed_git *p, +int packed_object_info(struct packed_git *p, off_t obj_offset, struct object_info *oi) { struct pack_window *w_curs = NULL; @@ -1594,7 +1594,7 @@ int packed_object_info(struct repository *r, struct packed_git *p, * a "real" type later if the caller is interested. */ if (oi->contentp) { - *oi->contentp = cache_or_unpack_entry(r, p, obj_offset, oi->sizep, + *oi->contentp = cache_or_unpack_entry(p->repo, p, obj_offset, oi->sizep, &type); if (!*oi->contentp) type = OBJ_BAD; @@ -1635,7 +1635,7 @@ int packed_object_info(struct repository *r, struct packed_git *p, if (oi->typep) { enum object_type ptot; - ptot = packed_to_object_type(r, p, obj_offset, + ptot = packed_to_object_type(p->repo, p, obj_offset, type, &w_curs, curpos); if (oi->typep) *oi->typep = ptot; @@ -2170,7 +2170,7 @@ int packfile_store_read_object_info(struct packfile_store *store, if (!oi) return 0; - ret = packed_object_info(store->odb->repo, e.p, e.offset, oi); + ret = packed_object_info(e.p, e.offset, oi); if (ret < 0) { mark_bad_packed_object(e.p, oid); return -1; diff --git a/packfile.h b/packfile.h index d7cce582af..33fed26362 100644 --- a/packfile.h +++ b/packfile.h @@ -382,8 +382,7 @@ extern int do_check_packed_object_crc; * Look up the object info for a specific offset in the packfile. * Returns zero on success, a negative error code otherwise. */ -int packed_object_info(struct repository *r, - struct packed_git *pack, +int packed_object_info(struct packed_git *pack, off_t offset, struct object_info *); void mark_bad_packed_object(struct packed_git *, const struct object_id *); -- cgit v1.3