aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2026-03-05 10:04:49 -0800
committerJunio C Hamano <gitster@pobox.com>2026-03-05 10:04:49 -0800
commitd93be9cbcaf80f4d1ff56a3d7aa5d722236b6eac (patch)
tree2febbd0e230148934704a2eebe27fcd7d58780c4
parentdb227bce2224b55b11954a5f292a0b035b7d9279 (diff)
parent13eb65d36615d7269df053015bddf7987ef6d923 (diff)
downloadgit-d93be9cbcaf80f4d1ff56a3d7aa5d722236b6eac.tar.xz
Merge branch 'ps/fsck-stream-from-the-right-object-instance'
"fsck" iterates over packfiles and its access to pack data caused the list to be permuted, which caused it to loop forever; the code to access pack data by "fsck" has been updated to avoid this. * ps/fsck-stream-from-the-right-object-instance: pack-check: fix verification of large objects packfile: expose function to read object stream for an offset object-file: adapt `stream_object_signature()` to take a stream t/helper: improve "genrandom" test helper
-rw-r--r--object-file.c10
-rw-r--r--object-file.h4
-rw-r--r--object.c19
-rw-r--r--pack-check.c12
-rw-r--r--packfile.c40
-rw-r--r--packfile.h5
-rw-r--r--t/helper/test-genrandom.c5
-rwxr-xr-xt/t1006-cat-file.sh2
-rwxr-xr-xt/t1050-large.sh6
-rwxr-xr-xt/t1450-fsck.sh40
-rwxr-xr-xt/t5301-sliding-window.sh2
-rwxr-xr-xt/t5310-pack-bitmaps.sh2
-rwxr-xr-xt/t5710-promisor-remote-capability.sh4
-rwxr-xr-xt/t7700-repack.sh6
14 files changed, 114 insertions, 43 deletions
diff --git a/object-file.c b/object-file.c
index b8265021f9..3094140055 100644
--- a/object-file.c
+++ b/object-file.c
@@ -129,18 +129,15 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
return !oideq(oid, &real_oid) ? -1 : 0;
}
-int stream_object_signature(struct repository *r, const struct object_id *oid)
+int stream_object_signature(struct repository *r,
+ struct odb_read_stream *st,
+ const struct object_id *oid)
{
struct object_id real_oid;
- struct odb_read_stream *st;
struct git_hash_ctx c;
char hdr[MAX_HEADER_LEN];
int hdrlen;
- st = odb_read_stream_open(r->objects, oid, NULL);
- if (!st)
- return -1;
-
/* Generate the header */
hdrlen = format_object_header(hdr, sizeof(hdr), st->type, st->size);
@@ -160,7 +157,6 @@ int stream_object_signature(struct repository *r, const struct object_id *oid)
git_hash_update(&c, buf, readlen);
}
git_hash_final_oid(&real_oid, &c);
- odb_read_stream_close(st);
return !oideq(oid, &real_oid) ? -1 : 0;
}
diff --git a/object-file.h b/object-file.h
index 47fad6663b..ff6da65296 100644
--- a/object-file.h
+++ b/object-file.h
@@ -166,7 +166,9 @@ int check_object_signature(struct repository *r, const struct object_id *oid,
* Try reading the object named with "oid" using
* the streaming interface and rehash it to do the same.
*/
-int stream_object_signature(struct repository *r, const struct object_id *oid);
+int stream_object_signature(struct repository *r,
+ struct odb_read_stream *stream,
+ const struct object_id *oid);
enum finalize_object_file_flags {
FOF_SKIP_COLLISION_CHECK = 1,
diff --git a/object.c b/object.c
index 99b6df3780..465902ecc6 100644
--- a/object.c
+++ b/object.c
@@ -6,6 +6,7 @@
#include "object.h"
#include "replace-object.h"
#include "object-file.h"
+#include "odb/streaming.h"
#include "blob.h"
#include "statinfo.h"
#include "tree.h"
@@ -343,9 +344,21 @@ struct object *parse_object_with_flags(struct repository *r,
if ((!obj || obj->type == OBJ_NONE || obj->type == OBJ_BLOB) &&
odb_read_object_info(r->objects, oid, NULL) == OBJ_BLOB) {
- if (!skip_hash && stream_object_signature(r, repl) < 0) {
- error(_("hash mismatch %s"), oid_to_hex(oid));
- return NULL;
+ if (!skip_hash) {
+ struct odb_read_stream *stream = odb_read_stream_open(r->objects, oid, NULL);
+
+ if (!stream) {
+ error(_("unable to open object stream for %s"), oid_to_hex(oid));
+ return NULL;
+ }
+
+ if (stream_object_signature(r, stream, repl) < 0) {
+ error(_("hash mismatch %s"), oid_to_hex(oid));
+ odb_read_stream_close(stream);
+ return NULL;
+ }
+
+ odb_read_stream_close(stream);
}
parse_blob_buffer(lookup_blob(r, oid));
return lookup_object(r, oid);
diff --git a/pack-check.c b/pack-check.c
index 67cb2cf72f..7378c80730 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -9,6 +9,7 @@
#include "packfile.h"
#include "object-file.h"
#include "odb.h"
+#include "odb/streaming.h"
struct idx_entry {
off_t offset;
@@ -104,6 +105,7 @@ static int verify_packfile(struct repository *r,
QSORT(entries, nr_objects, compare_entries);
for (i = 0; i < nr_objects; i++) {
+ struct odb_read_stream *stream = NULL;
void *data;
struct object_id oid;
enum object_type type;
@@ -152,7 +154,9 @@ static int verify_packfile(struct repository *r,
type) < 0)
err = error("packed %s from %s is corrupt",
oid_to_hex(&oid), p->pack_name);
- else if (!data && stream_object_signature(r, &oid) < 0)
+ else if (!data &&
+ (packfile_read_object_stream(&stream, &oid, p, entries[i].offset) < 0 ||
+ stream_object_signature(r, stream, &oid) < 0))
err = error("packed %s from %s is corrupt",
oid_to_hex(&oid), p->pack_name);
else if (fn) {
@@ -163,12 +167,14 @@ static int verify_packfile(struct repository *r,
}
if (((base_count + i) & 1023) == 0)
display_progress(progress, base_count + i);
- free(data);
+ if (stream)
+ odb_read_stream_close(stream);
+ free(data);
}
+
display_progress(progress, base_count + i);
free(entries);
-
return err;
}
diff --git a/packfile.c b/packfile.c
index ce837f852a..4a6d4a80ea 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2621,32 +2621,28 @@ static int close_istream_pack_non_delta(struct odb_read_stream *_st)
return 0;
}
-int packfile_store_read_object_stream(struct odb_read_stream **out,
- struct packfile_store *store,
- const struct object_id *oid)
+int packfile_read_object_stream(struct odb_read_stream **out,
+ const struct object_id *oid,
+ struct packed_git *pack,
+ off_t offset)
{
struct odb_packed_read_stream *stream;
struct pack_window *window = NULL;
- struct object_info oi = OBJECT_INFO_INIT;
enum object_type in_pack_type;
unsigned long size;
- oi.sizep = &size;
+ in_pack_type = unpack_object_header(pack, &window, &offset, &size);
+ unuse_pack(&window);
- if (packfile_store_read_object_info(store, oid, &oi, 0) ||
- oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
- oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
- repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
+ if (repo_settings_get_big_file_threshold(pack->repo) >= size)
return -1;
- in_pack_type = unpack_object_header(oi.u.packed.pack,
- &window,
- &oi.u.packed.offset,
- &size);
- unuse_pack(&window);
switch (in_pack_type) {
default:
return -1; /* we do not do deltas for now */
+ case OBJ_BAD:
+ mark_bad_packed_object(pack, oid);
+ return -1;
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
@@ -2660,10 +2656,22 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
stream->base.type = in_pack_type;
stream->base.size = size;
stream->z_state = ODB_PACKED_READ_STREAM_UNINITIALIZED;
- stream->pack = oi.u.packed.pack;
- stream->pos = oi.u.packed.offset;
+ stream->pack = pack;
+ stream->pos = offset;
*out = &stream->base;
return 0;
}
+
+int packfile_store_read_object_stream(struct odb_read_stream **out,
+ struct packfile_store *store,
+ const struct object_id *oid)
+{
+ struct pack_entry e;
+
+ if (!find_pack_entry(store, oid, &e))
+ return -1;
+
+ return packfile_read_object_stream(out, oid, e.p, e.offset);
+}
diff --git a/packfile.h b/packfile.h
index 224142fd34..85ce44f8ee 100644
--- a/packfile.h
+++ b/packfile.h
@@ -449,6 +449,11 @@ off_t get_delta_base(struct packed_git *p, struct pack_window **w_curs,
off_t *curpos, enum object_type type,
off_t delta_obj_offset);
+int packfile_read_object_stream(struct odb_read_stream **out,
+ const struct object_id *oid,
+ struct packed_git *pack,
+ off_t offset);
+
void release_pack_memory(size_t);
/* global flag to enable extra checks when accessing packed objects */
diff --git a/t/helper/test-genrandom.c b/t/helper/test-genrandom.c
index 51b67f2f87..d681961abb 100644
--- a/t/helper/test-genrandom.c
+++ b/t/helper/test-genrandom.c
@@ -6,6 +6,7 @@
#include "test-tool.h"
#include "git-compat-util.h"
+#include "parse.h"
int cmd__genrandom(int argc, const char **argv)
{
@@ -22,7 +23,9 @@ int cmd__genrandom(int argc, const char **argv)
next = next * 11 + *c;
} while (*c++);
- count = (argc == 3) ? strtoul(argv[2], NULL, 0) : ULONG_MAX;
+ count = ULONG_MAX;
+ if (argc == 3 && !git_parse_ulong(argv[2], &count))
+ return error_errno("cannot parse argument '%s'", argv[2]);
while (count--) {
next = next * 1103515245 + 12345;
diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh
index 0eee3bb878..5499be8dc9 100755
--- a/t/t1006-cat-file.sh
+++ b/t/t1006-cat-file.sh
@@ -643,7 +643,7 @@ test_expect_success 'object reference via commit text search' '
'
test_expect_success 'setup blobs which are likely to delta' '
- test-tool genrandom foo 10240 >foo &&
+ test-tool genrandom foo 10k >foo &&
{ cat foo && echo plus; } >foo-plus &&
git add foo foo-plus &&
git commit -m foo &&
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
index 5be273611a..7d40d08521 100755
--- a/t/t1050-large.sh
+++ b/t/t1050-large.sh
@@ -104,9 +104,9 @@ test_expect_success 'packsize limit' '
# mid1 and mid2 will fit within 256k limit but
# appending mid3 will bust the limit and will
# result in a separate packfile.
- test-tool genrandom "a" $(( 66 * 1024 )) >mid1 &&
- test-tool genrandom "b" $(( 80 * 1024 )) >mid2 &&
- test-tool genrandom "c" $(( 128 * 1024 )) >mid3 &&
+ test-tool genrandom "a" 66k >mid1 &&
+ test-tool genrandom "b" 80k >mid2 &&
+ test-tool genrandom "c" 128k >mid3 &&
git add mid1 mid2 mid3 &&
count=0 &&
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 3fae05f9d9..54e81c2636 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -852,6 +852,44 @@ test_expect_success 'fsck errors in packed objects' '
! grep corrupt out
'
+test_expect_success 'fsck handles multiple packfiles with big blobs' '
+ test_when_finished "rm -rf repo" &&
+ git init repo &&
+ (
+ cd repo &&
+
+ # We construct two packfiles with two objects in common and one
+ # object not in common. The objects in common can then be
+ # corrupted in one of the packfiles, respectively. The other
+ # objects that are unique to the packs are merely used to not
+ # have both packs contain the same data.
+ blob_one=$(test-tool genrandom one 200k | git hash-object -t blob -w --stdin) &&
+ blob_two=$(test-tool genrandom two 200k | git hash-object -t blob -w --stdin) &&
+ blob_three=$(test-tool genrandom three 200k | git hash-object -t blob -w --stdin) &&
+ blob_four=$(test-tool genrandom four 200k | git hash-object -t blob -w --stdin) &&
+ pack_one=$(printf "%s\n" "$blob_one" "$blob_two" "$blob_three" | git pack-objects .git/objects/pack/pack) &&
+ pack_two=$(printf "%s\n" "$blob_two" "$blob_three" "$blob_four" | git pack-objects .git/objects/pack/pack) &&
+ chmod a+w .git/objects/pack/pack-*.pack &&
+
+ # Corrupt blob two in the first pack.
+ git verify-pack -v .git/objects/pack/pack-$pack_one >objects &&
+ offset_one=$(sed <objects -n "s/^$blob_two .* \(.*\)$/\1/p") &&
+ printf "\0" | dd of=.git/objects/pack/pack-$pack_one.pack bs=1 conv=notrunc seek=$offset_one &&
+
+ # Corrupt blob three in the second pack.
+ git verify-pack -v .git/objects/pack/pack-$pack_two >objects &&
+ offset_two=$(sed <objects -n "s/^$blob_three .* \(.*\)$/\1/p") &&
+ printf "\0" | dd of=.git/objects/pack/pack-$pack_two.pack bs=1 conv=notrunc seek=$offset_two &&
+
+ # We now expect to see two failures for the corrupted objects,
+ # even though they exist in a non-corrupted form in the
+ # respective other pack.
+ test_must_fail git -c core.bigFileThreshold=100k fsck 2>err &&
+ test_grep "unknown object type 0 at offset $offset_one in .git/objects/pack/pack-$pack_one.pack" err &&
+ test_grep "unknown object type 0 at offset $offset_two in .git/objects/pack/pack-$pack_two.pack" err
+ )
+'
+
test_expect_success 'fsck fails on corrupt packfile' '
hsh=$(git commit-tree -m mycommit HEAD^{tree}) &&
pack=$(echo $hsh | git pack-objects .git/objects/pack/pack) &&
@@ -918,7 +956,7 @@ test_expect_success 'fsck detects trailing loose garbage (large blob)' '
test_expect_success 'fsck detects truncated loose object' '
# make it big enough that we know we will truncate in the data
# portion, not the header
- test-tool genrandom truncate 4096 >file &&
+ test-tool genrandom truncate 4k >file &&
blob=$(git hash-object -w file) &&
file=$(sha1_file $blob) &&
test_when_finished "remove_object $blob" &&
diff --git a/t/t5301-sliding-window.sh b/t/t5301-sliding-window.sh
index ff6b5159a3..3c3666b278 100755
--- a/t/t5301-sliding-window.sh
+++ b/t/t5301-sliding-window.sh
@@ -12,7 +12,7 @@ test_expect_success 'setup' '
for i in a b c
do
echo $i >$i &&
- test-tool genrandom "$i" 32768 >>$i &&
+ test-tool genrandom "$i" 32k >>$i &&
git update-index --add $i || return 1
done &&
echo d >d && cat c >>d && git update-index --add d &&
diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh
index 310b708c5c..f693cb5669 100755
--- a/t/t5310-pack-bitmaps.sh
+++ b/t/t5310-pack-bitmaps.sh
@@ -242,7 +242,7 @@ test_bitmap_cases () {
'
test_expect_success 'splitting packs does not generate bogus bitmaps' '
- test-tool genrandom foo $((1024 * 1024)) >rand &&
+ test-tool genrandom foo 1m >rand &&
git add rand &&
git commit -m "commit with big file" &&
git -c pack.packSizeLimit=500k repack -adb &&
diff --git a/t/t5710-promisor-remote-capability.sh b/t/t5710-promisor-remote-capability.sh
index 532e6f0fea..357822c01a 100755
--- a/t/t5710-promisor-remote-capability.sh
+++ b/t/t5710-promisor-remote-capability.sh
@@ -20,7 +20,7 @@ test_expect_success 'setup: create "template" repository' '
test_commit -C template 1 &&
test_commit -C template 2 &&
test_commit -C template 3 &&
- test-tool genrandom foo 10240 >template/foo &&
+ test-tool genrandom foo 10k >template/foo &&
git -C template add foo &&
git -C template commit -m foo
'
@@ -499,7 +499,7 @@ test_expect_success "clone with promisor.advertise set to 'true' but don't delet
test_expect_success "setup for subsequent fetches" '
# Generate new commit with large blob
- test-tool genrandom bar 10240 >template/bar &&
+ test-tool genrandom bar 10k >template/bar &&
git -C template add bar &&
git -C template commit -m bar &&
diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh
index acc2589f21..63ef63fc50 100755
--- a/t/t7700-repack.sh
+++ b/t/t7700-repack.sh
@@ -321,7 +321,7 @@ test_expect_success 'no bitmaps created if .keep files present' '
test_expect_success 'auto-bitmaps do not complain if unavailable' '
test_config -C bare.git pack.packSizeLimit 1M &&
- blob=$(test-tool genrandom big $((1024*1024)) |
+ blob=$(test-tool genrandom big 1m |
git -C bare.git hash-object -w --stdin) &&
git -C bare.git update-ref refs/tags/big $blob &&
@@ -497,9 +497,9 @@ test_expect_success '--filter works with --max-pack-size' '
cd max-pack-size &&
test_commit base &&
# two blobs which exceed the maximum pack size
- test-tool genrandom foo 1048576 >foo &&
+ test-tool genrandom foo 1m >foo &&
git hash-object -w foo &&
- test-tool genrandom bar 1048576 >bar &&
+ test-tool genrandom bar 1m >bar &&
git hash-object -w bar &&
git add foo bar &&
git commit -m "adding foo and bar"