From 751530de5db77196a08897e3c47526c0f0147ef1 Mon Sep 17 00:00:00 2001 From: René Scharfe Date: Sat, 11 Sep 2021 22:40:33 +0200 Subject: packfile: convert mark_bad_packed_object() to object_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All callers have full object IDs, so pass them on instead of just their hash member. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- packfile.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'packfile.c') diff --git a/packfile.c b/packfile.c index 4d0d625238..fb15fc5b49 100644 --- a/packfile.c +++ b/packfile.c @@ -1161,17 +1161,17 @@ int unpack_object_header(struct packed_git *p, return type; } -void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1) +void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) { unsigned i; const unsigned hashsz = the_hash_algo->rawsz; for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(sha1, p->bad_object_sha1 + hashsz * i)) + if (hasheq(oid->hash, p->bad_object_sha1 + hashsz * i)) return; p->bad_object_sha1 = xrealloc(p->bad_object_sha1, st_mult(GIT_MAX_RAWSZ, st_add(p->num_bad_objects, 1))); - hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, sha1); + hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, oid->hash); p->num_bad_objects++; } @@ -1272,7 +1272,7 @@ static int retry_bad_packed_offset(struct repository *r, if (offset_to_pack_pos(p, obj_offset, &pos) < 0) return OBJ_BAD; nth_packed_object_id(&oid, p, pack_pos_to_index(p, pos)); - mark_bad_packed_object(p, oid.hash); + mark_bad_packed_object(p, &oid); type = oid_object_info(r, &oid, NULL); if (type <= OBJ_NONE) return OBJ_BAD; @@ -1722,7 +1722,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, nth_packed_object_id(&oid, p, index_pos); error("bad packed object CRC for %s", oid_to_hex(&oid)); - mark_bad_packed_object(p, oid.hash); + mark_bad_packed_object(p, &oid); data = NULL; goto out; } @@ -1811,7 +1811,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, " at offset %"PRIuMAX" from %s", oid_to_hex(&base_oid), (uintmax_t)obj_offset, p->pack_name); - mark_bad_packed_object(p, base_oid.hash); + mark_bad_packed_object(p, &base_oid); base = read_object(r, &base_oid, &type, &base_size); external_base = base; } -- cgit v1.3-6-g1900 From 7407d733a4a99cf946cab0c82e03c41dbd305b7c Mon Sep 17 00:00:00 2001 From: René Scharfe Date: Sat, 11 Sep 2021 22:42:20 +0200 Subject: packfile: convert has_packed_and_bad() to object_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The single caller has a full object ID, so pass it on instead of just its hash member. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- object-file.c | 2 +- packfile.c | 4 ++-- packfile.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'packfile.c') diff --git a/object-file.c b/object-file.c index fb5a385a06..01e7058b4e 100644 --- a/object-file.c +++ b/object-file.c @@ -1725,7 +1725,7 @@ void *read_object_file_extended(struct repository *r, die(_("loose object %s (stored in %s) is corrupt"), oid_to_hex(repl), path); - if ((p = has_packed_and_bad(r, repl->hash)) != NULL) + if ((p = has_packed_and_bad(r, repl)) != NULL) die(_("packed object %s (stored in %s) is corrupt"), oid_to_hex(repl), p->pack_name); obj_read_unlock(); diff --git a/packfile.c b/packfile.c index fb15fc5b49..04080a558b 100644 --- a/packfile.c +++ b/packfile.c @@ -1176,14 +1176,14 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) } const struct packed_git *has_packed_and_bad(struct repository *r, - const unsigned char *sha1) + const struct object_id *oid) { struct packed_git *p; unsigned i; for (p = r->objects->packed_git; p; p = p->next) for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(sha1, + if (hasheq(oid->hash, p->bad_object_sha1 + the_hash_algo->rawsz * i)) return p; return NULL; diff --git a/packfile.h b/packfile.h index a982ed9994..186146779d 100644 --- a/packfile.h +++ b/packfile.h @@ -160,7 +160,7 @@ int packed_object_info(struct repository *r, off_t offset, struct object_info *); void mark_bad_packed_object(struct packed_git *, const struct object_id *); -const struct packed_git *has_packed_and_bad(struct repository *r, const unsigned char *sha1); +const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); #define ON_DISK_KEEP_PACKS 1 #define IN_CORE_KEEP_PACKS 2 -- cgit v1.3-6-g1900 From 09ef66179b943d03cbe0bea0603e5f40574695a1 Mon Sep 17 00:00:00 2001 From: René Scharfe Date: Sat, 11 Sep 2021 22:43:26 +0200 Subject: packfile: use oidset for bad objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Store the object ID of broken pack entries in an oidset instead of keeping only their hashes in an unsorted array. The resulting code is shorter and easier to read. It also handles the (hopefully) very rare case of having a high number of bad objects better. Helped-by: Jeff King Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- midx.c | 10 +++------- object-store.h | 4 ++-- packfile.c | 28 ++++++---------------------- 3 files changed, 11 insertions(+), 31 deletions(-) (limited to 'packfile.c') diff --git a/midx.c b/midx.c index 8cb063023c..76322b713c 100644 --- a/midx.c +++ b/midx.c @@ -307,13 +307,9 @@ int fill_midx_entry(struct repository * r, if (!is_pack_valid(p)) return 0; - if (p->num_bad_objects) { - uint32_t i; - for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid->hash, - p->bad_object_sha1 + the_hash_algo->rawsz * i)) - return 0; - } + if (oidset_size(&p->bad_objects) && + oidset_contains(&p->bad_objects, oid)) + return 0; e->offset = nth_midxed_offset(m, pos); e->p = p; diff --git a/object-store.h b/object-store.h index b4dc6668aa..c7bead66f6 100644 --- a/object-store.h +++ b/object-store.h @@ -10,6 +10,7 @@ #include "khash.h" #include "dir.h" #include "oidtree.h" +#include "oidset.h" struct object_directory { struct object_directory *next; @@ -75,9 +76,8 @@ struct packed_git { const void *index_data; size_t index_size; uint32_t num_objects; - uint32_t num_bad_objects; uint32_t crc_offset; - unsigned char *bad_object_sha1; + struct oidset bad_objects; int index_version; time_t mtime; int pack_fd; diff --git a/packfile.c b/packfile.c index 04080a558b..caba29c624 100644 --- a/packfile.c +++ b/packfile.c @@ -1163,29 +1163,17 @@ int unpack_object_header(struct packed_git *p, void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) { - unsigned i; - const unsigned hashsz = the_hash_algo->rawsz; - for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid->hash, p->bad_object_sha1 + hashsz * i)) - return; - p->bad_object_sha1 = xrealloc(p->bad_object_sha1, - st_mult(GIT_MAX_RAWSZ, - st_add(p->num_bad_objects, 1))); - hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, oid->hash); - p->num_bad_objects++; + oidset_insert(&p->bad_objects, oid); } const struct packed_git *has_packed_and_bad(struct repository *r, const struct object_id *oid) { struct packed_git *p; - unsigned i; for (p = r->objects->packed_git; p; p = p->next) - for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid->hash, - p->bad_object_sha1 + the_hash_algo->rawsz * i)) - return p; + if (oidset_contains(&p->bad_objects, oid)) + return p; return NULL; } @@ -2016,13 +2004,9 @@ static int fill_pack_entry(const struct object_id *oid, { off_t offset; - if (p->num_bad_objects) { - unsigned i; - for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid->hash, - p->bad_object_sha1 + the_hash_algo->rawsz * i)) - return 0; - } + if (oidset_size(&p->bad_objects) && + oidset_contains(&p->bad_objects, oid)) + return 0; offset = find_pack_entry_one(oid->hash, p); if (!offset) -- cgit v1.3-6-g1900