From 41f43b8243f42b9df2e98be8460646d4c0100ad3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Fri, 6 Dec 2024 11:27:19 +0100 Subject: global: mark code units that generate warnings with `-Wsign-compare` Mark code units that generate warnings with `-Wsign-compare`. This allows for a structured approach to get rid of all such warnings over time in a way that can be easily measured. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- csum-file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'csum-file.c') diff --git a/csum-file.c b/csum-file.c index c203ebf11b..c14bacc7f9 100644 --- a/csum-file.c +++ b/csum-file.c @@ -9,6 +9,7 @@ */ #define USE_THE_REPOSITORY_VARIABLE +#define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" #include "progress.h" -- cgit v1.3-5-g9baa From ba8f6018b5bed4fc58f8dfe2f9714d22398b06fe Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 6 Dec 2024 11:27:22 +0100 Subject: csum-file: fix -Wsign-compare warning on 32-bit platform On 32-bit platforms, ssize_t may be "int" while size_t may be "unsigned int". At times we compare the number of bytes we read stored in a ssize_t variable with "unsigned int", but that is done after we check that we did not get an error return (which is negative---and that is the whole reason why we used ssize_t and not size_t), so these comparisons are safe. But compilers may not realize that. Cast these to size_t to work around the false positives. On platforms with size_t/ssize_t wider than a normal int, this won't be an issue. Signed-off-by: Junio C Hamano Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- csum-file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'csum-file.c') diff --git a/csum-file.c b/csum-file.c index c14bacc7f9..5716016e12 100644 --- a/csum-file.c +++ b/csum-file.c @@ -9,7 +9,6 @@ */ #define USE_THE_REPOSITORY_VARIABLE -#define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" #include "progress.h" @@ -24,7 +23,7 @@ static void verify_buffer_or_die(struct hashfile *f, if (ret < 0) die_errno("%s: sha1 file read error", f->name); - if (ret != count) + if ((size_t)ret != count) die("%s: sha1 file truncated", f->name); if (memcmp(buf, f->check_buffer, count)) die("sha1 file '%s' validation error", f->name); -- cgit v1.3-5-g9baa From 48524fac643afd7ec70d43684902598ad6d5b954 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:23 -0500 Subject: csum-file: store the hash algorithm as a struct field Throughout the hashfile API, we rely on a reference to 'the_hash_algo', and call its _unsafe function variants directly. Prepare for a future change where we may use a different 'git_hash_algo' pointer (instead of just relying on 'the_hash_algo' throughout) by making the 'git_hash_algo' pointer a member of the 'hashfile' structure itself. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- csum-file.c | 20 +++++++++++--------- csum-file.h | 1 + 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'csum-file.c') diff --git a/csum-file.c b/csum-file.c index 5716016e12..b28cd047e3 100644 --- a/csum-file.c +++ b/csum-file.c @@ -50,7 +50,7 @@ void hashflush(struct hashfile *f) if (offset) { if (!f->skip_hash) - the_hash_algo->unsafe_update_fn(&f->ctx, f->buffer, offset); + f->algop->unsafe_update_fn(&f->ctx, f->buffer, offset); flush(f, f->buffer, offset); f->offset = 0; } @@ -71,14 +71,14 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, hashflush(f); if (f->skip_hash) - hashclr(f->buffer, the_repository->hash_algo); + hashclr(f->buffer, f->algop); else - the_hash_algo->unsafe_final_fn(f->buffer, &f->ctx); + f->algop->unsafe_final_fn(f->buffer, &f->ctx); if (result) - hashcpy(result, f->buffer, the_repository->hash_algo); + hashcpy(result, f->buffer, f->algop); if (flags & CSUM_HASH_IN_STREAM) - flush(f, f->buffer, the_hash_algo->rawsz); + flush(f, f->buffer, f->algop->rawsz); if (flags & CSUM_FSYNC) fsync_component_or_die(component, f->fd, f->name); if (flags & CSUM_CLOSE) { @@ -128,7 +128,7 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count) * f->offset is necessarily zero. */ if (!f->skip_hash) - the_hash_algo->unsafe_update_fn(&f->ctx, buf, nr); + f->algop->unsafe_update_fn(&f->ctx, buf, nr); flush(f, buf, nr); } else { /* @@ -174,7 +174,9 @@ static struct hashfile *hashfd_internal(int fd, const char *name, f->name = name; f->do_crc = 0; f->skip_hash = 0; - the_hash_algo->unsafe_init_fn(&f->ctx); + + f->algop = the_hash_algo; + f->algop->unsafe_init_fn(&f->ctx); f->buffer_len = buffer_len; f->buffer = xmalloc(buffer_len); @@ -208,7 +210,7 @@ void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpo { hashflush(f); checkpoint->offset = f->total; - the_hash_algo->unsafe_clone_fn(&checkpoint->ctx, &f->ctx); + f->algop->unsafe_clone_fn(&checkpoint->ctx, &f->ctx); } int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint) @@ -219,7 +221,7 @@ int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint lseek(f->fd, offset, SEEK_SET) != offset) return -1; f->total = offset; - the_hash_algo->unsafe_clone_fn(&f->ctx, &checkpoint->ctx); + f->algop->unsafe_clone_fn(&f->ctx, &checkpoint->ctx); f->offset = 0; /* hashflush() was called in checkpoint */ return 0; } diff --git a/csum-file.h b/csum-file.h index 7c73da0a40..2b45f4673a 100644 --- a/csum-file.h +++ b/csum-file.h @@ -20,6 +20,7 @@ struct hashfile { size_t buffer_len; unsigned char *buffer; unsigned char *check_buffer; + const struct git_hash_algo *algop; /** * If non-zero, skip_hash indicates that we should -- cgit v1.3-5-g9baa From 5fcc683338e947d1226a9426174e7c48ce849c47 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:26 -0500 Subject: csum-file.c: extract algop from hashfile_checksum_valid() Perform a similar transformation as in the previous commit, but focused instead on hashfile_checksum_valid(). This function does not work with a hashfile structure itself, and instead validates the raw contents of a file written using the hashfile API. We'll want to be prepared for a similar change to this function in the future, so prepare ourselves for that by extracting 'the_hash_algo' into its own field for use within this function. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- csum-file.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'csum-file.c') diff --git a/csum-file.c b/csum-file.c index b28cd047e3..7a71121e34 100644 --- a/csum-file.c +++ b/csum-file.c @@ -242,14 +242,15 @@ int hashfile_checksum_valid(const unsigned char *data, size_t total_len) { unsigned char got[GIT_MAX_RAWSZ]; git_hash_ctx ctx; - size_t data_len = total_len - the_hash_algo->rawsz; + const struct git_hash_algo *algop = the_hash_algo; + size_t data_len = total_len - algop->rawsz; - if (total_len < the_hash_algo->rawsz) + if (total_len < algop->rawsz) return 0; /* say "too short"? */ - the_hash_algo->unsafe_init_fn(&ctx); - the_hash_algo->unsafe_update_fn(&ctx, data, data_len); - the_hash_algo->unsafe_final_fn(got, &ctx); + algop->unsafe_init_fn(&ctx); + algop->unsafe_update_fn(&ctx, data, data_len); + algop->unsafe_final_fn(got, &ctx); - return hasheq(got, data + data_len, the_repository->hash_algo); + return hasheq(got, data + data_len, algop); } -- cgit v1.3-5-g9baa From f0c266af4ea7e4d9b84955f8fed8ee8cb009cbd8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:33 -0500 Subject: csum-file.c: use unsafe_hash_algo() Instead of calling the unsafe_ hash function variants directly, make use of the shared 'algop' pointer by initializing it to: f->algop = unsafe_hash_algo(the_hash_algo); , thus making all calls use the unsafe variants directly. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- csum-file.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'csum-file.c') diff --git a/csum-file.c b/csum-file.c index 7a71121e34..ebffc80ef7 100644 --- a/csum-file.c +++ b/csum-file.c @@ -50,7 +50,7 @@ void hashflush(struct hashfile *f) if (offset) { if (!f->skip_hash) - f->algop->unsafe_update_fn(&f->ctx, f->buffer, offset); + f->algop->update_fn(&f->ctx, f->buffer, offset); flush(f, f->buffer, offset); f->offset = 0; } @@ -73,7 +73,7 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, if (f->skip_hash) hashclr(f->buffer, f->algop); else - f->algop->unsafe_final_fn(f->buffer, &f->ctx); + f->algop->final_fn(f->buffer, &f->ctx); if (result) hashcpy(result, f->buffer, f->algop); @@ -128,7 +128,7 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count) * f->offset is necessarily zero. */ if (!f->skip_hash) - f->algop->unsafe_update_fn(&f->ctx, buf, nr); + f->algop->update_fn(&f->ctx, buf, nr); flush(f, buf, nr); } else { /* @@ -175,8 +175,8 @@ static struct hashfile *hashfd_internal(int fd, const char *name, f->do_crc = 0; f->skip_hash = 0; - f->algop = the_hash_algo; - f->algop->unsafe_init_fn(&f->ctx); + f->algop = unsafe_hash_algo(the_hash_algo); + f->algop->init_fn(&f->ctx); f->buffer_len = buffer_len; f->buffer = xmalloc(buffer_len); @@ -210,7 +210,7 @@ void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpo { hashflush(f); checkpoint->offset = f->total; - f->algop->unsafe_clone_fn(&checkpoint->ctx, &f->ctx); + f->algop->clone_fn(&checkpoint->ctx, &f->ctx); } int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint) @@ -221,7 +221,7 @@ int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint lseek(f->fd, offset, SEEK_SET) != offset) return -1; f->total = offset; - f->algop->unsafe_clone_fn(&f->ctx, &checkpoint->ctx); + f->algop->clone_fn(&f->ctx, &checkpoint->ctx); f->offset = 0; /* hashflush() was called in checkpoint */ return 0; } @@ -242,15 +242,15 @@ int hashfile_checksum_valid(const unsigned char *data, size_t total_len) { unsigned char got[GIT_MAX_RAWSZ]; git_hash_ctx ctx; - const struct git_hash_algo *algop = the_hash_algo; + const struct git_hash_algo *algop = unsafe_hash_algo(the_hash_algo); size_t data_len = total_len - algop->rawsz; if (total_len < algop->rawsz) return 0; /* say "too short"? */ - algop->unsafe_init_fn(&ctx); - algop->unsafe_update_fn(&ctx, data, data_len); - algop->unsafe_final_fn(got, &ctx); + algop->init_fn(&ctx); + algop->update_fn(&ctx, data, data_len); + algop->final_fn(got, &ctx); return hasheq(got, data + data_len, algop); } -- cgit v1.3-5-g9baa From a8dd3821fe4fcf1524537ef97e4f5e2cf68ce949 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Jan 2025 12:34:39 -0500 Subject: csum-file: introduce hashfile_checkpoint_init() In 106140a99f (builtin/fast-import: fix segfault with unsafe SHA1 backend, 2024-12-30) and 9218c0bfe1 (bulk-checkin: fix segfault with unsafe SHA1 backend, 2024-12-30), we observed the effects of failing to initialize a hashfile_checkpoint with the same hash function implementation as is used by the hashfile it is used to checkpoint. While both 106140a99f and 9218c0bfe1 work around the immediate crash, changing the hash function implementation within the hashfile API to, for example, the non-unsafe variant would re-introduce the crash. This is a result of the tight coupling between initializing hashfiles and hashfile_checkpoints. Introduce and use a new function which ensures that both parts of a hashfile and hashfile_checkpoint pair use the same hash function implementation to avoid such crashes. A few things worth noting: - In the change to builtin/fast-import.c::stream_blob(), we can see that by removing the explicit reference to 'the_hash_algo->unsafe_init_fn()', we are hardened against the hashfile API changing away from the_hash_algo (or its unsafe variant) in the future. - The bulk-checkin code no longer needs to explicitly zero-initialize the hashfile_checkpoint, since it is now done as a result of calling 'hashfile_checkpoint_init()'. - Also in the bulk-checkin code, we add an additional call to prepare_to_stream() outside of the main loop in order to initialize 'state->f' so we know which hash function implementation to use when calling 'hashfile_checkpoint_init()'. This is OK, since subsequent 'prepare_to_stream()' calls are noops. However, we only need to call 'prepare_to_stream()' when we have the HASH_WRITE_OBJECT bit set in our flags. Without that bit, calling 'prepare_to_stream()' does not assign 'state->f', so we have nothing to initialize. - Other uses of the 'checkpoint' in 'deflate_blob_to_pack()' are appropriately guarded. Helped-by: Patrick Steinhardt Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 2 +- bulk-checkin.c | 9 ++++++--- csum-file.c | 7 +++++++ csum-file.h | 1 + 4 files changed, 15 insertions(+), 4 deletions(-) (limited to 'csum-file.c') diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 0f86392761..4a6c7ab52a 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -1106,7 +1106,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) || (pack_size + PACK_SIZE_THRESHOLD + len) < pack_size) cycle_packfile(); - the_hash_algo->unsafe_init_fn(&checkpoint.ctx); + hashfile_checkpoint_init(pack_file, &checkpoint); hashfile_checkpoint(pack_file, &checkpoint); offset = checkpoint.offset; diff --git a/bulk-checkin.c b/bulk-checkin.c index 433070a3bd..892176d23d 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -261,7 +261,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, git_hash_ctx ctx; unsigned char obuf[16384]; unsigned header_len; - struct hashfile_checkpoint checkpoint = {0}; + struct hashfile_checkpoint checkpoint; struct pack_idx_entry *idx = NULL; seekback = lseek(fd, 0, SEEK_CUR); @@ -272,12 +272,15 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state, OBJ_BLOB, size); the_hash_algo->init_fn(&ctx); the_hash_algo->update_fn(&ctx, obuf, header_len); - the_hash_algo->unsafe_init_fn(&checkpoint.ctx); /* Note: idx is non-NULL when we are writing */ - if ((flags & HASH_WRITE_OBJECT) != 0) + if ((flags & HASH_WRITE_OBJECT) != 0) { CALLOC_ARRAY(idx, 1); + prepare_to_stream(state, flags); + hashfile_checkpoint_init(state->f, &checkpoint); + } + already_hashed_to = 0; while (1) { diff --git a/csum-file.c b/csum-file.c index ebffc80ef7..232121f415 100644 --- a/csum-file.c +++ b/csum-file.c @@ -206,6 +206,13 @@ struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp return hashfd_internal(fd, name, tp, 8 * 1024); } +void hashfile_checkpoint_init(struct hashfile *f, + struct hashfile_checkpoint *checkpoint) +{ + memset(checkpoint, 0, sizeof(*checkpoint)); + f->algop->init_fn(&checkpoint->ctx); +} + void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpoint) { hashflush(f); diff --git a/csum-file.h b/csum-file.h index 2b45f4673a..b7475f16c2 100644 --- a/csum-file.h +++ b/csum-file.h @@ -36,6 +36,7 @@ struct hashfile_checkpoint { git_hash_ctx ctx; }; +void hashfile_checkpoint_init(struct hashfile *, struct hashfile_checkpoint *); void hashfile_checkpoint(struct hashfile *, struct hashfile_checkpoint *); int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *); -- cgit v1.3-5-g9baa