From 41f43b8243f42b9df2e98be8460646d4c0100ad3 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Fri, 6 Dec 2024 11:27:19 +0100
Subject: global: mark code units that generate warnings with `-Wsign-compare`

Mark code units that generate warnings with `-Wsign-compare`. This
allows for a structured approach to get rid of all such warnings over
time in a way that can be easily measured.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 csum-file.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'csum-file.c')

diff --git a/csum-file.c b/csum-file.c
index c203ebf11b..c14bacc7f9 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -9,6 +9,7 @@
  */
 
 #define USE_THE_REPOSITORY_VARIABLE
+#define DISABLE_SIGN_COMPARE_WARNINGS
 
 #include "git-compat-util.h"
 #include "progress.h"
-- 
cgit v1.3-5-g9baa


From ba8f6018b5bed4fc58f8dfe2f9714d22398b06fe Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Fri, 6 Dec 2024 11:27:22 +0100
Subject: csum-file: fix -Wsign-compare warning on 32-bit platform

On 32-bit platforms, ssize_t may be "int" while size_t may be
"unsigned int".  At times we compare the number of bytes we read
stored in a ssize_t variable with "unsigned int", but that is done
after we check that we did not get an error return (which is
negative---and that is the whole reason why we used ssize_t and not
size_t), so these comparisons are safe.

But compilers may not realize that.  Cast these to size_t to work
around the false positives.  On platforms with size_t/ssize_t wider
than a normal int, this won't be an issue.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 csum-file.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'csum-file.c')

diff --git a/csum-file.c b/csum-file.c
index c14bacc7f9..5716016e12 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -9,7 +9,6 @@
  */
 
 #define USE_THE_REPOSITORY_VARIABLE
-#define DISABLE_SIGN_COMPARE_WARNINGS
 
 #include "git-compat-util.h"
 #include "progress.h"
@@ -24,7 +23,7 @@ static void verify_buffer_or_die(struct hashfile *f,
 
 	if (ret < 0)
 		die_errno("%s: sha1 file read error", f->name);
-	if (ret != count)
+	if ((size_t)ret != count)
 		die("%s: sha1 file truncated", f->name);
 	if (memcmp(buf, f->check_buffer, count))
 		die("sha1 file '%s' validation error", f->name);
-- 
cgit v1.3-5-g9baa


From 48524fac643afd7ec70d43684902598ad6d5b954 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Thu, 23 Jan 2025 12:34:23 -0500
Subject: csum-file: store the hash algorithm as a struct field

Throughout the hashfile API, we rely on a reference to 'the_hash_algo',
and call its _unsafe function variants directly.

Prepare for a future change where we may use a different 'git_hash_algo'
pointer (instead of just relying on 'the_hash_algo' throughout) by
making the 'git_hash_algo' pointer a member of the 'hashfile' structure
itself.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 csum-file.c | 20 +++++++++++---------
 csum-file.h |  1 +
 2 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'csum-file.c')

diff --git a/csum-file.c b/csum-file.c
index 5716016e12..b28cd047e3 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -50,7 +50,7 @@ void hashflush(struct hashfile *f)
 
 	if (offset) {
 		if (!f->skip_hash)
-			the_hash_algo->unsafe_update_fn(&f->ctx, f->buffer, offset);
+			f->algop->unsafe_update_fn(&f->ctx, f->buffer, offset);
 		flush(f, f->buffer, offset);
 		f->offset = 0;
 	}
@@ -71,14 +71,14 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result,
 	hashflush(f);
 
 	if (f->skip_hash)
-		hashclr(f->buffer, the_repository->hash_algo);
+		hashclr(f->buffer, f->algop);
 	else
-		the_hash_algo->unsafe_final_fn(f->buffer, &f->ctx);
+		f->algop->unsafe_final_fn(f->buffer, &f->ctx);
 
 	if (result)
-		hashcpy(result, f->buffer, the_repository->hash_algo);
+		hashcpy(result, f->buffer, f->algop);
 	if (flags & CSUM_HASH_IN_STREAM)
-		flush(f, f->buffer, the_hash_algo->rawsz);
+		flush(f, f->buffer, f->algop->rawsz);
 	if (flags & CSUM_FSYNC)
 		fsync_component_or_die(component, f->fd, f->name);
 	if (flags & CSUM_CLOSE) {
@@ -128,7 +128,7 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
 			 * f->offset is necessarily zero.
 			 */
 			if (!f->skip_hash)
-				the_hash_algo->unsafe_update_fn(&f->ctx, buf, nr);
+				f->algop->unsafe_update_fn(&f->ctx, buf, nr);
 			flush(f, buf, nr);
 		} else {
 			/*
@@ -174,7 +174,9 @@ static struct hashfile *hashfd_internal(int fd, const char *name,
 	f->name = name;
 	f->do_crc = 0;
 	f->skip_hash = 0;
-	the_hash_algo->unsafe_init_fn(&f->ctx);
+
+	f->algop = the_hash_algo;
+	f->algop->unsafe_init_fn(&f->ctx);
 
 	f->buffer_len = buffer_len;
 	f->buffer = xmalloc(buffer_len);
@@ -208,7 +210,7 @@ void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpo
 {
 	hashflush(f);
 	checkpoint->offset = f->total;
-	the_hash_algo->unsafe_clone_fn(&checkpoint->ctx, &f->ctx);
+	f->algop->unsafe_clone_fn(&checkpoint->ctx, &f->ctx);
 }
 
 int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
@@ -219,7 +221,7 @@ int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint
 	    lseek(f->fd, offset, SEEK_SET) != offset)
 		return -1;
 	f->total = offset;
-	the_hash_algo->unsafe_clone_fn(&f->ctx, &checkpoint->ctx);
+	f->algop->unsafe_clone_fn(&f->ctx, &checkpoint->ctx);
 	f->offset = 0; /* hashflush() was called in checkpoint */
 	return 0;
 }
diff --git a/csum-file.h b/csum-file.h
index 7c73da0a40..2b45f4673a 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -20,6 +20,7 @@ struct hashfile {
 	size_t buffer_len;
 	unsigned char *buffer;
 	unsigned char *check_buffer;
+	const struct git_hash_algo *algop;
 
 	/**
 	 * If non-zero, skip_hash indicates that we should
-- 
cgit v1.3-5-g9baa


From 5fcc683338e947d1226a9426174e7c48ce849c47 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Thu, 23 Jan 2025 12:34:26 -0500
Subject: csum-file.c: extract algop from hashfile_checksum_valid()

Perform a similar transformation as in the previous commit, but focused
instead on hashfile_checksum_valid(). This function does not work with a
hashfile structure itself, and instead validates the raw contents of a
file written using the hashfile API.

We'll want to be prepared for a similar change to this function in the
future, so prepare ourselves for that by extracting 'the_hash_algo' into
its own field for use within this function.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 csum-file.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'csum-file.c')

diff --git a/csum-file.c b/csum-file.c
index b28cd047e3..7a71121e34 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -242,14 +242,15 @@ int hashfile_checksum_valid(const unsigned char *data, size_t total_len)
 {
 	unsigned char got[GIT_MAX_RAWSZ];
 	git_hash_ctx ctx;
-	size_t data_len = total_len - the_hash_algo->rawsz;
+	const struct git_hash_algo *algop = the_hash_algo;
+	size_t data_len = total_len - algop->rawsz;
 
-	if (total_len < the_hash_algo->rawsz)
+	if (total_len < algop->rawsz)
 		return 0; /* say "too short"? */
 
-	the_hash_algo->unsafe_init_fn(&ctx);
-	the_hash_algo->unsafe_update_fn(&ctx, data, data_len);
-	the_hash_algo->unsafe_final_fn(got, &ctx);
+	algop->unsafe_init_fn(&ctx);
+	algop->unsafe_update_fn(&ctx, data, data_len);
+	algop->unsafe_final_fn(got, &ctx);
 
-	return hasheq(got, data + data_len, the_repository->hash_algo);
+	return hasheq(got, data + data_len, algop);
 }
-- 
cgit v1.3-5-g9baa


From f0c266af4ea7e4d9b84955f8fed8ee8cb009cbd8 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Thu, 23 Jan 2025 12:34:33 -0500
Subject: csum-file.c: use unsafe_hash_algo()

Instead of calling the unsafe_ hash function variants directly, make use
of the shared 'algop' pointer by initializing it to:

    f->algop = unsafe_hash_algo(the_hash_algo);

, thus making all calls use the unsafe variants directly.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 csum-file.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'csum-file.c')

diff --git a/csum-file.c b/csum-file.c
index 7a71121e34..ebffc80ef7 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -50,7 +50,7 @@ void hashflush(struct hashfile *f)
 
 	if (offset) {
 		if (!f->skip_hash)
-			f->algop->unsafe_update_fn(&f->ctx, f->buffer, offset);
+			f->algop->update_fn(&f->ctx, f->buffer, offset);
 		flush(f, f->buffer, offset);
 		f->offset = 0;
 	}
@@ -73,7 +73,7 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result,
 	if (f->skip_hash)
 		hashclr(f->buffer, f->algop);
 	else
-		f->algop->unsafe_final_fn(f->buffer, &f->ctx);
+		f->algop->final_fn(f->buffer, &f->ctx);
 
 	if (result)
 		hashcpy(result, f->buffer, f->algop);
@@ -128,7 +128,7 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
 			 * f->offset is necessarily zero.
 			 */
 			if (!f->skip_hash)
-				f->algop->unsafe_update_fn(&f->ctx, buf, nr);
+				f->algop->update_fn(&f->ctx, buf, nr);
 			flush(f, buf, nr);
 		} else {
 			/*
@@ -175,8 +175,8 @@ static struct hashfile *hashfd_internal(int fd, const char *name,
 	f->do_crc = 0;
 	f->skip_hash = 0;
 
-	f->algop = the_hash_algo;
-	f->algop->unsafe_init_fn(&f->ctx);
+	f->algop = unsafe_hash_algo(the_hash_algo);
+	f->algop->init_fn(&f->ctx);
 
 	f->buffer_len = buffer_len;
 	f->buffer = xmalloc(buffer_len);
@@ -210,7 +210,7 @@ void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpo
 {
 	hashflush(f);
 	checkpoint->offset = f->total;
-	f->algop->unsafe_clone_fn(&checkpoint->ctx, &f->ctx);
+	f->algop->clone_fn(&checkpoint->ctx, &f->ctx);
 }
 
 int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
@@ -221,7 +221,7 @@ int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint
 	    lseek(f->fd, offset, SEEK_SET) != offset)
 		return -1;
 	f->total = offset;
-	f->algop->unsafe_clone_fn(&f->ctx, &checkpoint->ctx);
+	f->algop->clone_fn(&f->ctx, &checkpoint->ctx);
 	f->offset = 0; /* hashflush() was called in checkpoint */
 	return 0;
 }
@@ -242,15 +242,15 @@ int hashfile_checksum_valid(const unsigned char *data, size_t total_len)
 {
 	unsigned char got[GIT_MAX_RAWSZ];
 	git_hash_ctx ctx;
-	const struct git_hash_algo *algop = the_hash_algo;
+	const struct git_hash_algo *algop = unsafe_hash_algo(the_hash_algo);
 	size_t data_len = total_len - algop->rawsz;
 
 	if (total_len < algop->rawsz)
 		return 0; /* say "too short"? */
 
-	algop->unsafe_init_fn(&ctx);
-	algop->unsafe_update_fn(&ctx, data, data_len);
-	algop->unsafe_final_fn(got, &ctx);
+	algop->init_fn(&ctx);
+	algop->update_fn(&ctx, data, data_len);
+	algop->final_fn(got, &ctx);
 
 	return hasheq(got, data + data_len, algop);
 }
-- 
cgit v1.3-5-g9baa


From a8dd3821fe4fcf1524537ef97e4f5e2cf68ce949 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Thu, 23 Jan 2025 12:34:39 -0500
Subject: csum-file: introduce hashfile_checkpoint_init()

In 106140a99f (builtin/fast-import: fix segfault with unsafe SHA1
backend, 2024-12-30) and 9218c0bfe1 (bulk-checkin: fix segfault with
unsafe SHA1 backend, 2024-12-30), we observed the effects of failing to
initialize a hashfile_checkpoint with the same hash function
implementation as is used by the hashfile it is used to checkpoint.

While both 106140a99f and 9218c0bfe1 work around the immediate crash,
changing the hash function implementation within the hashfile API to,
for example, the non-unsafe variant would re-introduce the crash. This
is a result of the tight coupling between initializing hashfiles and
hashfile_checkpoints.

Introduce and use a new function which ensures that both parts of a
hashfile and hashfile_checkpoint pair use the same hash function
implementation to avoid such crashes.

A few things worth noting:

  - In the change to builtin/fast-import.c::stream_blob(), we can see
    that by removing the explicit reference to
    'the_hash_algo->unsafe_init_fn()', we are hardened against the
    hashfile API changing away from the_hash_algo (or its unsafe
    variant) in the future.

  - The bulk-checkin code no longer needs to explicitly zero-initialize
    the hashfile_checkpoint, since it is now done as a result of calling
    'hashfile_checkpoint_init()'.

  - Also in the bulk-checkin code, we add an additional call to
    prepare_to_stream() outside of the main loop in order to initialize
    'state->f' so we know which hash function implementation to use when
    calling 'hashfile_checkpoint_init()'.

    This is OK, since subsequent 'prepare_to_stream()' calls are noops.
    However, we only need to call 'prepare_to_stream()' when we have the
    HASH_WRITE_OBJECT bit set in our flags. Without that bit, calling
    'prepare_to_stream()' does not assign 'state->f', so we have nothing
    to initialize.

  - Other uses of the 'checkpoint' in 'deflate_blob_to_pack()' are
    appropriately guarded.

Helped-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/fast-import.c | 2 +-
 bulk-checkin.c        | 9 ++++++---
 csum-file.c           | 7 +++++++
 csum-file.h           | 1 +
 4 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'csum-file.c')

diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 0f86392761..4a6c7ab52a 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -1106,7 +1106,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
 		|| (pack_size + PACK_SIZE_THRESHOLD + len) < pack_size)
 		cycle_packfile();
 
-	the_hash_algo->unsafe_init_fn(&checkpoint.ctx);
+	hashfile_checkpoint_init(pack_file, &checkpoint);
 	hashfile_checkpoint(pack_file, &checkpoint);
 	offset = checkpoint.offset;
 
diff --git a/bulk-checkin.c b/bulk-checkin.c
index 433070a3bd..892176d23d 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -261,7 +261,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 	git_hash_ctx ctx;
 	unsigned char obuf[16384];
 	unsigned header_len;
-	struct hashfile_checkpoint checkpoint = {0};
+	struct hashfile_checkpoint checkpoint;
 	struct pack_idx_entry *idx = NULL;
 
 	seekback = lseek(fd, 0, SEEK_CUR);
@@ -272,12 +272,15 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
 					  OBJ_BLOB, size);
 	the_hash_algo->init_fn(&ctx);
 	the_hash_algo->update_fn(&ctx, obuf, header_len);
-	the_hash_algo->unsafe_init_fn(&checkpoint.ctx);
 
 	/* Note: idx is non-NULL when we are writing */
-	if ((flags & HASH_WRITE_OBJECT) != 0)
+	if ((flags & HASH_WRITE_OBJECT) != 0) {
 		CALLOC_ARRAY(idx, 1);
 
+		prepare_to_stream(state, flags);
+		hashfile_checkpoint_init(state->f, &checkpoint);
+	}
+
 	already_hashed_to = 0;
 
 	while (1) {
diff --git a/csum-file.c b/csum-file.c
index ebffc80ef7..232121f415 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -206,6 +206,13 @@ struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp
 	return hashfd_internal(fd, name, tp, 8 * 1024);
 }
 
+void hashfile_checkpoint_init(struct hashfile *f,
+			      struct hashfile_checkpoint *checkpoint)
+{
+	memset(checkpoint, 0, sizeof(*checkpoint));
+	f->algop->init_fn(&checkpoint->ctx);
+}
+
 void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
 {
 	hashflush(f);
diff --git a/csum-file.h b/csum-file.h
index 2b45f4673a..b7475f16c2 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -36,6 +36,7 @@ struct hashfile_checkpoint {
 	git_hash_ctx ctx;
 };
 
+void hashfile_checkpoint_init(struct hashfile *, struct hashfile_checkpoint *);
 void hashfile_checkpoint(struct hashfile *, struct hashfile_checkpoint *);
 int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *);
 
-- 
cgit v1.3-5-g9baa