From 5a6dce70d7bb12ee2bc7926254c5b6741b91ac5f Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:02:55 +0000 Subject: hash: set, copy, and use algo field in struct object_id Now that struct object_id has an algorithm field, we should populate it. This will allow us to handle object IDs in any supported algorithm and distinguish between them. Ensure that the field is written whenever we write an object ID by storing it explicitly every time we write an object. Set values for the empty blob and tree values as well. In addition, use the algorithm field to compare object IDs. Note that because we zero-initialize struct object_id in many places throughout the codebase, we default to the default algorithm in cases where the algorithm field is zero rather than explicitly initializing all of those locations. This leads to a branch on every comparison, but the alternative is to compare the entire buffer each time and pad the buffer for SHA-1. That alternative ranges up to 3.9% worse than this approach on the perf tests t0001, t1450, and t1451. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- hex.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'hex.c') diff --git a/hex.c b/hex.c index da51e64929..e7af18fe55 100644 --- a/hex.c +++ b/hex.c @@ -69,7 +69,10 @@ int get_sha1_hex(const char *hex, unsigned char *sha1) int get_oid_hex_algop(const char *hex, struct object_id *oid, const struct git_hash_algo *algop) { - return get_hash_hex_algop(hex, oid->hash, algop); + int ret = get_hash_hex_algop(hex, oid->hash, algop); + if (!ret) + oid_set_algo(oid, algop); + return ret; } /* @@ -80,7 +83,7 @@ int get_oid_hex_any(const char *hex, struct object_id *oid) { int i; for (i = GIT_HASH_NALGOS - 1; i > 0; i--) { - if (!get_hash_hex_algop(hex, oid->hash, &hash_algos[i])) + if (!get_oid_hex_algop(hex, oid, &hash_algos[i])) return i; } return GIT_HASH_UNKNOWN; @@ -95,7 +98,7 @@ int parse_oid_hex_algop(const char *hex, struct object_id *oid, const char **end, const struct git_hash_algo *algop) { - int ret = get_hash_hex_algop(hex, oid->hash, algop); + int ret = get_oid_hex_algop(hex, oid, algop); if (!ret) *end = hex + algop->hexsz; return ret; -- cgit v1.3 From b8505ecbf2b1e4ef27b9597fd113cb1679792b29 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:03:00 +0000 Subject: hex: default to the_hash_algo on zero algorithm value There are numerous places in the codebase where we assume we can initialize data by zeroing all its bytes. However, when we do that with a struct object_id, it leaves the structure with a zero value for the algorithm, which is invalid. We could forbid this pattern and require that all struct object_id instances be initialized using oidclr, but this seems burdensome and it's unnatural to most C programmers. Instead, if the algorithm is zero, assume we wanted to use the default hash algorithm instead. Signed-off-by: brian m. 
carlson Signed-off-by: Junio C Hamano --- hex.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'hex.c') diff --git a/hex.c b/hex.c index e7af18fe55..74d256f239 100644 --- a/hex.c +++ b/hex.c @@ -124,6 +124,13 @@ char *hash_to_hex_algop_r(char *buffer, const unsigned char *hash, char *buf = buffer; int i; + /* + * Our struct object_id has been memset to 0, so default to printing + * using the default hash. + */ + if (algop == &hash_algos[0]) + algop = the_hash_algo; + for (i = 0; i < algop->rawsz; i++) { unsigned int val = *hash++; *buf++ = hex[val >> 4]; -- cgit v1.3 From 3dd71461e25b4cc7ea2a2d8deef1c0486bb32580 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:03:01 +0000 Subject: hex: print objects using the hash algorithm member Now that all code paths correctly set the hash algorithm member of struct object_id, write an object's hex representation using the hash algorithm member embedded in it. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- hex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'hex.c') diff --git a/hex.c b/hex.c index 74d256f239..4f64d34696 100644 --- a/hex.c +++ b/hex.c @@ -143,7 +143,7 @@ char *hash_to_hex_algop_r(char *buffer, const unsigned char *hash, char *oid_to_hex_r(char *buffer, const struct object_id *oid) { - return hash_to_hex_algop_r(buffer, oid->hash, the_hash_algo); + return hash_to_hex_algop_r(buffer, oid->hash, &hash_algos[oid->algo]); } char *hash_to_hex_algop(const unsigned char *hash, const struct git_hash_algo *algop) @@ -161,5 +161,5 @@ char *hash_to_hex(const unsigned char *hash) char *oid_to_hex(const struct object_id *oid) { - return hash_to_hex_algop(oid->hash, the_hash_algo); + return hash_to_hex_algop(oid->hash, &hash_algos[oid->algo]); } -- cgit v1.3