aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2025-10-22 11:38:58 -0700
committerJunio C Hamano <gitster@pobox.com>2025-10-22 11:38:58 -0700
commit98401c10fc9e991b36c4ccf5a270746f123feeb6 (patch)
tree83d4cb6fc595ed0db95d680c28178f8679e8af46
parentc9ccf81948973e9b9632cbb483a3908307092620 (diff)
parentdb00605c13a9f5709da712671df5c7594c06cf31 (diff)
downloadgit-98401c10fc9e991b36c4ccf5a270746f123feeb6.tar.xz
Merge branch 'bc/sha1-256-interop-01'
The beginning of SHA1-SHA256 interoperability work.

* bc/sha1-256-interop-01:
  t1010: use BROKEN_OBJECTS prerequisite
  t: allow specifying compatibility hash
  fsck: consider gpgsig headers expected in tags
  rev-parse: allow printing compatibility hash
  docs: add documentation for loose objects
  docs: improve ambiguous areas of pack format documentation
  docs: reflect actual double signature for tags
  docs: update offset order for pack index v3
  docs: update pack index v3 format
-rw-r--r--Documentation/Makefile1
-rw-r--r--Documentation/fsck-msgids.adoc6
-rw-r--r--Documentation/git-rev-parse.adoc11
-rw-r--r--Documentation/gitformat-loose.adoc53
-rw-r--r--Documentation/gitformat-pack.adoc19
-rw-r--r--Documentation/meson.build1
-rw-r--r--Documentation/technical/hash-function-transition.adoc42
-rw-r--r--builtin/rev-parse.c11
-rw-r--r--fsck.c18
-rw-r--r--fsck.h2
-rwxr-xr-xt/t1010-mktree.sh13
-rwxr-xr-xt/t1450-fsck.sh54
-rwxr-xr-xt/t1500-rev-parse.sh34
-rw-r--r--t/test-lib-functions.sh9
-rw-r--r--t/test-lib.sh13
15 files changed, 255 insertions, 32 deletions
diff --git a/Documentation/Makefile b/Documentation/Makefile
index a3fbd29744..627204928e 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -34,6 +34,7 @@ MAN5_TXT += gitformat-bundle.adoc
MAN5_TXT += gitformat-chunk.adoc
MAN5_TXT += gitformat-commit-graph.adoc
MAN5_TXT += gitformat-index.adoc
+MAN5_TXT += gitformat-loose.adoc
MAN5_TXT += gitformat-pack.adoc
MAN5_TXT += gitformat-signature.adoc
MAN5_TXT += githooks.adoc
diff --git a/Documentation/fsck-msgids.adoc b/Documentation/fsck-msgids.adoc
index 81f11ba125..acac9683af 100644
--- a/Documentation/fsck-msgids.adoc
+++ b/Documentation/fsck-msgids.adoc
@@ -10,6 +10,12 @@
`badFilemode`::
(INFO) A tree contains a bad filemode entry.
+`badGpgsig`::
+ (ERROR) A tag contains a bad (truncated) signature (e.g., `gpgsig`) header.
+
+`badHeaderContinuation`::
+ (ERROR) A continuation header (such as for `gpgsig`) is unexpectedly truncated.
+
`badName`::
(ERROR) An author/committer name is empty.
diff --git a/Documentation/git-rev-parse.adoc b/Documentation/git-rev-parse.adoc
index 18383e52af..5398691f3f 100644
--- a/Documentation/git-rev-parse.adoc
+++ b/Documentation/git-rev-parse.adoc
@@ -324,11 +324,12 @@ The following options are unaffected by `--path-format`:
path of the current directory relative to the top-level
directory.
---show-object-format[=(storage|input|output)]::
- Show the object format (hash algorithm) used for the repository
- for storage inside the `.git` directory, input, or output. For
- input, multiple algorithms may be printed, space-separated.
- If not specified, the default is "storage".
+--show-object-format[=(storage|input|output|compat)]::
+ Show the object format (hash algorithm) used for the repository for storage
+ inside the `.git` directory, input, output, or compatibility. For input,
+ multiple algorithms may be printed, space-separated. If `compat` is
+ requested and no compatibility algorithm is enabled, prints an empty line. If
+ not specified, the default is "storage".
--show-ref-format::
Show the reference storage format used for the repository.
diff --git a/Documentation/gitformat-loose.adoc b/Documentation/gitformat-loose.adoc
new file mode 100644
index 0000000000..947993663e
--- /dev/null
+++ b/Documentation/gitformat-loose.adoc
@@ -0,0 +1,53 @@
+gitformat-loose(5)
+==================
+
+NAME
+----
+gitformat-loose - Git loose object format
+
+
+SYNOPSIS
+--------
+[verse]
+$GIT_DIR/objects/[0-9a-f][0-9a-f]/*
+
+DESCRIPTION
+-----------
+
+Loose objects are how Git stores individual objects, where every object is
+written as a separate file.
+
+Over the lifetime of a repository, objects are usually written as loose objects
+initially. Eventually, these loose objects will be compacted into packfiles
+via repository maintenance to improve disk space usage and speed up the lookup
+of these objects.
+
+== Loose objects
+
+Each loose object contains a prefix, followed immediately by the data of the
+object. The prefix contains `<type> <size>\0`. `<type>` is one of `blob`,
+`tree`, `commit`, or `tag`, and `<size>` is the size of the data (without the
+prefix) as a decimal integer expressed in ASCII.
+
+The entire contents, prefix and data concatenated, is then compressed with zlib
+and the compressed data is stored in the file. The object ID of the object is
+the SHA-1 or SHA-256 (as appropriate) hash of the uncompressed prefix and data.
+
+The file for the loose object is stored under the `objects` directory, with the
+first two hex characters of the object ID being the directory and the remaining
+characters being the file name. This is done to shard the data and avoid too
+many files being in one directory, since some file systems perform poorly with
+many items in a directory.
+
+As an example, the empty tree contains the data (when uncompressed) `tree 0\0`
+and, in a SHA-256 repository, would have the object ID
+`6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321` and would be
+stored under
+`$GIT_DIR/objects/6e/f19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321`.
+
+Similarly, a blob containing the contents `abc` would have the uncompressed
+data of `blob 3\0abc`.
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Documentation/gitformat-pack.adoc b/Documentation/gitformat-pack.adoc
index d6ae229be5..1b4db4aa61 100644
--- a/Documentation/gitformat-pack.adoc
+++ b/Documentation/gitformat-pack.adoc
@@ -32,6 +32,10 @@ In a repository using the traditional SHA-1, pack checksums, index checksums,
and object IDs (object names) mentioned below are all computed using SHA-1.
Similarly, in SHA-256 repositories, these values are computed using SHA-256.
+CRC32 checksums are always computed over the entire packed object, including
+the header (n-byte type and length); the base object name or offset, if any;
+and the entire compressed object. The CRC32 algorithm used is that of zlib.
+
== pack-*.pack files have the following format:
- A header appears at the beginning and consists of the following:
@@ -80,6 +84,16 @@ Valid object types are:
Type 5 is reserved for future expansion. Type 0 is invalid.
+=== Object encoding
+
+Unlike loose objects, packed objects do not have a prefix containing the type,
+size, and a NUL byte. These are not necessary because they can be determined by
+the n-byte type and length that prefixes the data and so they are omitted from
+the compressed and deltified data.
+
+The computation of the object ID still uses this prefix by reconstructing it
+from the type and length as needed.
+
=== Size encoding
This document uses the following "size encoding" of non-negative
@@ -92,6 +106,11 @@ values are more significant.
This size encoding should not be confused with the "offset encoding",
which is also used in this document.
+When encoding the size of an undeltified object in a pack, the size is that of
+the uncompressed raw object. For deltified objects, it is the size of the
+uncompressed delta. The base object name or offset is not included in the size
+computation.
+
=== Deltified representation
Conceptually there are only four object types: commit, tree, tag and
diff --git a/Documentation/meson.build b/Documentation/meson.build
index 44f94cdb7b..9d24f2da54 100644
--- a/Documentation/meson.build
+++ b/Documentation/meson.build
@@ -173,6 +173,7 @@ manpages = {
'gitformat-chunk.adoc' : 5,
'gitformat-commit-graph.adoc' : 5,
'gitformat-index.adoc' : 5,
+ 'gitformat-loose.adoc' : 5,
'gitformat-pack.adoc' : 5,
'gitformat-signature.adoc' : 5,
'githooks.adoc' : 5,
diff --git a/Documentation/technical/hash-function-transition.adoc b/Documentation/technical/hash-function-transition.adoc
index f047fd80ca..2359d7d106 100644
--- a/Documentation/technical/hash-function-transition.adoc
+++ b/Documentation/technical/hash-function-transition.adoc
@@ -227,9 +227,9 @@ network byte order):
** 4-byte length in bytes of shortened object names. This is the
shortest possible length needed to make names in the shortened
object name table unambiguous.
- ** 4-byte integer, recording where tables relating to this format
+ ** 8-byte integer, recording where tables relating to this format
are stored in this index file, as an offset from the beginning.
- * 4-byte offset to the trailer from the beginning of this file.
+ * 8-byte offset to the trailer from the beginning of this file.
* Zero or more additional key/value pairs (4-byte key, 4-byte
value). Only one key is supported: 'PSRC'. See the "Loose objects
and unreachable objects" section for supported values and how this
@@ -260,12 +260,10 @@ network byte order):
compressed data to be copied directly from pack to pack during
repacking without undetected data corruption.
- * A table of 4-byte offset values. For an object in the table of
- sorted shortened object names, the value at the corresponding
- index in this table indicates where that object can be found in
- the pack file. These are usually 31-bit pack file offsets, but
- large offsets are encoded as an index into the next table with the
- most significant bit set.
+ * A table of 4-byte offset values, stored in pack order. Each entry
+ indicates where the corresponding object can be found in the pack file.
+ These are usually 31-bit pack file offsets, but large offsets are encoded
+ as an index into the next table with the most significant bit set.
* A table of 8-byte offset entries (empty for pack files less than
2 GiB). Pack files are organized with heavily used objects toward
@@ -276,10 +274,14 @@ network byte order):
up to and not including the table of CRC32 values.
- Zero or more NUL bytes.
- The trailer consists of the following:
- * A copy of the 20-byte SHA-256 checksum at the end of the
+ * A copy of the full main hash checksum at the end of the
corresponding packfile.
- * 20-byte SHA-256 checksum of all of the above.
+ * Full main hash checksum of all of the above.
+
+The "full main hash" is a full-length hash of the main (not compatibility)
+algorithm in the repository. Thus, if the main algorithm is SHA-256, this is
+a 32-byte SHA-256 hash and for SHA-1, it's a 20-byte SHA-1 hash.
Loose object index
~~~~~~~~~~~~~~~~~~
@@ -427,17 +429,19 @@ ordinary unsigned commit.
Signed Tags
~~~~~~~~~~~
-We add a new field "gpgsig-sha256" to the tag object format to allow
-signing tags without relying on SHA-1. Its signed payload is the
-SHA-256 content of the tag with its gpgsig-sha256 field and "-----BEGIN PGP
-SIGNATURE-----" delimited in-body signature removed.
+We add new fields "gpgsig" and "gpgsig-sha256" to the tag object format to
+allow signing tags in both formats. The in-body signature is used for the
+signature in the current hash algorithm and the header is used for the
+signature in the other algorithm. Thus, a dual-signature tag will contain both
+an in-body signature and a gpgsig-sha256 header for the SHA-1 format of an
+object or both an in-body signature and a gpgsig header for the SHA-256 format
+of an object.
-This means tags can be signed
+The signed payload of the tag is the content of the tag in the current
+algorithm with both its gpgsig and gpgsig-sha256 fields and
+"-----BEGIN PGP SIGNATURE-----" delimited in-body signature removed.
-1. using SHA-1 only, as in existing signed tag objects
-2. using both SHA-1 and SHA-256, by using gpgsig-sha256 and an in-body
- signature.
-3. using only SHA-256, by only using the gpgsig-sha256 field.
+This means tags can be signed using one or both algorithms.
Mergetag embedding
~~~~~~~~~~~~~~~~~~
diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c
index 9da92b990d..7b3711cf34 100644
--- a/builtin/rev-parse.c
+++ b/builtin/rev-parse.c
@@ -1107,11 +1107,20 @@ int cmd_rev_parse(int argc,
const char *val = arg ? arg : "storage";
if (strcmp(val, "storage") &&
+ strcmp(val, "compat") &&
strcmp(val, "input") &&
strcmp(val, "output"))
die(_("unknown mode for --show-object-format: %s"),
arg);
- puts(the_hash_algo->name);
+
+ if (!strcmp(val, "compat")) {
+ if (the_repository->compat_hash_algo)
+ puts(the_repository->compat_hash_algo->name);
+ else
+ putchar('\n');
+ } else {
+ puts(the_hash_algo->name);
+ }
continue;
}
if (!strcmp(arg, "--show-ref-format")) {
diff --git a/fsck.c b/fsck.c
index 171b424dd5..341e100d24 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1067,6 +1067,24 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
else
ret = fsck_ident(&buffer, oid, OBJ_TAG, options);
+ if (buffer < buffer_end && (skip_prefix(buffer, "gpgsig ", &buffer) || skip_prefix(buffer, "gpgsig-sha256 ", &buffer))) {
+ eol = memchr(buffer, '\n', buffer_end - buffer);
+ if (!eol) {
+ ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_GPGSIG, "invalid format - unexpected end after 'gpgsig' or 'gpgsig-sha256' line");
+ goto done;
+ }
+ buffer = eol + 1;
+
+ while (buffer < buffer_end && starts_with(buffer, " ")) {
+ eol = memchr(buffer, '\n', buffer_end - buffer);
+ if (!eol) {
+ ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_HEADER_CONTINUATION, "invalid format - unexpected end in 'gpgsig' or 'gpgsig-sha256' continuation line");
+ goto done;
+ }
+ buffer = eol + 1;
+ }
+ }
+
if (buffer < buffer_end && !starts_with(buffer, "\n")) {
/*
* The verify_headers() check will allow
diff --git a/fsck.h b/fsck.h
index 759df97655..cb6ef32f4f 100644
--- a/fsck.h
+++ b/fsck.h
@@ -25,9 +25,11 @@ enum fsck_msg_type {
FUNC(NUL_IN_HEADER, FATAL) \
FUNC(UNTERMINATED_HEADER, FATAL) \
/* errors */ \
+ FUNC(BAD_HEADER_CONTINUATION, ERROR) \
FUNC(BAD_DATE, ERROR) \
FUNC(BAD_DATE_OVERFLOW, ERROR) \
FUNC(BAD_EMAIL, ERROR) \
+ FUNC(BAD_GPGSIG, ERROR) \
FUNC(BAD_NAME, ERROR) \
FUNC(BAD_OBJECT_SHA1, ERROR) \
FUNC(BAD_PACKED_REF_ENTRY, ERROR) \
diff --git a/t/t1010-mktree.sh b/t/t1010-mktree.sh
index e9973f7494..312fe6717a 100755
--- a/t/t1010-mktree.sh
+++ b/t/t1010-mktree.sh
@@ -11,10 +11,13 @@ test_expect_success setup '
git add "$d" || return 1
done &&
echo zero >one &&
- git update-index --add --info-only one &&
- git write-tree --missing-ok >tree.missing &&
- git ls-tree $(cat tree.missing) >top.missing &&
- git ls-tree -r $(cat tree.missing) >all.missing &&
+ if test_have_prereq BROKEN_OBJECTS
+ then
+ git update-index --add --info-only one &&
+ git write-tree --missing-ok >tree.missing &&
+ git ls-tree $(cat tree.missing) >top.missing &&
+ git ls-tree -r $(cat tree.missing) >all.missing
+ fi &&
echo one >one &&
git add one &&
git write-tree >tree &&
@@ -53,7 +56,7 @@ test_expect_success 'ls-tree output in wrong order given to mktree (2)' '
test_cmp tree.withsub actual
'
-test_expect_success 'allow missing object with --missing' '
+test_expect_success BROKEN_OBJECTS 'allow missing object with --missing' '
git mktree --missing <top.missing >actual &&
test_cmp tree.missing actual
'
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 5ae86c42be..c4b651c2dc 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -454,6 +454,60 @@ test_expect_success 'tag with NUL in header' '
test_grep "error in tag $tag.*unterminated header: NUL at offset" out
'
+test_expect_success 'tag accepts gpgsig header even if not validly signed' '
+ test_oid_cache <<-\EOF &&
+ header sha1:gpgsig-sha256
+ header sha256:gpgsig
+ EOF
+ header=$(test_oid header) &&
+ sha=$(git rev-parse HEAD) &&
+ cat >good-tag <<-EOF &&
+ object $sha
+ type commit
+ tag good
+ tagger T A Gger <tagger@example.com> 1234567890 -0000
+ $header -----BEGIN PGP SIGNATURE-----
+ Not a valid signature
+ -----END PGP SIGNATURE-----
+
+ This is a good tag.
+ EOF
+
+ tag=$(git hash-object --literally -t tag -w --stdin <good-tag) &&
+ test_when_finished "remove_object $tag" &&
+ git update-ref refs/tags/good $tag &&
+ test_when_finished "git update-ref -d refs/tags/good" &&
+ git -c fsck.extraHeaderEntry=error fsck --tags
+'
+
+test_expect_success 'tag rejects invalid headers' '
+ test_oid_cache <<-\EOF &&
+ header sha1:gpgsig-sha256
+ header sha256:gpgsig
+ EOF
+ header=$(test_oid header) &&
+ sha=$(git rev-parse HEAD) &&
+ cat >bad-tag <<-EOF &&
+ object $sha
+ type commit
+ tag good
+ tagger T A Gger <tagger@example.com> 1234567890 -0000
+ $header -----BEGIN PGP SIGNATURE-----
+ Not a valid signature
+ -----END PGP SIGNATURE-----
+ junk
+
+ This is a bad tag with junk at the end of the headers.
+ EOF
+
+ tag=$(git hash-object --literally -t tag -w --stdin <bad-tag) &&
+ test_when_finished "remove_object $tag" &&
+ git update-ref refs/tags/bad $tag &&
+ test_when_finished "git update-ref -d refs/tags/bad" &&
+ test_must_fail git -c fsck.extraHeaderEntry=error fsck --tags 2>out &&
+ test_grep "error in tag $tag.*invalid format - extra header" out
+'
+
test_expect_success 'cleaned up' '
git fsck >actual 2>&1 &&
test_must_be_empty actual
diff --git a/t/t1500-rev-parse.sh b/t/t1500-rev-parse.sh
index 58a4583088..7739ab611b 100755
--- a/t/t1500-rev-parse.sh
+++ b/t/t1500-rev-parse.sh
@@ -207,6 +207,40 @@ test_expect_success 'rev-parse --show-object-format in repo' '
grep "unknown mode for --show-object-format: squeamish-ossifrage" err
'
+
+test_expect_success 'rev-parse --show-object-format in repo with compat mode' '
+ mkdir repo &&
+ (
+ sane_unset GIT_DEFAULT_HASH &&
+ cd repo &&
+ git init --object-format=sha256 &&
+ git config extensions.compatobjectformat sha1 &&
+ echo sha256 >expect &&
+ git rev-parse --show-object-format >actual &&
+ test_cmp expect actual &&
+ git rev-parse --show-object-format=storage >actual &&
+ test_cmp expect actual &&
+ git rev-parse --show-object-format=input >actual &&
+ test_cmp expect actual &&
+ git rev-parse --show-object-format=output >actual &&
+ test_cmp expect actual &&
+ echo sha1 >expect &&
+ git rev-parse --show-object-format=compat >actual &&
+ test_cmp expect actual &&
+ test_must_fail git rev-parse --show-object-format=squeamish-ossifrage 2>err &&
+ grep "unknown mode for --show-object-format: squeamish-ossifrage" err
+ ) &&
+ mkdir repo2 &&
+ (
+ sane_unset GIT_DEFAULT_HASH &&
+ cd repo2 &&
+ git init --object-format=sha256 &&
+ echo >expect &&
+ git rev-parse --show-object-format=compat >actual &&
+ test_cmp expect actual
+ )
+'
+
test_expect_success 'rev-parse --show-ref-format' '
test_detect_ref_format >expect &&
git rev-parse --show-ref-format >actual &&
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index a28de7b19b..52d7759bf5 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1708,11 +1708,16 @@ test_set_hash () {
# Detect the hash algorithm in use.
test_detect_hash () {
case "${GIT_TEST_DEFAULT_HASH:-$GIT_TEST_BUILTIN_HASH}" in
- "sha256")
+ *:*)
+ test_hash_algo="${GIT_TEST_DEFAULT_HASH%%:*}"
+ test_compat_hash_algo="${GIT_TEST_DEFAULT_HASH##*:}"
+ test_repo_compat_hash_algo="$test_compat_hash_algo"
+ ;;
+ sha256)
test_hash_algo=sha256
test_compat_hash_algo=sha1
;;
- *)
+ sha1)
test_hash_algo=sha1
test_compat_hash_algo=sha256
;;
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 562f950fb0..ef0ab7ec2d 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1924,6 +1924,19 @@ test_lazy_prereq DEFAULT_HASH_ALGORITHM '
test_lazy_prereq DEFAULT_REPO_FORMAT '
test_have_prereq SHA1,REFFILES
'
+# BROKEN_OBJECTS tests whether we can write deliberately broken objects and
+# expect them to work. When running in SHA-256 mode with SHA-1
+# compatibility, we cannot write such objects because there's no SHA-1
+# compatibility value for a nonexistent object.
+test_lazy_prereq BROKEN_OBJECTS '
+ ! test_have_prereq COMPAT_HASH
+'
+
+# COMPAT_HASH tests whether we're operating in a SHA-256 repository with
+# SHA-1 compatibility.
+test_lazy_prereq COMPAT_HASH '
+ test -n "$test_repo_compat_hash_algo"
+'
# Ensure that no test accidentally triggers a Git command
# that runs the actual maintenance scheduler, affecting a user's