From 799237852bd265feb1e1a8d4a19780e20991b4fd Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 10:17:59 +0200 Subject: reftable: pass opts as constant pointer We sometimes pass the refatble write options as value and sometimes as a pointer. This is quite confusing and makes the reader wonder whether the options get modified sometimes. In fact, `reftable_new_writer()` does cause the caller-provided options to get updated when some values aren't set up. This is quite unexpected, but didn't cause any harm until now. Adapt the code so that we do not modify the caller-provided values anymore. While at it, refactor the code to code to consistently pass the options as a constant pointer to clarify that the caller-provided opts will not ever get modified. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'refs') diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 010ef811b6..f8f930380d 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -129,7 +129,7 @@ static struct reftable_stack *stack_for(struct reftable_ref_store *store, store->base.repo->commondir, wtname_buf.buf); store->err = reftable_new_stack(&stack, wt_dir.buf, - store->write_options); + &store->write_options); assert(store->err != REFTABLE_API_ERROR); strmap_put(&store->worktree_stacks, wtname_buf.buf, stack); } @@ -263,7 +263,7 @@ static struct ref_store *reftable_be_init(struct repository *repo, } strbuf_addstr(&path, "/reftable"); refs->err = reftable_new_stack(&refs->main_stack, path.buf, - refs->write_options); + &refs->write_options); if (refs->err) goto done; @@ -280,7 +280,7 @@ static struct ref_store *reftable_be_init(struct repository *repo, strbuf_addf(&path, "%s/reftable", gitdir); refs->err = reftable_new_stack(&refs->worktree_stack, path.buf, - refs->write_options); + &refs->write_options); if (refs->err) goto done; } -- cgit v1.3-5-g9baa From 831b366c243ecaa450289d76926bb687c746d495 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 10:18:19 +0200 Subject: refs/reftable: allow configuring block size Add a new option `reftable.blockSize` that allows the user to control the block size used by the reftable library. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/config.txt | 2 ++ Documentation/config/reftable.txt | 14 ++++++++ refs/reftable-backend.c | 31 ++++++++++++++++- t/t0613-reftable-write-options.sh | 72 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 Documentation/config/reftable.txt (limited to 'refs') diff --git a/Documentation/config.txt b/Documentation/config.txt index 6f649c997c..cbf0b99c44 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -498,6 +498,8 @@ include::config/rebase.txt[] include::config/receive.txt[] +include::config/reftable.txt[] + include::config/remote.txt[] include::config/remotes.txt[] diff --git a/Documentation/config/reftable.txt b/Documentation/config/reftable.txt new file mode 100644 index 0000000000..fa7c4be014 --- /dev/null +++ b/Documentation/config/reftable.txt @@ -0,0 +1,14 @@ +reftable.blockSize:: + The size in bytes used by the reftable backend when writing blocks. + The block size is determined by the writer, and does not have to be a + power of 2. The block size must be larger than the longest reference + name or log entry used in the repository, as references cannot span + blocks. ++ +Powers of two that are friendly to the virtual memory system or +filesystem (such as 4kB or 8kB) are recommended. Larger sizes (64kB) can +yield better compression, with a possible increased cost incurred by +readers during access. ++ +The largest block size is `16777215` bytes (15.99 MiB). The default value is +`4096` bytes (4kB). A value of `0` will use the default value. diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index f8f930380d..8d0ae9e285 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -1,6 +1,7 @@ #include "../git-compat-util.h" #include "../abspath.h" #include "../chdir-notify.h" +#include "../config.h" #include "../environment.h" #include "../gettext.h" #include "../hash.h" @@ -228,6 +229,22 @@ done: return ret; } +static int reftable_be_config(const char *var, const char *value, + const struct config_context *ctx, + void *_opts) +{ + struct reftable_write_options *opts = _opts; + + if (!strcmp(var, "reftable.blocksize")) { + unsigned long block_size = git_config_ulong(var, value, ctx->kvi); + if (block_size > 16777215) + die("reftable block size cannot exceed 16MB"); + opts->block_size = block_size; + } + + return 0; +} + static struct ref_store *reftable_be_init(struct repository *repo, const char *gitdir, unsigned int store_flags) @@ -243,12 +260,24 @@ static struct ref_store *reftable_be_init(struct repository *repo, base_ref_store_init(&refs->base, repo, gitdir, &refs_be_reftable); strmap_init(&refs->worktree_stacks); refs->store_flags = store_flags; - refs->write_options.block_size = 4096; + refs->write_options.hash_id = repo->hash_algo->format_id; refs->write_options.default_permissions = calc_shared_perm(0666 & ~mask); refs->write_options.disable_auto_compact = !git_env_bool("GIT_TEST_REFTABLE_AUTOCOMPACTION", 1); + git_config(reftable_be_config, &refs->write_options); + + /* + * It is somewhat unfortunate that we have to mirror the default block + * size of the reftable library here. But given that the write options + * wouldn't be updated by the library here, and given that we require + * the proper block size to trim reflog message so that they fit, we + * must set up a proper value here. + */ + if (!refs->write_options.block_size) + refs->write_options.block_size = 4096; + /* * Set up the main reftable stack that is hosted in GIT_COMMON_DIR. * This stack contains both the shared and the main worktree refs. diff --git a/t/t0613-reftable-write-options.sh b/t/t0613-reftable-write-options.sh index 462980c37c..8bdbc6ec70 100755 --- a/t/t0613-reftable-write-options.sh +++ b/t/t0613-reftable-write-options.sh @@ -99,4 +99,76 @@ test_expect_success 'many refs results in multiple blocks' ' ) ' +test_expect_success 'tiny block size leads to error' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + cat >expect <<-EOF && + error: unable to compact stack: entry too large + EOF + test_must_fail git -c reftable.blockSize=50 pack-refs 2>err && + test_cmp expect err + ) +' + +test_expect_success 'small block size leads to multiple ref blocks' ' + test_config_global core.logAllRefUpdates false && + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit A && + test_commit B && + git -c reftable.blockSize=100 pack-refs && + + cat >expect <<-EOF && + header: + block_size: 100 + ref: + - length: 53 + restarts: 1 + - length: 74 + restarts: 1 + - length: 38 + restarts: 1 + EOF + test-tool dump-reftable -b .git/reftable/*.ref >actual && + test_cmp expect actual + ) +' + +test_expect_success 'small block size fails with large reflog message' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit A && + perl -e "print \"a\" x 500" >logmsg && + cat >expect <<-EOF && + fatal: update_ref failed for ref ${SQ}refs/heads/logme${SQ}: reftable: transaction failure: entry too large + EOF + test_must_fail git -c reftable.blockSize=100 \ + update-ref -m "$(cat logmsg)" refs/heads/logme HEAD 2>err && + test_cmp expect err + ) +' + +test_expect_success 'block size exceeding maximum supported size' ' + test_config_global core.logAllRefUpdates false && + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit A && + test_commit B && + cat >expect <<-EOF && + fatal: reftable block size cannot exceed 16MB + EOF + test_must_fail git -c reftable.blockSize=16777216 pack-refs 2>err && + test_cmp expect err + ) +' + test_done -- cgit v1.3-5-g9baa From 90db611c2a1f4ca2123ef5a8d7e592fa348bb23b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 10:18:28 +0200 Subject: refs/reftable: allow configuring restart interval Add a new option `reftable.restartInterval` that allows the user to control the restart interval when writing reftable records used by the reftable library. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/config/reftable.txt | 18 ++++++++++++++++ refs/reftable-backend.c | 5 +++++ t/t0613-reftable-write-options.sh | 43 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) (limited to 'refs') diff --git a/Documentation/config/reftable.txt b/Documentation/config/reftable.txt index fa7c4be014..2374be71d7 100644 --- a/Documentation/config/reftable.txt +++ b/Documentation/config/reftable.txt @@ -12,3 +12,21 @@ readers during access. + The largest block size is `16777215` bytes (15.99 MiB). The default value is `4096` bytes (4kB). A value of `0` will use the default value. + +reftable.restartInterval:: + The interval at which to create restart points. The reftable backend + determines the restart points at file creation. Every 16 may be + more suitable for smaller block sizes (4k or 8k), every 64 for larger + block sizes (64k). ++ +More frequent restart points reduces prefix compression and increases +space consumed by the restart table, both of which increase file size. ++ +Less frequent restart points makes prefix compression more effective, +decreasing overall file size, with increased penalties for readers +walking through more records after the binary search step. ++ +A maximum of `65535` restart points per block is supported. ++ +The default value is to create restart points every 16 records. A value of `0` +will use the default value. diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 8d0ae9e285..a2880aabce 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -240,6 +240,11 @@ static int reftable_be_config(const char *var, const char *value, if (block_size > 16777215) die("reftable block size cannot exceed 16MB"); opts->block_size = block_size; + } else if (!strcmp(var, "reftable.restartinterval")) { + unsigned long restart_interval = git_config_ulong(var, value, ctx->kvi); + if (restart_interval > UINT16_MAX) + die("reftable block size cannot exceed %u", (unsigned)UINT16_MAX); + opts->restart_interval = restart_interval; } return 0; diff --git a/t/t0613-reftable-write-options.sh b/t/t0613-reftable-write-options.sh index 8bdbc6ec70..e0a5b26f58 100755 --- a/t/t0613-reftable-write-options.sh +++ b/t/t0613-reftable-write-options.sh @@ -171,4 +171,47 @@ test_expect_success 'block size exceeding maximum supported size' ' ) ' +test_expect_success 'restart interval at every single record' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + for i in $(test_seq 10) + do + printf "update refs/heads/branch-%d HEAD\n" "$i" || + return 1 + done >input && + git update-ref --stdin expect <<-EOF && + header: + block_size: 4096 + ref: + - length: 566 + restarts: 13 + log: + - length: 1393 + restarts: 12 + EOF + test-tool dump-reftable -b .git/reftable/*.ref >actual && + test_cmp expect actual + ) +' + +test_expect_success 'restart interval exceeding maximum supported interval' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + cat >expect <<-EOF && + fatal: reftable block size cannot exceed 65535 + EOF + test_must_fail git -c reftable.restartInterval=65536 pack-refs 2>err && + test_cmp expect err + ) +' + test_done -- cgit v1.3-5-g9baa From afbdbfae0b44efe470c6c7998822900c61f71e39 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 10:18:33 +0200 Subject: refs/reftable: allow disabling writing the object index Besides the expected "ref" and "log" records, the reftable library also writes "obj" records. These are basically a reverse mapping of object IDs to their respective ref records so that it becomes efficient to figure out which references point to a specific object. The motivation for this data structure is the "uploadpack.allowTipSHA1InWant" config, which allows a client to fetch any object by its hash that has a ref pointing to it. This reverse index is not used by Git at all though, and the expectation is that most hosters nowadays use "uploadpack.allowAnySHA1InWant". It may thus be preferable for many users to disable writing these optional object indices altogether to safe some precious disk space. Add a new config "reftable.indexObjects" that allows the user to disable the object index altogether. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/config/reftable.txt | 6 ++++ refs/reftable-backend.c | 2 ++ t/t0613-reftable-write-options.sh | 69 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) (limited to 'refs') diff --git a/Documentation/config/reftable.txt b/Documentation/config/reftable.txt index 2374be71d7..68083876fa 100644 --- a/Documentation/config/reftable.txt +++ b/Documentation/config/reftable.txt @@ -30,3 +30,9 @@ A maximum of `65535` restart points per block is supported. + The default value is to create restart points every 16 records. A value of `0` will use the default value. + +reftable.indexObjects:: + Whether the reftable backend shall write object blocks. Object blocks + are a reverse mapping of object ID to the references pointing to them. ++ +The default value is `true`. diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index a2880aabce..5ffb36770a 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -245,6 +245,8 @@ static int reftable_be_config(const char *var, const char *value, if (restart_interval > UINT16_MAX) die("reftable block size cannot exceed %u", (unsigned)UINT16_MAX); opts->restart_interval = restart_interval; + } else if (!strcmp(var, "reftable.indexobjects")) { + opts->skip_index_objects = !git_config_bool(var, value); } return 0; diff --git a/t/t0613-reftable-write-options.sh b/t/t0613-reftable-write-options.sh index e0a5b26f58..e2708e11d5 100755 --- a/t/t0613-reftable-write-options.sh +++ b/t/t0613-reftable-write-options.sh @@ -214,4 +214,73 @@ test_expect_success 'restart interval exceeding maximum supported interval' ' ) ' +test_expect_success 'object index gets written by default with ref index' ' + test_config_global core.logAllRefUpdates false && + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + for i in $(test_seq 5) + do + printf "update refs/heads/branch-%d HEAD\n" "$i" || + return 1 + done >input && + git update-ref --stdin expect <<-EOF && + header: + block_size: 100 + ref: + - length: 53 + restarts: 1 + - length: 95 + restarts: 1 + - length: 71 + restarts: 1 + - length: 80 + restarts: 1 + obj: + - length: 11 + restarts: 1 + EOF + test-tool dump-reftable -b .git/reftable/*.ref >actual && + test_cmp expect actual + ) +' + +test_expect_success 'object index can be disabled' ' + test_config_global core.logAllRefUpdates false && + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + for i in $(test_seq 5) + do + printf "update refs/heads/branch-%d HEAD\n" "$i" || + return 1 + done >input && + git update-ref --stdin expect <<-EOF && + header: + block_size: 100 + ref: + - length: 53 + restarts: 1 + - length: 95 + restarts: 1 + - length: 71 + restarts: 1 + - length: 80 + restarts: 1 + EOF + test-tool dump-reftable -b .git/reftable/*.ref >actual && + test_cmp expect actual + ) +' + test_done -- cgit v1.3-5-g9baa From f518d91a2b6465c9951f4ee1ab316ee81894000c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 13 May 2024 10:18:43 +0200 Subject: refs/reftable: allow configuring geometric factor Allow configuring the geometric factor used by the auto-compaction algorithm whenever a new table is appended to the stack of tables. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Documentation/config/reftable.txt | 10 ++++++++++ refs/reftable-backend.c | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'refs') diff --git a/Documentation/config/reftable.txt b/Documentation/config/reftable.txt index 68083876fa..0515727977 100644 --- a/Documentation/config/reftable.txt +++ b/Documentation/config/reftable.txt @@ -36,3 +36,13 @@ reftable.indexObjects:: are a reverse mapping of object ID to the references pointing to them. + The default value is `true`. + +reftable.geometricFactor:: + Whenever the reftable backend appends a new table to the stack, it + performs auto compaction to ensure that there is only a handful of + tables. The backend does this by ensuring that tables form a geometric + sequence regarding the respective sizes of each table. ++ +By default, the geometric sequence uses a factor of 2, meaning that for any +table, the next-biggest table must at least be twice as big. A maximum factor +of 256 is supported. diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 5ffb36770a..da620fd598 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -247,6 +247,11 @@ static int reftable_be_config(const char *var, const char *value, opts->restart_interval = restart_interval; } else if (!strcmp(var, "reftable.indexobjects")) { opts->skip_index_objects = !git_config_bool(var, value); + } else if (!strcmp(var, "reftable.geometricfactor")) { + unsigned long factor = git_config_ulong(var, value, ctx->kvi); + if (factor > UINT8_MAX) + die("reftable geometric factor cannot exceed %u", (unsigned)UINT8_MAX); + opts->auto_compaction_factor = factor; } return 0; -- cgit v1.3-5-g9baa