From 917a2b3ce990717ccce5ff372766d9ffd41239f6 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:34 +0100 Subject: reftable: handle interrupted reads There are calls to pread(3P) and read(3P) where we don't properly handle interrupts. Convert them to use `pread_in_full()` and `read_in_full()`, respectively. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'reftable/stack.c') diff --git a/reftable/stack.c b/reftable/stack.c index ddbdf1b9c8..ed108a929b 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -92,7 +92,7 @@ static int fd_read_lines(int fd, char ***namesp) } buf = reftable_malloc(size + 1); - if (read(fd, buf, size) != size) { + if (read_in_full(fd, buf, size) != size) { err = REFTABLE_IO_ERROR; goto done; } -- cgit v1.3 From 85a8c899cedf46b79dd333549ed24b7ea7697862 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:38 +0100 Subject: reftable: handle interrupted writes There are calls to write(3P) where we don't properly handle interrupts. Convert them to use `write_in_full()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 6 +++--- reftable/stack_test.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'reftable/stack.c') diff --git a/reftable/stack.c b/reftable/stack.c index ed108a929b..f0cadad490 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -42,7 +42,7 @@ static void stack_filename(struct strbuf *dest, struct reftable_stack *st, static ssize_t reftable_fd_write(void *arg, const void *data, size_t sz) { int *fdp = (int *)arg; - return write(*fdp, data, sz); + return write_in_full(*fdp, data, sz); } int reftable_new_stack(struct reftable_stack **dest, const char *dir, @@ -554,7 +554,7 @@ int reftable_addition_commit(struct reftable_addition *add) strbuf_addstr(&table_list, "\n"); } - err = write(add->lock_file_fd, table_list.buf, table_list.len); + err = write_in_full(add->lock_file_fd, table_list.buf, table_list.len); strbuf_release(&table_list); if (err < 0) { err = REFTABLE_IO_ERROR; @@ -1024,7 +1024,7 @@ static int stack_compact_range(struct reftable_stack *st, int first, int last, strbuf_addstr(&ref_list_contents, "\n"); } - err = write(lock_file_fd, ref_list_contents.buf, ref_list_contents.len); + err = write_in_full(lock_file_fd, ref_list_contents.buf, ref_list_contents.len); if (err < 0) { err = REFTABLE_IO_ERROR; unlink(new_table_path.buf); diff --git a/reftable/stack_test.c b/reftable/stack_test.c index d0b717510f..0644c8ad2e 100644 --- a/reftable/stack_test.c +++ b/reftable/stack_test.c @@ -78,7 +78,7 @@ static void test_read_file(void) int i = 0; EXPECT(fd > 0); - n = write(fd, out, strlen(out)); + n = write_in_full(fd, out, strlen(out)); EXPECT(n == strlen(out)); err = close(fd); EXPECT(err >= 0); -- cgit v1.3 From 5c086453ff32a2ca07ac455594d59a9943b5d113 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:46 +0100 Subject: reftable/stack: perform auto-compaction with transactional interface Whenever updating references or reflog entries in the reftable stack, we need to add a new table to the stack, thus growing the stack's length by one. The stack can grow to become quite long rather quickly, leading to performance issues when trying to read records. But besides performance issues, this can also lead to exhaustion of file descriptors very rapidly as every single table requires a separate descriptor when opening the stack. While git-pack-refs(1) fixes this issue for us by merging the tables, it runs too irregularly to keep the length of the stack within reasonable limits. This is why the reftable stack has an auto-compaction mechanism: `reftable_stack_add()` will call `reftable_stack_auto_compact()` after its added the new table, which will auto-compact the stack as required. But while this logic works alright for `reftable_stack_add()`, we do not do the same in `reftable_addition_commit()`, which is the transactional equivalent to the former function that allows us to write multiple updates to the stack atomically. Consequentially, we will easily run into file descriptor exhaustion in code paths that use many separate transactions like e.g. non-atomic fetches. Fix this issue by calling `reftable_stack_auto_compact()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 6 ++++++ reftable/stack_test.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) (limited to 'reftable/stack.c') diff --git a/reftable/stack.c b/reftable/stack.c index f0cadad490..f5d18a842a 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -584,6 +584,12 @@ int reftable_addition_commit(struct reftable_addition *add) add->new_tables_len = 0; err = reftable_stack_reload(add->stack); + if (err) + goto done; + + if (!add->stack->disable_auto_compact) + err = reftable_stack_auto_compact(add->stack); + done: reftable_addition_close(add); return err; diff --git a/reftable/stack_test.c b/reftable/stack_test.c index 52b4dc3b14..14a3fc11ee 100644 --- a/reftable/stack_test.c +++ b/reftable/stack_test.c @@ -289,6 +289,61 @@ static void test_reftable_stack_transaction_api(void) clear_dir(dir); } +static void test_reftable_stack_transaction_api_performs_auto_compaction(void) +{ + char *dir = get_tmp_dir(__LINE__); + struct reftable_write_options cfg = {0}; + struct reftable_addition *add = NULL; + struct reftable_stack *st = NULL; + int i, n = 20, err; + + err = reftable_new_stack(&st, dir, cfg); + EXPECT_ERR(err); + + for (i = 0; i <= n; i++) { + struct reftable_ref_record ref = { + .update_index = reftable_stack_next_update_index(st), + .value_type = REFTABLE_REF_SYMREF, + .value.symref = "master", + }; + char name[100]; + + snprintf(name, sizeof(name), "branch%04d", i); + ref.refname = name; + + /* + * Disable auto-compaction for all but the last runs. Like this + * we can ensure that we indeed honor this setting and have + * better control over when exactly auto compaction runs. + */ + st->disable_auto_compact = i != n; + + err = reftable_stack_new_addition(&add, st); + EXPECT_ERR(err); + + err = reftable_addition_add(add, &write_test_ref, &ref); + EXPECT_ERR(err); + + err = reftable_addition_commit(add); + EXPECT_ERR(err); + + reftable_addition_destroy(add); + + /* + * The stack length should grow continuously for all runs where + * auto compaction is disabled. When enabled, we should merge + * all tables in the stack. + */ + if (i != n) + EXPECT(st->merged->stack_len == i + 1); + else + EXPECT(st->merged->stack_len == 1); + } + + reftable_stack_destroy(st); + clear_dir(dir); +} + static void test_reftable_stack_validate_refname(void) { struct reftable_write_options cfg = { 0 }; @@ -1016,6 +1071,7 @@ int stack_test_main(int argc, const char *argv[]) RUN_TEST(test_reftable_stack_log_normalize); RUN_TEST(test_reftable_stack_tombstone); RUN_TEST(test_reftable_stack_transaction_api); + RUN_TEST(test_reftable_stack_transaction_api_performs_auto_compaction); RUN_TEST(test_reftable_stack_update_index_check); RUN_TEST(test_reftable_stack_uptodate); RUN_TEST(test_reftable_stack_validate_refname); -- cgit v1.3 From d779996a1087747886853f4e73cff920c09ab1a2 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:50 +0100 Subject: reftable/stack: reuse buffers when reloading stack In `reftable_stack_reload_once()` we iterate over all the tables added to the stack in order to figure out whether any of the tables needs to be reloaded. We use a set of buffers in this context to compute the paths of these tables, but discard those buffers on every iteration. This is quite wasteful given that we do not need to transfer ownership of the allocated buffer outside of the loop. Refactor the code to instead reuse the buffers to reduce the number of allocations we need to do. Note that we do not have to manually reset the buffer because `stack_filename()` does this for us already. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'reftable/stack.c') diff --git a/reftable/stack.c b/reftable/stack.c index f5d18a842a..2dd2373360 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -204,6 +204,7 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, reftable_calloc(sizeof(struct reftable_table) * names_len); int new_readers_len = 0; struct reftable_merged_table *new_merged = NULL; + struct strbuf table_path = STRBUF_INIT; int i; while (*names) { @@ -223,13 +224,10 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, if (!rd) { struct reftable_block_source src = { NULL }; - struct strbuf table_path = STRBUF_INIT; stack_filename(&table_path, st, name); err = reftable_block_source_from_file(&src, table_path.buf); - strbuf_release(&table_path); - if (err < 0) goto done; @@ -267,16 +265,13 @@ static int reftable_stack_reload_once(struct reftable_stack *st, char **names, for (i = 0; i < cur_len; i++) { if (cur[i]) { const char *name = reader_name(cur[i]); - struct strbuf filename = STRBUF_INIT; - stack_filename(&filename, st, name); + stack_filename(&table_path, st, name); reader_close(cur[i]); reftable_reader_free(cur[i]); /* On Windows, can only unlink after closing. */ - unlink(filename.buf); - - strbuf_release(&filename); + unlink(table_path.buf); } } @@ -288,6 +283,7 @@ done: reftable_free(new_readers); reftable_free(new_tables); reftable_free(cur); + strbuf_release(&table_path); return err; } -- cgit v1.3 From 3054fbd93edb5f12c1a320dfb6abec139bdf9628 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:54 +0100 Subject: reftable/stack: fix stale lock when dying When starting a transaction via `reftable_stack_init_addition()`, we create a lockfile for the reftable stack itself which we'll write the new list of tables to. But if we terminate abnormally e.g. via a call to `die()`, then we do not remove the lockfile. Subsequent executions of Git which try to modify references will thus fail with an out-of-date error. Fix this bug by registering the lock as a `struct tempfile`, which ensures automatic cleanup for us. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/stack.c | 47 +++++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 32 deletions(-) (limited to 'reftable/stack.c') diff --git a/reftable/stack.c b/reftable/stack.c index 2dd2373360..0c235724e2 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -17,6 +17,8 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable-merged.h" #include "writer.h" +#include "tempfile.h" + static int stack_try_add(struct reftable_stack *st, int (*write_table)(struct reftable_writer *wr, void *arg), @@ -440,8 +442,7 @@ static void format_name(struct strbuf *dest, uint64_t min, uint64_t max) } struct reftable_addition { - int lock_file_fd; - struct strbuf lock_file_name; + struct tempfile *lock_file; struct reftable_stack *stack; char **new_tables; @@ -449,24 +450,19 @@ struct reftable_addition { uint64_t next_update_index; }; -#define REFTABLE_ADDITION_INIT \ - { \ - .lock_file_name = STRBUF_INIT \ - } +#define REFTABLE_ADDITION_INIT {0} static int reftable_stack_init_addition(struct reftable_addition *add, struct reftable_stack *st) { + struct strbuf lock_file_name = STRBUF_INIT; int err = 0; add->stack = st; - strbuf_reset(&add->lock_file_name); - strbuf_addstr(&add->lock_file_name, st->list_file); - strbuf_addstr(&add->lock_file_name, ".lock"); + strbuf_addf(&lock_file_name, "%s.lock", st->list_file); - add->lock_file_fd = open(add->lock_file_name.buf, - O_EXCL | O_CREAT | O_WRONLY, 0666); - if (add->lock_file_fd < 0) { + add->lock_file = create_tempfile(lock_file_name.buf); + if (!add->lock_file) { if (errno == EEXIST) { err = REFTABLE_LOCK_ERROR; } else { @@ -475,7 +471,7 @@ static int reftable_stack_init_addition(struct reftable_addition *add, goto done; } if (st->config.default_permissions) { - if (chmod(add->lock_file_name.buf, st->config.default_permissions) < 0) { + if (chmod(add->lock_file->filename.buf, st->config.default_permissions) < 0) { err = REFTABLE_IO_ERROR; goto done; } @@ -495,6 +491,7 @@ done: if (err) { reftable_addition_close(add); } + strbuf_release(&lock_file_name); return err; } @@ -512,15 +509,7 @@ static void reftable_addition_close(struct reftable_addition *add) add->new_tables = NULL; add->new_tables_len = 0; - if (add->lock_file_fd > 0) { - close(add->lock_file_fd); - add->lock_file_fd = 0; - } - if (add->lock_file_name.len > 0) { - unlink(add->lock_file_name.buf); - strbuf_release(&add->lock_file_name); - } - + delete_tempfile(&add->lock_file); strbuf_release(&nm); } @@ -536,8 +525,10 @@ void reftable_addition_destroy(struct reftable_addition *add) int reftable_addition_commit(struct reftable_addition *add) { struct strbuf table_list = STRBUF_INIT; + int lock_file_fd = get_tempfile_fd(add->lock_file); int i = 0; int err = 0; + if (add->new_tables_len == 0) goto done; @@ -550,28 +541,20 @@ int reftable_addition_commit(struct reftable_addition *add) strbuf_addstr(&table_list, "\n"); } - err = write_in_full(add->lock_file_fd, table_list.buf, table_list.len); + err = write_in_full(lock_file_fd, table_list.buf, table_list.len); strbuf_release(&table_list); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } - err = close(add->lock_file_fd); - add->lock_file_fd = 0; - if (err < 0) { - err = REFTABLE_IO_ERROR; - goto done; - } - - err = rename(add->lock_file_name.buf, add->stack->list_file); + err = rename_tempfile(&add->lock_file, add->stack->list_file); if (err < 0) { err = REFTABLE_IO_ERROR; goto done; } /* success, no more state to clean up. */ - strbuf_release(&add->lock_file_name); for (i = 0; i < add->new_tables_len; i++) { reftable_free(add->new_tables[i]); } -- cgit v1.3 From 9abda98149e0f164ac0532fc6ca89b337049b9c3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 11 Dec 2023 10:07:59 +0100 Subject: reftable/stack: fix use of unseeded randomness When writing a new reftable stack, Git will first create the stack with a random suffix so that concurrent updates will not try to write to the same file. This random suffix is computed via a call to rand(3P). But we never seed the function via srand(3P), which means that the suffix is in fact always the same. Fix this bug by using `git_rand()` instead, which does not need to be initialized. While this function is likely going to be slower depending on the platform, this slowness should not matter in practice as we only use it when writing a new reftable stack. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/readwrite_test.c | 6 +++--- reftable/stack.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'reftable/stack.c') diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c index 469ab79a5a..278663f22d 100644 --- a/reftable/readwrite_test.c +++ b/reftable/readwrite_test.c @@ -141,8 +141,8 @@ static void test_log_buffer_size(void) */ uint8_t hash1[GIT_SHA1_RAWSZ], hash2[GIT_SHA1_RAWSZ]; for (i = 0; i < GIT_SHA1_RAWSZ; i++) { - hash1[i] = (uint8_t)(rand() % 256); - hash2[i] = (uint8_t)(rand() % 256); + hash1[i] = (uint8_t)(git_rand() % 256); + hash2[i] = (uint8_t)(git_rand() % 256); } log.value.update.old_hash = hash1; log.value.update.new_hash = hash2; @@ -320,7 +320,7 @@ static void test_log_zlib_corruption(void) }; for (i = 0; i < sizeof(message) - 1; i++) - message[i] = (uint8_t)(rand() % 64 + ' '); + message[i] = (uint8_t)(git_rand() % 64 + ' '); reftable_writer_set_limits(w, 1, 1); diff --git a/reftable/stack.c b/reftable/stack.c index 0c235724e2..16bab82063 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -434,7 +434,7 @@ int reftable_stack_add(struct reftable_stack *st, static void format_name(struct strbuf *dest, uint64_t min, uint64_t max) { char buf[100]; - uint32_t rnd = (uint32_t)rand(); + uint32_t rnd = (uint32_t)git_rand(); snprintf(buf, sizeof(buf), "0x%012" PRIx64 "-0x%012" PRIx64 "-%08x", min, max, rnd); strbuf_reset(dest); -- cgit v1.3