From f3c1db4b2a23fac171a699b10f9328f8df52602f Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:33 -0500 Subject: bulk-checkin: remove ODB transaction nesting ODB transactions support being nested. Only the outermost {begin,end}_odb_transaction() start and finish a transaction. This allows internal object write codepaths to be optimized with ODB transactions without worrying about whether a transaction is already active. When {begin,end}_odb_transaction() is invoked during an active transaction, these operations are essentially treated as no-ops. This can make the interface a bit awkward to use, as calling end_odb_transaction() does not guarantee that a transaction is actually ended. Thus, in situations where a transaction needs to be explicitly flushed, flush_odb_transaction() must be used. To remove the need for an explicit transaction flush operation via flush_odb_transaction() and better clarify transaction semantics, drop the transaction nesting mechanism in favor of begin_odb_transaction() returning a NULL transaction value to signal it was a no-op, and end_odb_transaction() behaving as a no-op when a NULL transaction value is passed. This is safe for existing callers as the transaction value wired to end_odb_transaction() already comes from begin_odb_transaction() and thus continues the same no-op behavior when a transaction is already pending. With this model, passing a pending transaction to end_odb_transaction() ensures it is committed at that point in time. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index bc15af4245..5e76573549 100644 --- a/object-file.c +++ b/object-file.c @@ -1267,7 +1267,7 @@ int index_fd(struct index_state *istate, struct object_id *oid, struct odb_transaction *transaction; transaction = begin_odb_transaction(the_repository->objects); - ret = index_blob_bulk_checkin(transaction, + ret = index_blob_bulk_checkin(the_repository->objects->transaction, oid, fd, xsize_t(st->st_size), path, flags); end_odb_transaction(transaction); -- cgit v1.3 From 78839e9cdead363d10190a009783c2d18149cc54 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:36 -0500 Subject: object-file: relocate ODB transaction code The bulk-checkin subsystem provides various functions to manage ODB transactions. Apart from {begin,end}_odb_transaction(), these functions are only used by the object-file subsystem to manage aspects of a transaction implementation specific to the files object source. Relocate all the transaction code in bulk-checkin to object-file. This simplifies the exposed transaction interface by reducing it to only {begin,end}_odb_transaction(). Function and type names are adjusted in the subsequent commit to better fit the new location. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- Makefile | 1 - builtin/add.c | 2 +- builtin/unpack-objects.c | 1 - builtin/update-index.c | 1 - bulk-checkin.c | 393 ---------------------------------------------- bulk-checkin.h | 52 ------- cache-tree.c | 1 - meson.build | 1 - object-file.c | 394 ++++++++++++++++++++++++++++++++++++++++++++++- object-file.h | 16 ++ read-cache.c | 1 - 11 files changed, 410 insertions(+), 453 deletions(-) delete mode 100644 bulk-checkin.c delete mode 100644 bulk-checkin.h (limited to 'object-file.c') diff --git a/Makefile b/Makefile index 4c95affadb..d25d4255f8 100644 --- a/Makefile +++ b/Makefile @@ -974,7 +974,6 @@ LIB_OBJS += blame.o LIB_OBJS += blob.o LIB_OBJS += bloom.o LIB_OBJS += branch.o -LIB_OBJS += bulk-checkin.o LIB_OBJS += bundle-uri.o LIB_OBJS += bundle.o LIB_OBJS += cache-tree.o diff --git a/builtin/add.c b/builtin/add.c index 740c7c4581..8294366d68 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -14,13 +14,13 @@ #include "gettext.h" #include "pathspec.h" #include "run-command.h" +#include "object-file.h" #include "parse-options.h" #include "path.h" #include "preload-index.h" #include "diff.h" #include "read-cache.h" #include "revision.h" -#include "bulk-checkin.h" #include "strvec.h" #include "submodule.h" #include "add-interactive.h" diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 28124b324d..4596fff0da 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -2,7 +2,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" -#include "bulk-checkin.h" #include "config.h" #include "environment.h" #include "gettext.h" diff --git a/builtin/update-index.c b/builtin/update-index.c index d36bc55752..ee01c4e423 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -8,7 +8,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "builtin.h" -#include "bulk-checkin.h" #include "config.h" #include "environment.h" #include "gettext.h" diff --git a/bulk-checkin.c b/bulk-checkin.c deleted file mode 100644 index 5de848deff..0000000000 --- a/bulk-checkin.c +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ - -#define USE_THE_REPOSITORY_VARIABLE - -#include "git-compat-util.h" -#include "bulk-checkin.h" -#include "environment.h" -#include "gettext.h" -#include "hex.h" -#include "lockfile.h" -#include "repository.h" -#include "csum-file.h" -#include "pack.h" -#include "strbuf.h" -#include "tmp-objdir.h" -#include "packfile.h" -#include "object-file.h" -#include "odb.h" - -struct bulk_checkin_packfile { - char *pack_tmp_name; - struct hashfile *f; - off_t offset; - struct pack_idx_option pack_idx_opts; - - struct pack_idx_entry **written; - uint32_t alloc_written; - uint32_t nr_written; -}; - -struct odb_transaction { - struct object_database *odb; - - struct tmp_objdir *objdir; - struct bulk_checkin_packfile packfile; -}; - -static void finish_tmp_packfile(struct odb_transaction *transaction, - struct strbuf *basename, - unsigned char hash[]) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - struct repository *repo = transaction->odb->repo; - char *idx_tmp_name = NULL; - - stage_tmp_packfiles(repo, basename, state->pack_tmp_name, - state->written, state->nr_written, NULL, - &state->pack_idx_opts, hash, &idx_tmp_name); - rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); - - free(idx_tmp_name); -} - -static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - struct repository *repo = transaction->odb->repo; - unsigned char hash[GIT_MAX_RAWSZ]; - struct strbuf packname = STRBUF_INIT; - - if (!state->f) - return; - - if (state->nr_written == 0) { - close(state->f->fd); - free_hashfile(state->f); - unlink(state->pack_tmp_name); - goto clear_exit; - } else if (state->nr_written == 1) { - finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, - CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); - } else { - int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0); - fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name, - state->nr_written, hash, - state->offset); - close(fd); - } - - strbuf_addf(&packname, "%s/pack/pack-%s.", - repo_get_object_directory(transaction->odb->repo), - hash_to_hex_algop(hash, repo->hash_algo)); - - finish_tmp_packfile(transaction, &packname, hash); - for (uint32_t i = 0; i < state->nr_written; i++) - free(state->written[i]); - -clear_exit: - free(state->pack_tmp_name); - free(state->written); - memset(state, 0, sizeof(*state)); - - strbuf_release(&packname); - /* Make objects we just wrote available to ourselves */ - reprepare_packed_git(repo); -} - -/* - * Cleanup after batch-mode fsync_object_files. - */ -static void flush_batch_fsync(struct odb_transaction *transaction) -{ - struct strbuf temp_path = STRBUF_INIT; - struct tempfile *temp; - - if (!transaction->objdir) - return; - - /* - * Issue a full hardware flush against a temporary file to ensure - * that all objects are durable before any renames occur. The code in - * fsync_loose_object_bulk_checkin has already issued a writeout - * request, but it has not flushed any writeback cache in the storage - * hardware or any filesystem logs. This fsync call acts as a barrier - * to ensure that the data in each new object file is durable before - * the final name is visible. - */ - strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", - repo_get_object_directory(transaction->odb->repo)); - temp = xmks_tempfile(temp_path.buf); - fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); - delete_tempfile(&temp); - strbuf_release(&temp_path); - - /* - * Make the object files visible in the primary ODB after their data is - * fully durable. - */ - tmp_objdir_migrate(transaction->objdir); - transaction->objdir = NULL; -} - -static int already_written(struct odb_transaction *transaction, - struct object_id *oid) -{ - /* The object may already exist in the repository */ - if (odb_has_object(transaction->odb, oid, - HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) - return 1; - - /* Might want to keep the list sorted */ - for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) - if (oideq(&transaction->packfile.written[i]->oid, oid)) - return 1; - - /* This is a new object we need to keep */ - return 0; -} - -/* - * Read the contents from fd for size bytes, streaming it to the - * packfile in state while updating the hash in ctx. Signal a failure - * by returning a negative value when the resulting pack would exceed - * the pack size limit and this is not the first object in the pack, - * so that the caller can discard what we wrote from the current pack - * by truncating it and opening a new one. The caller will then call - * us again after rewinding the input fd. - * - * The already_hashed_to pointer is kept untouched by the caller to - * make sure we do not hash the same byte when we are called - * again. This way, the caller does not have to checkpoint its hash - * status before calling us just in case we ask it to call us again - * with a new pack. - */ -static int stream_blob_to_pack(struct bulk_checkin_packfile *state, - struct git_hash_ctx *ctx, off_t *already_hashed_to, - int fd, size_t size, const char *path, - unsigned flags) -{ - git_zstream s; - unsigned char ibuf[16384]; - unsigned char obuf[16384]; - unsigned hdrlen; - int status = Z_OK; - int write_object = (flags & INDEX_WRITE_OBJECT); - off_t offset = 0; - - git_deflate_init(&s, pack_compression_level); - - hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size); - s.next_out = obuf + hdrlen; - s.avail_out = sizeof(obuf) - hdrlen; - - while (status != Z_STREAM_END) { - if (size && !s.avail_in) { - size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); - ssize_t read_result = read_in_full(fd, ibuf, rsize); - if (read_result < 0) - die_errno("failed to read from '%s'", path); - if ((size_t)read_result != rsize) - die("failed to read %u bytes from '%s'", - (unsigned)rsize, path); - offset += rsize; - if (*already_hashed_to < offset) { - size_t hsize = offset - *already_hashed_to; - if (rsize < hsize) - hsize = rsize; - if (hsize) - git_hash_update(ctx, ibuf, hsize); - *already_hashed_to = offset; - } - s.next_in = ibuf; - s.avail_in = rsize; - size -= rsize; - } - - status = git_deflate(&s, size ? 0 : Z_FINISH); - - if (!s.avail_out || status == Z_STREAM_END) { - if (write_object) { - size_t written = s.next_out - obuf; - - /* would we bust the size limit? */ - if (state->nr_written && - pack_size_limit_cfg && - pack_size_limit_cfg < state->offset + written) { - git_deflate_abort(&s); - return -1; - } - - hashwrite(state->f, obuf, written); - state->offset += written; - } - s.next_out = obuf; - s.avail_out = sizeof(obuf); - } - - switch (status) { - case Z_OK: - case Z_BUF_ERROR: - case Z_STREAM_END: - continue; - default: - die("unexpected deflate failure: %d", status); - } - } - git_deflate_end(&s); - return 0; -} - -/* Lazily create backing packfile for the state */ -static void prepare_to_stream(struct odb_transaction *transaction, - unsigned flags) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - if (!(flags & INDEX_WRITE_OBJECT) || state->f) - return; - - state->f = create_tmp_packfile(transaction->odb->repo, - &state->pack_tmp_name); - reset_pack_idx_option(&state->pack_idx_opts); - - /* Pretend we are going to write only one object */ - state->offset = write_pack_header(state->f, 1); - if (!state->offset) - die_errno("unable to write pack header"); -} - -int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *result_oid, int fd, size_t size, - const char *path, unsigned flags) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; - off_t seekback, already_hashed_to; - struct git_hash_ctx ctx; - unsigned char obuf[16384]; - unsigned header_len; - struct hashfile_checkpoint checkpoint; - struct pack_idx_entry *idx = NULL; - - seekback = lseek(fd, 0, SEEK_CUR); - if (seekback == (off_t) -1) - return error("cannot find the current offset"); - - header_len = format_object_header((char *)obuf, sizeof(obuf), - OBJ_BLOB, size); - transaction->odb->repo->hash_algo->init_fn(&ctx); - git_hash_update(&ctx, obuf, header_len); - - /* Note: idx is non-NULL when we are writing */ - if ((flags & INDEX_WRITE_OBJECT) != 0) { - CALLOC_ARRAY(idx, 1); - - prepare_to_stream(transaction, flags); - hashfile_checkpoint_init(state->f, &checkpoint); - } - - already_hashed_to = 0; - - while (1) { - prepare_to_stream(transaction, flags); - if (idx) { - hashfile_checkpoint(state->f, &checkpoint); - idx->offset = state->offset; - crc32_begin(state->f); - } - if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, - fd, size, path, flags)) - break; - /* - * Writing this object to the current pack will make - * it too big; we need to truncate it, start a new - * pack, and write into it. - */ - if (!idx) - BUG("should not happen"); - hashfile_truncate(state->f, &checkpoint); - state->offset = checkpoint.offset; - flush_bulk_checkin_packfile(transaction); - if (lseek(fd, seekback, SEEK_SET) == (off_t) -1) - return error("cannot seek back"); - } - git_hash_final_oid(result_oid, &ctx); - if (!idx) - return 0; - - idx->crc32 = crc32_end(state->f); - if (already_written(transaction, result_oid)) { - hashfile_truncate(state->f, &checkpoint); - state->offset = checkpoint.offset; - free(idx); - } else { - oidcpy(&idx->oid, result_oid); - ALLOC_GROW(state->written, - state->nr_written + 1, - state->alloc_written); - state->written[state->nr_written++] = idx; - } - return 0; -} - -void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) -{ - /* - * We lazily create the temporary object directory - * the first time an object might be added, since - * callers may not know whether any objects will be - * added at the time they call begin_odb_transaction. - */ - if (!transaction || transaction->objdir) - return; - - transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync"); - if (transaction->objdir) - tmp_objdir_replace_primary_odb(transaction->objdir, 0); -} - -void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, - int fd, const char *filename) -{ - /* - * If we have an active ODB transaction, we issue a call that - * cleans the filesystem page cache but avoids a hardware flush - * command. Later on we will issue a single hardware flush - * before renaming the objects to their final names as part of - * flush_batch_fsync. - */ - if (!transaction || !transaction->objdir || - git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { - if (errno == ENOSYS) - warning(_("core.fsyncMethod = batch is unsupported on this platform")); - fsync_or_die(fd, filename); - } -} - -struct odb_transaction *begin_odb_transaction(struct object_database *odb) -{ - if (odb->transaction) - return NULL; - - CALLOC_ARRAY(odb->transaction, 1); - odb->transaction->odb = odb; - - return odb->transaction; -} - -void end_odb_transaction(struct odb_transaction *transaction) -{ - if (!transaction) - return; - - /* - * Ensure the transaction ending matches the pending transaction. - */ - ASSERT(transaction == transaction->odb->transaction); - - flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(transaction); - transaction->odb->transaction = NULL; - free(transaction); -} diff --git a/bulk-checkin.h b/bulk-checkin.h deleted file mode 100644 index eea728f0d4..0000000000 --- a/bulk-checkin.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2011, Google Inc. - */ -#ifndef BULK_CHECKIN_H -#define BULK_CHECKIN_H - -#include "object.h" -#include "odb.h" - -struct odb_transaction; - -void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction); -void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, - int fd, const char *filename); - -/* - * This writes the specified object to a packfile. Objects written here - * during the same transaction are written to the same packfile. The - * packfile is not flushed until the transaction is flushed. The caller - * is expected to ensure a valid transaction is setup for objects to be - * recorded to. - * - * This also bypasses the usual "convert-to-git" dance, and that is on - * purpose. We could write a streaming version of the converting - * functions and insert that before feeding the data to fast-import - * (or equivalent in-core API described above). However, that is - * somewhat complicated, as we do not know the size of the filter - * result, which we need to know beforehand when writing a git object. - * Since the primary motivation for trying to stream from the working - * tree file and to avoid mmaping it in core is to deal with large - * binary blobs, they generally do not want to get any conversion, and - * callers should avoid this code path when filters are requested. - */ -int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *oid, int fd, size_t size, - const char *path, unsigned flags); - -/* - * Tell the object database to optimize for adding - * multiple objects. end_odb_transaction must be called - * to make new objects visible. If a transaction is already - * pending, NULL is returned. - */ -struct odb_transaction *begin_odb_transaction(struct object_database *odb); - -/* - * Tell the object database to make any objects from the - * current transaction visible. - */ -void end_odb_transaction(struct odb_transaction *transaction); - -#endif diff --git a/cache-tree.c b/cache-tree.c index d225554eed..79ddf6b727 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -8,7 +8,6 @@ #include "tree.h" #include "tree-walk.h" #include "cache-tree.h" -#include "bulk-checkin.h" #include "object-file.h" #include "odb.h" #include "read-cache-ll.h" diff --git a/meson.build b/meson.build index b3dfcc0497..fccb6d2eec 100644 --- a/meson.build +++ b/meson.build @@ -287,7 +287,6 @@ libgit_sources = [ 'blob.c', 'bloom.c', 'branch.c', - 'bulk-checkin.c', 'bundle-uri.c', 'bundle.c', 'cache-tree.c', diff --git a/object-file.c b/object-file.c index 5e76573549..03f9931b83 100644 --- a/object-file.c +++ b/object-file.c @@ -10,7 +10,6 @@ #define USE_THE_REPOSITORY_VARIABLE #include "git-compat-util.h" -#include "bulk-checkin.h" #include "convert.h" #include "dir.h" #include "environment.h" @@ -28,6 +27,8 @@ #include "read-cache-ll.h" #include "setup.h" #include "streaming.h" +#include "tempfile.h" +#include "tmp-objdir.h" /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 @@ -666,6 +667,93 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf, write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); } +struct bulk_checkin_packfile { + char *pack_tmp_name; + struct hashfile *f; + off_t offset; + struct pack_idx_option pack_idx_opts; + + struct pack_idx_entry **written; + uint32_t alloc_written; + uint32_t nr_written; +}; + +struct odb_transaction { + struct object_database *odb; + + struct tmp_objdir *objdir; + struct bulk_checkin_packfile packfile; +}; + +static void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) +{ + /* + * We lazily create the temporary object directory + * the first time an object might be added, since + * callers may not know whether any objects will be + * added at the time they call begin_odb_transaction. + */ + if (!transaction || transaction->objdir) + return; + + transaction->objdir = tmp_objdir_create(transaction->odb->repo, "bulk-fsync"); + if (transaction->objdir) + tmp_objdir_replace_primary_odb(transaction->objdir, 0); +} + +static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, + int fd, const char *filename) +{ + /* + * If we have an active ODB transaction, we issue a call that + * cleans the filesystem page cache but avoids a hardware flush + * command. Later on we will issue a single hardware flush + * before renaming the objects to their final names as part of + * flush_batch_fsync. + */ + if (!transaction || !transaction->objdir || + git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) { + if (errno == ENOSYS) + warning(_("core.fsyncMethod = batch is unsupported on this platform")); + fsync_or_die(fd, filename); + } +} + +/* + * Cleanup after batch-mode fsync_object_files. + */ +static void flush_batch_fsync(struct odb_transaction *transaction) +{ + struct strbuf temp_path = STRBUF_INIT; + struct tempfile *temp; + + if (!transaction->objdir) + return; + + /* + * Issue a full hardware flush against a temporary file to ensure + * that all objects are durable before any renames occur. The code in + * fsync_loose_object_bulk_checkin has already issued a writeout + * request, but it has not flushed any writeback cache in the storage + * hardware or any filesystem logs. This fsync call acts as a barrier + * to ensure that the data in each new object file is durable before + * the final name is visible. + */ + strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", + repo_get_object_directory(transaction->odb->repo)); + temp = xmks_tempfile(temp_path.buf); + fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp)); + delete_tempfile(&temp); + strbuf_release(&temp_path); + + /* + * Make the object files visible in the primary ODB after their data is + * fully durable. + */ + tmp_objdir_migrate(transaction->objdir); + transaction->objdir = NULL; +} + /* Finalize a file on disk, and close it. */ static void close_loose_object(struct odb_source *source, int fd, const char *filename) @@ -1243,6 +1331,283 @@ static int index_core(struct index_state *istate, return ret; } +static int already_written(struct odb_transaction *transaction, + struct object_id *oid) +{ + /* The object may already exist in the repository */ + if (odb_has_object(transaction->odb, oid, + HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR)) + return 1; + + /* Might want to keep the list sorted */ + for (uint32_t i = 0; i < transaction->packfile.nr_written; i++) + if (oideq(&transaction->packfile.written[i]->oid, oid)) + return 1; + + /* This is a new object we need to keep */ + return 0; +} + +/* Lazily create backing packfile for the state */ +static void prepare_to_stream(struct odb_transaction *transaction, + unsigned flags) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + if (!(flags & INDEX_WRITE_OBJECT) || state->f) + return; + + state->f = create_tmp_packfile(transaction->odb->repo, + &state->pack_tmp_name); + reset_pack_idx_option(&state->pack_idx_opts); + + /* Pretend we are going to write only one object */ + state->offset = write_pack_header(state->f, 1); + if (!state->offset) + die_errno("unable to write pack header"); +} + +/* + * Read the contents from fd for size bytes, streaming it to the + * packfile in state while updating the hash in ctx. Signal a failure + * by returning a negative value when the resulting pack would exceed + * the pack size limit and this is not the first object in the pack, + * so that the caller can discard what we wrote from the current pack + * by truncating it and opening a new one. The caller will then call + * us again after rewinding the input fd. + * + * The already_hashed_to pointer is kept untouched by the caller to + * make sure we do not hash the same byte when we are called + * again. This way, the caller does not have to checkpoint its hash + * status before calling us just in case we ask it to call us again + * with a new pack. + */ +static int stream_blob_to_pack(struct bulk_checkin_packfile *state, + struct git_hash_ctx *ctx, off_t *already_hashed_to, + int fd, size_t size, const char *path, + unsigned flags) +{ + git_zstream s; + unsigned char ibuf[16384]; + unsigned char obuf[16384]; + unsigned hdrlen; + int status = Z_OK; + int write_object = (flags & INDEX_WRITE_OBJECT); + off_t offset = 0; + + git_deflate_init(&s, pack_compression_level); + + hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size); + s.next_out = obuf + hdrlen; + s.avail_out = sizeof(obuf) - hdrlen; + + while (status != Z_STREAM_END) { + if (size && !s.avail_in) { + size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); + ssize_t read_result = read_in_full(fd, ibuf, rsize); + if (read_result < 0) + die_errno("failed to read from '%s'", path); + if ((size_t)read_result != rsize) + die("failed to read %u bytes from '%s'", + (unsigned)rsize, path); + offset += rsize; + if (*already_hashed_to < offset) { + size_t hsize = offset - *already_hashed_to; + if (rsize < hsize) + hsize = rsize; + if (hsize) + git_hash_update(ctx, ibuf, hsize); + *already_hashed_to = offset; + } + s.next_in = ibuf; + s.avail_in = rsize; + size -= rsize; + } + + status = git_deflate(&s, size ? 0 : Z_FINISH); + + if (!s.avail_out || status == Z_STREAM_END) { + if (write_object) { + size_t written = s.next_out - obuf; + + /* would we bust the size limit? */ + if (state->nr_written && + pack_size_limit_cfg && + pack_size_limit_cfg < state->offset + written) { + git_deflate_abort(&s); + return -1; + } + + hashwrite(state->f, obuf, written); + state->offset += written; + } + s.next_out = obuf; + s.avail_out = sizeof(obuf); + } + + switch (status) { + case Z_OK: + case Z_BUF_ERROR: + case Z_STREAM_END: + continue; + default: + die("unexpected deflate failure: %d", status); + } + } + git_deflate_end(&s); + return 0; +} + +static void finish_tmp_packfile(struct odb_transaction *transaction, + struct strbuf *basename, + unsigned char hash[]) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; + char *idx_tmp_name = NULL; + + stage_tmp_packfiles(repo, basename, state->pack_tmp_name, + state->written, state->nr_written, NULL, + &state->pack_idx_opts, hash, &idx_tmp_name); + rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); + + free(idx_tmp_name); +} + +static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + struct repository *repo = transaction->odb->repo; + unsigned char hash[GIT_MAX_RAWSZ]; + struct strbuf packname = STRBUF_INIT; + + if (!state->f) + return; + + if (state->nr_written == 0) { + close(state->f->fd); + free_hashfile(state->f); + unlink(state->pack_tmp_name); + goto clear_exit; + } else if (state->nr_written == 1) { + finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, + CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); + } else { + int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0); + fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name, + state->nr_written, hash, + state->offset); + close(fd); + } + + strbuf_addf(&packname, "%s/pack/pack-%s.", + repo_get_object_directory(transaction->odb->repo), + hash_to_hex_algop(hash, repo->hash_algo)); + + finish_tmp_packfile(transaction, &packname, hash); + for (uint32_t i = 0; i < state->nr_written; i++) + free(state->written[i]); + +clear_exit: + free(state->pack_tmp_name); + free(state->written); + memset(state, 0, sizeof(*state)); + + strbuf_release(&packname); + /* Make objects we just wrote available to ourselves */ + reprepare_packed_git(repo); +} + +/* + * This writes the specified object to a packfile. Objects written here + * during the same transaction are written to the same packfile. The + * packfile is not flushed until the transaction is flushed. The caller + * is expected to ensure a valid transaction is setup for objects to be + * recorded to. + * + * This also bypasses the usual "convert-to-git" dance, and that is on + * purpose. We could write a streaming version of the converting + * functions and insert that before feeding the data to fast-import + * (or equivalent in-core API described above). However, that is + * somewhat complicated, as we do not know the size of the filter + * result, which we need to know beforehand when writing a git object. + * Since the primary motivation for trying to stream from the working + * tree file and to avoid mmaping it in core is to deal with large + * binary blobs, they generally do not want to get any conversion, and + * callers should avoid this code path when filters are requested. + */ +static int index_blob_bulk_checkin(struct odb_transaction *transaction, + struct object_id *result_oid, int fd, size_t size, + const char *path, unsigned flags) +{ + struct bulk_checkin_packfile *state = &transaction->packfile; + off_t seekback, already_hashed_to; + struct git_hash_ctx ctx; + unsigned char obuf[16384]; + unsigned header_len; + struct hashfile_checkpoint checkpoint; + struct pack_idx_entry *idx = NULL; + + seekback = lseek(fd, 0, SEEK_CUR); + if (seekback == (off_t)-1) + return error("cannot find the current offset"); + + header_len = format_object_header((char *)obuf, sizeof(obuf), + OBJ_BLOB, size); + transaction->odb->repo->hash_algo->init_fn(&ctx); + git_hash_update(&ctx, obuf, header_len); + + /* Note: idx is non-NULL when we are writing */ + if ((flags & INDEX_WRITE_OBJECT) != 0) { + CALLOC_ARRAY(idx, 1); + + prepare_to_stream(transaction, flags); + hashfile_checkpoint_init(state->f, &checkpoint); + } + + already_hashed_to = 0; + + while (1) { + prepare_to_stream(transaction, flags); + if (idx) { + hashfile_checkpoint(state->f, &checkpoint); + idx->offset = state->offset; + crc32_begin(state->f); + } + if (!stream_blob_to_pack(state, &ctx, &already_hashed_to, + fd, size, path, flags)) + break; + /* + * Writing this object to the current pack will make + * it too big; we need to truncate it, start a new + * pack, and write into it. + */ + if (!idx) + BUG("should not happen"); + hashfile_truncate(state->f, &checkpoint); + state->offset = checkpoint.offset; + flush_bulk_checkin_packfile(transaction); + if (lseek(fd, seekback, SEEK_SET) == (off_t)-1) + return error("cannot seek back"); + } + git_hash_final_oid(result_oid, &ctx); + if (!idx) + return 0; + + idx->crc32 = crc32_end(state->f); + if (already_written(transaction, result_oid)) { + hashfile_truncate(state->f, &checkpoint); + state->offset = checkpoint.offset; + free(idx); + } else { + oidcpy(&idx->oid, result_oid); + ALLOC_GROW(state->written, + state->nr_written + 1, + state->alloc_written); + state->written[state->nr_written++] = idx; + } + return 0; +} + int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags) @@ -1609,3 +1974,30 @@ out: munmap(map, mapsize); return ret; } + +struct odb_transaction *begin_odb_transaction(struct object_database *odb) +{ + if (odb->transaction) + return NULL; + + CALLOC_ARRAY(odb->transaction, 1); + odb->transaction->odb = odb; + + return odb->transaction; +} + +void end_odb_transaction(struct odb_transaction *transaction) +{ + if (!transaction) + return; + + /* + * Ensure the transaction ending matches the pending transaction. + */ + ASSERT(transaction == transaction->odb->transaction); + + flush_batch_fsync(transaction); + flush_bulk_checkin_packfile(transaction); + transaction->odb->transaction = NULL; + free(transaction); +} diff --git a/object-file.h b/object-file.h index 15d97630d3..6323d2e63c 100644 --- a/object-file.h +++ b/object-file.h @@ -218,4 +218,20 @@ int read_loose_object(struct repository *repo, void **contents, struct object_info *oi); +struct odb_transaction; + +/* + * Tell the object database to optimize for adding + * multiple objects. end_odb_transaction must be called + * to make new objects visible. If a transaction is already + * pending, NULL is returned. + */ +struct odb_transaction *begin_odb_transaction(struct object_database *odb); + +/* + * Tell the object database to make any objects from the + * current transaction visible. + */ +void end_odb_transaction(struct odb_transaction *transaction); + #endif /* OBJECT_FILE_H */ diff --git a/read-cache.c b/read-cache.c index 229b8ef11c..80591eeced 100644 --- a/read-cache.c +++ b/read-cache.c @@ -8,7 +8,6 @@ #define DISABLE_SIGN_COMPARE_WARNINGS #include "git-compat-util.h" -#include "bulk-checkin.h" #include "config.h" #include "date.h" #include "diff.h" -- cgit v1.3 From ed0f5f93e9f0b0b3cc1a37ee5b10b625590f08c8 Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:37 -0500 Subject: object-file: update naming from bulk-checkin Update the names of several functions and types relocated from the bulk-checkin subsystem for better clarity. Also drop finish_tmp_packfile() as a standalone function in favor of embedding it in flush_packfile_transaction() directly. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- object-file.c | 80 +++++++++++++++++++++++++++-------------------------------- 1 file changed, 36 insertions(+), 44 deletions(-) (limited to 'object-file.c') diff --git a/object-file.c b/object-file.c index 03f9931b83..8103a2bf41 100644 --- a/object-file.c +++ b/object-file.c @@ -667,7 +667,7 @@ void hash_object_file(const struct git_hash_algo *algo, const void *buf, write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); } -struct bulk_checkin_packfile { +struct transaction_packfile { char *pack_tmp_name; struct hashfile *f; off_t offset; @@ -682,10 +682,10 @@ struct odb_transaction { struct object_database *odb; struct tmp_objdir *objdir; - struct bulk_checkin_packfile packfile; + struct transaction_packfile packfile; }; -static void prepare_loose_object_bulk_checkin(struct odb_transaction *transaction) +static void prepare_loose_object_transaction(struct odb_transaction *transaction) { /* * We lazily create the temporary object directory @@ -701,7 +701,7 @@ static void prepare_loose_object_bulk_checkin(struct odb_transaction *transactio tmp_objdir_replace_primary_odb(transaction->objdir, 0); } -static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, +static void fsync_loose_object_transaction(struct odb_transaction *transaction, int fd, const char *filename) { /* @@ -722,7 +722,7 @@ static void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction, /* * Cleanup after batch-mode fsync_object_files. */ -static void flush_batch_fsync(struct odb_transaction *transaction) +static void flush_loose_object_transaction(struct odb_transaction *transaction) { struct strbuf temp_path = STRBUF_INIT; struct tempfile *temp; @@ -733,7 +733,7 @@ static void flush_batch_fsync(struct odb_transaction *transaction) /* * Issue a full hardware flush against a temporary file to ensure * that all objects are durable before any renames occur. The code in - * fsync_loose_object_bulk_checkin has already issued a writeout + * fsync_loose_object_transaction has already issued a writeout * request, but it has not flushed any writeback cache in the storage * hardware or any filesystem logs. This fsync call acts as a barrier * to ensure that the data in each new object file is durable before @@ -762,7 +762,7 @@ static void close_loose_object(struct odb_source *source, goto out; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - fsync_loose_object_bulk_checkin(source->odb->transaction, fd, filename); + fsync_loose_object_transaction(source->odb->transaction, fd, filename); else if (fsync_object_files > 0) fsync_or_die(fd, filename); else @@ -940,7 +940,7 @@ static int write_loose_object(struct odb_source *source, static struct strbuf filename = STRBUF_INIT; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(source->odb->transaction); + prepare_loose_object_transaction(source->odb->transaction); odb_loose_path(source, &filename, oid); @@ -1029,7 +1029,7 @@ int stream_loose_object(struct odb_source *source, int hdrlen; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_bulk_checkin(source->odb->transaction); + prepare_loose_object_transaction(source->odb->transaction); /* Since oid is not determined, save tmp file to odb path. */ strbuf_addf(&filename, "%s/", source->path); @@ -1349,10 +1349,10 @@ static int already_written(struct odb_transaction *transaction, } /* Lazily create backing packfile for the state */ -static void prepare_to_stream(struct odb_transaction *transaction, - unsigned flags) +static void prepare_packfile_transaction(struct odb_transaction *transaction, + unsigned flags) { - struct bulk_checkin_packfile *state = &transaction->packfile; + struct transaction_packfile *state = &transaction->packfile; if (!(flags & INDEX_WRITE_OBJECT) || state->f) return; @@ -1381,7 +1381,7 @@ static void prepare_to_stream(struct odb_transaction *transaction, * status before calling us just in case we ask it to call us again * with a new pack. */ -static int stream_blob_to_pack(struct bulk_checkin_packfile *state, +static int stream_blob_to_pack(struct transaction_packfile *state, struct git_hash_ctx *ctx, off_t *already_hashed_to, int fd, size_t size, const char *path, unsigned flags) @@ -1457,28 +1457,13 @@ static int stream_blob_to_pack(struct bulk_checkin_packfile *state, return 0; } -static void finish_tmp_packfile(struct odb_transaction *transaction, - struct strbuf *basename, - unsigned char hash[]) +static void flush_packfile_transaction(struct odb_transaction *transaction) { - struct bulk_checkin_packfile *state = &transaction->packfile; - struct repository *repo = transaction->odb->repo; - char *idx_tmp_name = NULL; - - stage_tmp_packfiles(repo, basename, state->pack_tmp_name, - state->written, state->nr_written, NULL, - &state->pack_idx_opts, hash, &idx_tmp_name); - rename_tmp_packfile_idx(repo, basename, &idx_tmp_name); - - free(idx_tmp_name); -} - -static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) -{ - struct bulk_checkin_packfile *state = &transaction->packfile; + struct transaction_packfile *state = &transaction->packfile; struct repository *repo = transaction->odb->repo; unsigned char hash[GIT_MAX_RAWSZ]; struct strbuf packname = STRBUF_INIT; + char *idx_tmp_name = NULL; if (!state->f) return; @@ -1503,11 +1488,16 @@ static void flush_bulk_checkin_packfile(struct odb_transaction *transaction) repo_get_object_directory(transaction->odb->repo), hash_to_hex_algop(hash, repo->hash_algo)); - finish_tmp_packfile(transaction, &packname, hash); + stage_tmp_packfiles(repo, &packname, state->pack_tmp_name, + state->written, state->nr_written, NULL, + &state->pack_idx_opts, hash, &idx_tmp_name); + rename_tmp_packfile_idx(repo, &packname, &idx_tmp_name); + for (uint32_t i = 0; i < state->nr_written; i++) free(state->written[i]); clear_exit: + free(idx_tmp_name); free(state->pack_tmp_name); free(state->written); memset(state, 0, sizeof(*state)); @@ -1535,11 +1525,12 @@ clear_exit: * binary blobs, they generally do not want to get any conversion, and * callers should avoid this code path when filters are requested. */ -static int index_blob_bulk_checkin(struct odb_transaction *transaction, - struct object_id *result_oid, int fd, size_t size, - const char *path, unsigned flags) +static int index_blob_packfile_transaction(struct odb_transaction *transaction, + struct object_id *result_oid, int fd, + size_t size, const char *path, + unsigned flags) { - struct bulk_checkin_packfile *state = &transaction->packfile; + struct transaction_packfile *state = &transaction->packfile; off_t seekback, already_hashed_to; struct git_hash_ctx ctx; unsigned char obuf[16384]; @@ -1560,14 +1551,14 @@ static int index_blob_bulk_checkin(struct odb_transaction *transaction, if ((flags & INDEX_WRITE_OBJECT) != 0) { CALLOC_ARRAY(idx, 1); - prepare_to_stream(transaction, flags); + prepare_packfile_transaction(transaction, flags); hashfile_checkpoint_init(state->f, &checkpoint); } already_hashed_to = 0; while (1) { - prepare_to_stream(transaction, flags); + prepare_packfile_transaction(transaction, flags); if (idx) { hashfile_checkpoint(state->f, &checkpoint); idx->offset = state->offset; @@ -1585,7 +1576,7 @@ static int index_blob_bulk_checkin(struct odb_transaction *transaction, BUG("should not happen"); hashfile_truncate(state->f, &checkpoint); state->offset = checkpoint.offset; - flush_bulk_checkin_packfile(transaction); + flush_packfile_transaction(transaction); if (lseek(fd, seekback, SEEK_SET) == (off_t)-1) return error("cannot seek back"); } @@ -1632,9 +1623,10 @@ int index_fd(struct index_state *istate, struct object_id *oid, struct odb_transaction *transaction; transaction = begin_odb_transaction(the_repository->objects); - ret = index_blob_bulk_checkin(the_repository->objects->transaction, - oid, fd, xsize_t(st->st_size), - path, flags); + ret = index_blob_packfile_transaction(the_repository->objects->transaction, + oid, fd, + xsize_t(st->st_size), + path, flags); end_odb_transaction(transaction); } @@ -1996,8 +1988,8 @@ void end_odb_transaction(struct odb_transaction *transaction) */ ASSERT(transaction == transaction->odb->transaction); - flush_batch_fsync(transaction); - flush_bulk_checkin_packfile(transaction); + flush_loose_object_transaction(transaction); + flush_packfile_transaction(transaction); transaction->odb->transaction = NULL; free(transaction); } -- cgit v1.3 From ce1661f9da70ea2ffcb54f7b544410fad26e965d Mon Sep 17 00:00:00 2001 From: Justin Tobler Date: Tue, 16 Sep 2025 13:29:38 -0500 Subject: odb: add transaction interface Transactions are managed via the {begin,end}_odb_transaction() function in the object-file subsystem and its implementation is specific to the files object source. Introduce odb_transaction_{begin,commit}() in the odb subsystem to provide an eventual object source agnostic means to manage transactions. Update call sites to instead manage transactions through the odb subsystem. Also rename {begin,end}_odb_transaction() functions to object_file_transaction_{begin,commit}() to clarify the object source it supports. Signed-off-by: Justin Tobler Signed-off-by: Junio C Hamano --- builtin/add.c | 5 +++-- builtin/unpack-objects.c | 4 ++-- builtin/update-index.c | 7 ++++--- cache-tree.c | 4 ++-- object-file.c | 12 +++++++----- object-file.h | 6 +++--- odb.c | 10 ++++++++++ odb.h | 13 +++++++++++++ read-cache.c | 4 ++-- 9 files changed, 46 insertions(+), 19 deletions(-) (limited to 'object-file.c') diff --git a/builtin/add.c b/builtin/add.c index 8294366d68..bf312c40be 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -15,6 +15,7 @@ #include "pathspec.h" #include "run-command.h" #include "object-file.h" +#include "odb.h" #include "parse-options.h" #include "path.h" #include "preload-index.h" @@ -575,7 +576,7 @@ int cmd_add(int argc, string_list_clear(&only_match_skip_worktree, 0); } - transaction = begin_odb_transaction(repo->objects); + transaction = odb_transaction_begin(repo->objects); ps_matched = xcalloc(pathspec.nr, 1); if (add_renormalize) @@ -594,7 +595,7 @@ int cmd_add(int argc, if (chmod_arg && pathspec.nr) exit_status |= chmod_pathspec(repo, &pathspec, chmod_arg[0], show_only); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); finish: if (write_locked_index(repo->index, &lock_file, diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 4596fff0da..ef79e43715 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -599,12 +599,12 @@ static void unpack_all(void) progress = start_progress(the_repository, _("Unpacking objects"), nr_objects); CALLOC_ARRAY(obj_list, nr_objects); - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); for (i = 0; i < nr_objects; i++) { unpack_one(i); display_progress(progress, i + 1); } - end_odb_transaction(transaction); + odb_transaction_commit(transaction); stop_progress(&progress); if (delta_list) diff --git a/builtin/update-index.c b/builtin/update-index.c index ee01c4e423..8a5907767b 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -18,6 +18,7 @@ #include "cache-tree.h" #include "tree-walk.h" #include "object-file.h" +#include "odb.h" #include "refs.h" #include "resolve-undo.h" #include "parse-options.h" @@ -1122,7 +1123,7 @@ int cmd_update_index(int argc, * Allow the object layer to optimize adding multiple objects in * a batch. */ - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); while (ctx.argc) { if (parseopt_state != PARSE_OPT_DONE) parseopt_state = parse_options_step(&ctx, options, @@ -1152,7 +1153,7 @@ int cmd_update_index(int argc, * a transaction. */ if (transaction && verbose) { - end_odb_transaction(transaction); + odb_transaction_commit(transaction); transaction = NULL; } @@ -1220,7 +1221,7 @@ int cmd_update_index(int argc, /* * By now we have added all of the new objects */ - end_odb_transaction(transaction); + odb_transaction_commit(transaction); if (split_index > 0) { if (repo_config_get_split_index(the_repository) == 0) diff --git a/cache-tree.c b/cache-tree.c index 79ddf6b727..2aba47060e 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -489,10 +489,10 @@ int cache_tree_update(struct index_state *istate, int flags) trace_performance_enter(); trace2_region_enter("cache_tree", "update", the_repository); - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); i = update_one(istate->cache_tree, istate->cache, istate->cache_nr, "", 0, &skip, flags); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); trace2_region_leave("cache_tree", "update", the_repository); trace_performance_leave("cache_tree_update"); if (i < 0) diff --git a/object-file.c b/object-file.c index 8103a2bf41..17a236d2fe 100644 --- a/object-file.c +++ b/object-file.c @@ -691,7 +691,7 @@ static void prepare_loose_object_transaction(struct odb_transaction *transaction * We lazily create the temporary object directory * the first time an object might be added, since * callers may not know whether any objects will be - * added at the time they call begin_odb_transaction. + * added at the time they call object_file_transaction_begin. */ if (!transaction || transaction->objdir) return; @@ -1622,12 +1622,12 @@ int index_fd(struct index_state *istate, struct object_id *oid, } else { struct odb_transaction *transaction; - transaction = begin_odb_transaction(the_repository->objects); + transaction = odb_transaction_begin(the_repository->objects); ret = index_blob_packfile_transaction(the_repository->objects->transaction, oid, fd, xsize_t(st->st_size), path, flags); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); } close(fd); @@ -1967,8 +1967,10 @@ out: return ret; } -struct odb_transaction *begin_odb_transaction(struct object_database *odb) +struct odb_transaction *object_file_transaction_begin(struct odb_source *source) { + struct object_database *odb = source->odb; + if (odb->transaction) return NULL; @@ -1978,7 +1980,7 @@ struct odb_transaction *begin_odb_transaction(struct object_database *odb) return odb->transaction; } -void end_odb_transaction(struct odb_transaction *transaction) +void object_file_transaction_commit(struct odb_transaction *transaction) { if (!transaction) return; diff --git a/object-file.h b/object-file.h index 6323d2e63c..3fd48dcafb 100644 --- a/object-file.h +++ b/object-file.h @@ -222,16 +222,16 @@ struct odb_transaction; /* * Tell the object database to optimize for adding - * multiple objects. end_odb_transaction must be called + * multiple objects. object_file_transaction_commit must be called * to make new objects visible. If a transaction is already * pending, NULL is returned. */ -struct odb_transaction *begin_odb_transaction(struct object_database *odb); +struct odb_transaction *object_file_transaction_begin(struct odb_source *source); /* * Tell the object database to make any objects from the * current transaction visible. */ -void end_odb_transaction(struct odb_transaction *transaction); +void object_file_transaction_commit(struct odb_transaction *transaction); #endif /* OBJECT_FILE_H */ diff --git a/odb.c b/odb.c index 2a92a018c4..af9534bfe1 100644 --- a/odb.c +++ b/odb.c @@ -1051,3 +1051,13 @@ void odb_clear(struct object_database *o) hashmap_clear(&o->pack_map); string_list_clear(&o->submodule_source_paths, 0); } + +struct odb_transaction *odb_transaction_begin(struct object_database *odb) +{ + return object_file_transaction_begin(odb->sources); +} + +void odb_transaction_commit(struct odb_transaction *transaction) +{ + object_file_transaction_commit(transaction); +} diff --git a/odb.h b/odb.h index a89b214390..82093753c8 100644 --- a/odb.h +++ b/odb.h @@ -185,6 +185,19 @@ struct object_database { struct object_database *odb_new(struct repository *repo); void odb_clear(struct object_database *o); +/* + * Starts an ODB transaction. Subsequent objects are written to the transaction + * and not committed until odb_transaction_commit() is invoked on the + * transaction. If the ODB already has a pending transaction, NULL is returned. + */ +struct odb_transaction *odb_transaction_begin(struct object_database *odb); + +/* + * Commits an ODB transaction making the written objects visible. If the + * specified transaction is NULL, the function is a no-op. + */ +void odb_transaction_commit(struct odb_transaction *transaction); + /* * Find source by its object directory path. Dies in case the source couldn't * be found. diff --git a/read-cache.c b/read-cache.c index 80591eeced..94098a3861 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3972,9 +3972,9 @@ int add_files_to_cache(struct repository *repo, const char *prefix, * This function is invoked from commands other than 'add', which * may not have their own transaction active. */ - transaction = begin_odb_transaction(repo->objects); + transaction = odb_transaction_begin(repo->objects); run_diff_files(&rev, DIFF_RACY_IS_MODIFIED); - end_odb_transaction(transaction); + odb_transaction_commit(transaction); release_revisions(&rev); return !!data.add_errors; -- cgit v1.3