aboutsummaryrefslogtreecommitdiff
path: root/odb.h
diff options
context:
space:
mode:
Diffstat (limited to 'odb.h')
-rw-r--r--odb.h343
1 files changed, 231 insertions, 112 deletions
diff --git a/odb.h b/odb.h
index e6602dd90c..3a711f6547 100644
--- a/odb.h
+++ b/odb.h
@@ -1,18 +1,25 @@
#ifndef ODB_H
#define ODB_H
-#include "hashmap.h"
#include "object.h"
#include "oidset.h"
#include "oidmap.h"
#include "string-list.h"
#include "thread-utils.h"
-struct oidmap;
-struct oidtree;
-struct strbuf;
+struct cached_object_entry;
+struct packed_git;
struct repository;
-struct multi_pack_index;
+struct strbuf;
+struct strvec;
+
+/*
+ * Set this to 0 to prevent odb_read_object_info_extended() from fetching missing
+ * blobs. This has a difference only if extensions.partialClone is set.
+ *
+ * Its default value is 1.
+ */
+extern int fetch_if_missing;
/*
* Compute the exact path an alternate is at and returns it. In case of
@@ -23,77 +30,23 @@ struct multi_pack_index;
char *compute_alternate_path(const char *path, struct strbuf *err);
/*
- * The source is the part of the object database that stores the actual
- * objects. It thus encapsulates the logic to read and write the specific
- * on-disk format. An object database can have multiple sources:
- *
- * - The primary source, which is typically located in "$GIT_DIR/objects".
- * This is where new objects are usually written to.
+ * A transaction may be started for an object database prior to writing new
+ * objects via odb_transaction_begin(). These objects are not committed until
+ * odb_transaction_commit() is invoked. Only a single transaction may be pending
+ * at a time.
*
- * - Alternate sources, which are configured via "objects/info/alternates" or
- * via the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable. These
- * alternate sources are only used to read objects.
+ * Each ODB source is expected to implement its own transaction handling.
*/
-struct odb_source {
- struct odb_source *next;
-
- /* Object database that owns this object source. */
- struct object_database *odb;
-
- /*
- * Used to store the results of readdir(3) calls when we are OK
- * sacrificing accuracy due to races for speed. That includes
- * object existence with OBJECT_INFO_QUICK, as well as
- * our search for unique abbreviated hashes. Don't use it for tasks
- * requiring greater accuracy!
- *
- * Be sure to call odb_load_loose_cache() before using.
- */
- uint32_t loose_objects_subdir_seen[8]; /* 256 bits */
- struct oidtree *loose_objects_cache;
-
- /* Map between object IDs for loose objects. */
- struct loose_object_map *loose_map;
-
- /*
- * private data
- *
- * should only be accessed directly by packfile.c and midx.c
- */
- struct multi_pack_index *midx;
-
- /*
- * Figure out whether this is the local source of the owning
- * repository, which would typically be its ".git/objects" directory.
- * This local object directory is usually where objects would be
- * written to.
- */
- bool local;
-
- /*
- * This is a temporary object store created by the tmp_objdir
- * facility. Disable ref updates since the objects in the store
- * might be discarded on rollback.
- */
- int disable_ref_updates;
-
- /*
- * This object store is ephemeral, so there is no need to fsync.
- */
- int will_destroy;
+struct odb_transaction;
+typedef void (*odb_transaction_commit_fn)(struct odb_transaction *transaction);
+struct odb_transaction {
+ /* The ODB source the transaction is opened against. */
+ struct odb_source *source;
- /*
- * Path to the source. If this is a relative path, it is relative to
- * the current working directory.
- */
- char *path;
+ /* The ODB source specific callback invoked to commit a transaction. */
+ odb_transaction_commit_fn commit;
};
-struct packed_git;
-struct packfile_store;
-struct cached_object_entry;
-struct odb_transaction;
-
/*
* The object database encapsulates access to objects in a repository. It
* manages one or more sources that store the actual objects which are
@@ -139,9 +92,6 @@ struct object_database {
struct commit_graph *commit_graph;
unsigned commit_graph_attempted : 1; /* if loading has been attempted */
- /* Should only be accessed directly by packfile.c and midx.c. */
- struct packfile_store *packfiles;
-
/*
* This is meant to hold a *small* number of objects that you would
* want odb_read_object() to be able to return, but yet you do not want
@@ -154,10 +104,11 @@ struct object_database {
/*
* A fast, rough count of the number of objects in the repository.
* These two fields are not meant for direct access. Use
- * repo_approximate_object_count() instead.
+ * odb_count_objects() instead.
*/
- unsigned long approximate_object_count;
- unsigned approximate_object_count_valid : 1;
+ unsigned long object_count;
+ unsigned object_count_flags;
+ unsigned object_count_valid : 1;
/*
* Submodule source paths that will be added as additional sources to
@@ -166,8 +117,30 @@ struct object_database {
struct string_list submodule_source_paths;
};
-struct object_database *odb_new(struct repository *repo);
-void odb_clear(struct object_database *o);
+/*
+ * Create a new object database for the given repository.
+ *
+ * If the primary source parameter is set it will override the usual primary
+ * object directory derived from the repository's common directory. The
+ * alternate sources are expected to be a PATH_SEP-separated list of secondary
+ * sources. Note that these alternate sources will be added in addition to, not
+ * instead of, the alternates identified by the primary source.
+ *
+ * Returns the newly created object database.
+ */
+struct object_database *odb_new(struct repository *repo,
+ const char *primary_source,
+ const char *alternate_sources);
+
+/* Free the object database and release all resources. */
+void odb_free(struct object_database *o);
+
+/*
+ * Close the object database and all of its sources so that any held resources
+ * will be released. The database can still be used after closing it, in which
+ * case these resources may be reallocated.
+ */
+void odb_close(struct object_database *o);
/*
* Clear caches, reload alternates and then reload object sources so that new
@@ -314,12 +287,24 @@ struct object_info {
struct object_id *delta_base_oid;
void **contentp;
+ /*
+ * The time the given looked-up object has been last modified.
+ *
+ * Note: the mtime may be ambiguous in case the object exists multiple
+ * times in the object database. It is thus _not_ recommended to use
+ * this field outside of contexts where you would read every instance
+ * of the object, like for example with `odb_for_each_object()`. As it
+ * is impossible to say at the ODB level what the intent of the caller
+ * is (e.g. whether to find the oldest or newest object), it is the
+ * responsibility of the caller to disambiguate the mtimes.
+ */
+ time_t *mtimep;
+
/* Response */
enum {
OI_CACHED,
OI_LOOSE,
OI_PACKED,
- OI_DBCACHED
} whence;
union {
/*
@@ -333,7 +318,12 @@ struct object_info {
struct {
struct packed_git *pack;
off_t offset;
- unsigned int is_delta;
+ enum packed_object_type {
+ PACKED_OBJECT_TYPE_UNKNOWN,
+ PACKED_OBJECT_TYPE_FULL,
+ PACKED_OBJECT_TYPE_OFS_DELTA,
+ PACKED_OBJECT_TYPE_REF_DELTA,
+ } type;
} packed;
} u;
};
@@ -344,23 +334,41 @@ struct object_info {
*/
#define OBJECT_INFO_INIT { 0 }
-/* Invoke lookup_replace_object() on the given hash */
-#define OBJECT_INFO_LOOKUP_REPLACE 1
-/* Do not retry packed storage after checking packed and loose storage */
-#define OBJECT_INFO_QUICK 8
-/*
- * Do not attempt to fetch the object if missing (even if fetch_is_missing is
- * nonzero).
- */
-#define OBJECT_INFO_SKIP_FETCH_OBJECT 16
-/*
- * This is meant for bulk prefetching of missing blobs in a partial
- * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK
- */
-#define OBJECT_INFO_FOR_PREFETCH (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK)
+/* Flags that can be passed to `odb_read_object_info_extended()`. */
+enum object_info_flags {
+ /* Invoke lookup_replace_object() on the given hash. */
+ OBJECT_INFO_LOOKUP_REPLACE = (1 << 0),
-/* Die if object corruption (not just an object being missing) was detected. */
-#define OBJECT_INFO_DIE_IF_CORRUPT 32
+ /* Do not reprepare object sources when the first lookup has failed. */
+ OBJECT_INFO_QUICK = (1 << 1),
+
+ /*
+ * Do not attempt to fetch the object if missing (even if fetch_is_missing is
+ * nonzero).
+ */
+ OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 2),
+
+ /* Die if object corruption (not just an object being missing) was detected. */
+ OBJECT_INFO_DIE_IF_CORRUPT = (1 << 3),
+
+ /*
+ * We have already tried reading the object, but it couldn't be found
+ * via any of the attached sources, and are now doing a second read.
+ * This second read asks the individual sources to also evaluate
+ * whether any on-disk state may have changed that may have caused the
+ * object to appear.
+ *
+ * This flag is for internal use, only. The second read only occurs
+ * when `OBJECT_INFO_QUICK` was not passed.
+ */
+ OBJECT_INFO_SECOND_READ = (1 << 4),
+
+ /*
+ * This is meant for bulk prefetching of missing blobs in a partial
+ * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK.
+ */
+ OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK),
+};
/*
* Read object info from the object database and populate the `object_info`
@@ -369,7 +377,7 @@ struct object_info {
int odb_read_object_info_extended(struct object_database *odb,
const struct object_id *oid,
struct object_info *oi,
- unsigned flags);
+ enum object_info_flags flags);
/*
* Read a subset of object info for the given object ID. Returns an `enum
@@ -381,11 +389,11 @@ int odb_read_object_info(struct object_database *odb,
const struct object_id *oid,
unsigned long *sizep);
-enum {
+enum odb_has_object_flags {
/* Retry packed storage after checking packed and loose storage */
- HAS_OBJECT_RECHECK_PACKED = (1 << 0),
+ ODB_HAS_OBJECT_RECHECK_PACKED = (1 << 0),
/* Allow fetching the object in case the repository has a promisor remote. */
- HAS_OBJECT_FETCH_PROMISOR = (1 << 1),
+ ODB_HAS_OBJECT_FETCH_PROMISOR = (1 << 1),
};
/*
@@ -394,7 +402,10 @@ enum {
*/
int odb_has_object(struct object_database *odb,
const struct object_id *oid,
- unsigned flags);
+ enum odb_has_object_flags flags);
+
+int odb_freshen_object(struct object_database *odb,
+ const struct object_id *oid);
void odb_assert_oid_type(struct object_database *odb,
const struct object_id *oid, enum object_type expect);
@@ -431,39 +442,132 @@ static inline void obj_read_unlock(void)
if(obj_read_use_lock)
pthread_mutex_unlock(&obj_read_mutex);
}
+
/* Flags for for_each_*_object(). */
-enum for_each_object_flags {
+enum odb_for_each_object_flags {
/* Iterate only over local objects, not alternates. */
- FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
+ ODB_FOR_EACH_OBJECT_LOCAL_ONLY = (1<<0),
/* Only iterate over packs obtained from the promisor remote. */
- FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
+ ODB_FOR_EACH_OBJECT_PROMISOR_ONLY = (1<<1),
/*
* Visit objects within a pack in packfile order rather than .idx order
*/
- FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
+ ODB_FOR_EACH_OBJECT_PACK_ORDER = (1<<2),
/* Only iterate over packs that are not marked as kept in-core. */
- FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
+ ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS = (1<<3),
/* Only iterate over packs that do not have .keep files. */
- FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
+ ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4),
+};
+
+/*
+ * A callback function that can be used to iterate through objects. If given,
+ * the optional `oi` parameter will be populated the same as if you would call
+ * `odb_read_object_info()`.
+ *
+ * Returning a non-zero error code will cause iteration to abort. The error
+ * code will be propagated.
+ */
+typedef int (*odb_for_each_object_cb)(const struct object_id *oid,
+ struct object_info *oi,
+ void *cb_data);
+
+/*
+ * Options that can be passed to `odb_for_each_object()` and its
+ * backend-specific implementations.
+ */
+struct odb_for_each_object_options {
+ /* A bitfield of `odb_for_each_object_flags`. */
+ enum odb_for_each_object_flags flags;
+
+ /*
+ * If set, only iterate through objects whose first `prefix_hex_len`
+ * hex characters matches the given prefix.
+ */
+ const struct object_id *prefix;
+ size_t prefix_hex_len;
};
-enum {
+/*
+ * Iterate through all objects contained in the object database. Note that
+ * objects may be iterated over multiple times in case they are either stored
+ * in different backends or in case they are stored in multiple sources.
+ * If an object info request is given, then the object info will be read and
+ * passed to the callback as if `odb_read_object_info()` was called for the
+ * object.
+ *
+ * Returning a non-zero error code from the callback function will cause
+ * iteration to abort. The error code will be propagated.
+ *
+ * Returns 0 on success, a negative error code in case a failure occurred, or
+ * an arbitrary non-zero error code returned by the callback itself.
+ */
+int odb_for_each_object_ext(struct object_database *odb,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ const struct odb_for_each_object_options *opts);
+
+/* Same as `odb_for_each_object_ext()` with `opts.flags` set to the given flags. */
+int odb_for_each_object(struct object_database *odb,
+ const struct object_info *request,
+ odb_for_each_object_cb cb,
+ void *cb_data,
+ enum odb_for_each_object_flags flags);
+
+enum odb_count_objects_flags {
+ /*
+ * Instead of providing an accurate count, allow the number of objects
+ * to be approximated. Details of how this approximation works are
+ * subject to the specific source's implementation.
+ */
+ ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0),
+};
+
+/*
+ * Count the number of objects in the given object database. This object count
+ * may double-count objects that are stored in multiple backends, or which are
+ * stored multiple times in a single backend.
+ *
+ * Returns 0 on success, a negative error code otherwise. The number of objects
+ * will be assigned to the `out` pointer on success.
+ */
+int odb_count_objects(struct object_database *odb,
+ enum odb_count_objects_flags flags,
+ unsigned long *out);
+
+/*
+ * Given an object ID, find the minimum required length required to make the
+ * object ID unique across the whole object database.
+ *
+ * The `min_len` determines the minimum abbreviated length that'll be returned
+ * by this function. If `min_len < 0`, then the function will set a sensible
+ * default minimum abbreviation length.
+ *
+ * Returns 0 on success, a negative error code otherwise. The computed length
+ * will be assigned to `*out`.
+ */
+int odb_find_abbrev_len(struct object_database *odb,
+ const struct object_id *oid,
+ int min_len,
+ unsigned *out);
+
+enum odb_write_object_flags {
/*
* By default, `odb_write_object()` does not actually write anything
* into the object store, but only computes the object ID. This flag
* changes that so that the object will be written as a loose object
* and persisted.
*/
- WRITE_OBJECT_PERSIST = (1 << 0),
+ ODB_WRITE_OBJECT_PERSIST = (1 << 0),
/*
* Do not print an error in case something goes wrong.
*/
- WRITE_OBJECT_SILENT = (1 << 1),
+ ODB_WRITE_OBJECT_SILENT = (1 << 1),
};
/*
@@ -479,7 +583,7 @@ int odb_write_object_ext(struct object_database *odb,
enum object_type type,
struct object_id *oid,
struct object_id *compat_oid,
- unsigned flags);
+ enum odb_write_object_flags flags);
static inline int odb_write_object(struct object_database *odb,
const void *buf, unsigned long len,
@@ -489,4 +593,19 @@ static inline int odb_write_object(struct object_database *odb,
return odb_write_object_ext(odb, buf, len, type, oid, NULL, 0);
}
+struct odb_write_stream {
+ const void *(*read)(struct odb_write_stream *, unsigned long *len);
+ void *data;
+ int is_finished;
+};
+
+int odb_write_object_stream(struct object_database *odb,
+ struct odb_write_stream *stream, size_t len,
+ struct object_id *oid);
+
+void parse_alternates(const char *string,
+ int sep,
+ const char *relative_base,
+ struct strvec *out);
+
#endif /* ODB_H */