aboutsummaryrefslogtreecommitdiff
path: root/builtin/gc.c
diff options
context:
space:
mode:
Diffstat (limited to 'builtin/gc.c')
-rw-r--r--builtin/gc.c178
1 files changed, 120 insertions, 58 deletions
diff --git a/builtin/gc.c b/builtin/gc.c
index d212cbb9b8..3a71e314c9 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -36,6 +36,7 @@
#include "reflog.h"
#include "repack.h"
#include "rerere.h"
+#include "revision.h"
#include "blob.h"
#include "tree.h"
#include "promisor-remote.h"
@@ -286,12 +287,26 @@ static void maintenance_run_opts_release(struct maintenance_run_opts *opts)
static int pack_refs_condition(UNUSED struct gc_config *cfg)
{
- /*
- * The auto-repacking logic for refs is handled by the ref backends and
- * exposed via `git pack-refs --auto`. We thus always return truish
- * here and let the backend decide for us.
- */
- return 1;
+ struct string_list included_refs = STRING_LIST_INIT_NODUP;
+ struct ref_exclusions excludes = REF_EXCLUSIONS_INIT;
+ struct refs_optimize_opts optimize_opts = {
+ .exclusions = &excludes,
+ .includes = &included_refs,
+ .flags = REFS_OPTIMIZE_PRUNE | REFS_OPTIMIZE_AUTO,
+ };
+ bool required;
+
+ /* Check for all refs, similar to 'git refs optimize --all'. */
+ string_list_append(optimize_opts.includes, "*");
+
+ if (refs_optimize_required(get_main_ref_store(the_repository),
+ &optimize_opts, &required))
+ return 0;
+
+ clear_ref_exclusions(&excludes);
+ string_list_clear(&included_refs, 0);
+
+ return required;
}
static int maintenance_task_pack_refs(struct maintenance_run_opts *opts,
@@ -452,37 +467,19 @@ out:
static int too_many_loose_objects(int limit)
{
/*
- * Quickly check if a "gc" is needed, by estimating how
- * many loose objects there are. Because SHA-1 is evenly
- * distributed, we can check only one and get a reasonable
- * estimate.
+ * This is weird, but stems from legacy behaviour: the GC auto
+ * threshold was always essentially interpreted as if it was rounded up
+ * to the next multiple 256 of, so we retain this behaviour for now.
*/
- DIR *dir;
- struct dirent *ent;
- int auto_threshold;
- int num_loose = 0;
- int needed = 0;
- const unsigned hexsz_loose = the_hash_algo->hexsz - 2;
- char *path;
+ int auto_threshold = DIV_ROUND_UP(limit, 256) * 256;
+ unsigned long loose_count;
- path = repo_git_path(the_repository, "objects/17");
- dir = opendir(path);
- free(path);
- if (!dir)
+ if (odb_source_loose_count_objects(the_repository->objects->sources,
+ ODB_COUNT_OBJECTS_APPROXIMATE,
+ &loose_count) < 0)
return 0;
- auto_threshold = DIV_ROUND_UP(limit, 256);
- while ((ent = readdir(dir)) != NULL) {
- if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose ||
- ent->d_name[hexsz_loose] != '\0')
- continue;
- if (++num_loose > auto_threshold) {
- needed = 1;
- break;
- }
- }
- closedir(dir);
- return needed;
+ return loose_count > auto_threshold;
}
static struct packed_git *find_base_packs(struct string_list *packs,
@@ -577,9 +574,13 @@ static uint64_t total_ram(void)
static uint64_t estimate_repack_memory(struct gc_config *cfg,
struct packed_git *pack)
{
- unsigned long nr_objects = repo_approximate_object_count(the_repository);
+ unsigned long nr_objects;
size_t os_cache, heap;
+ if (odb_count_objects(the_repository->objects,
+ ODB_COUNT_OBJECTS_APPROXIMATE, &nr_objects) < 0)
+ return 0;
+
if (!pack || !nr_objects)
return 0;
@@ -1015,7 +1016,7 @@ int cmd_gc(int argc,
struct child_process repack_cmd = CHILD_PROCESS_INIT;
repack_cmd.git_cmd = 1;
- repack_cmd.close_object_store = 1;
+ repack_cmd.odb_to_close = the_repository->objects;
strvec_pushv(&repack_cmd.args, repack_args.v);
if (run_command(&repack_cmd))
die(FAILED_RUN, repack_args.v[0]);
@@ -1048,7 +1049,7 @@ int cmd_gc(int argc,
report_garbage = report_pack_garbage;
odb_reprepare(the_repository->objects);
if (pack_garbage.nr > 0) {
- close_object_store(the_repository->objects);
+ odb_close(the_repository->objects);
clean_pack_garbage();
}
@@ -1095,34 +1096,30 @@ static int maintenance_opt_schedule(const struct option *opt, const char *arg,
return 0;
}
-/* Remember to update object flag allocation in object.h */
-#define SEEN (1u<<0)
-
struct cg_auto_data {
int num_not_in_graph;
int limit;
};
-static int dfs_on_ref(const char *refname UNUSED,
- const char *referent UNUSED,
- const struct object_id *oid,
- int flags UNUSED,
- void *cb_data)
+static int dfs_on_ref(const struct reference *ref, void *cb_data)
{
struct cg_auto_data *data = (struct cg_auto_data *)cb_data;
int result = 0;
+ const struct object_id *maybe_peeled = ref->oid;
struct object_id peeled;
struct commit_list *stack = NULL;
struct commit *commit;
- if (!peel_iterated_oid(the_repository, oid, &peeled))
- oid = &peeled;
- if (odb_read_object_info(the_repository->objects, oid, NULL) != OBJ_COMMIT)
+ if (!reference_get_peeled_oid(the_repository, ref, &peeled))
+ maybe_peeled = &peeled;
+ if (odb_read_object_info(the_repository->objects, maybe_peeled, NULL) != OBJ_COMMIT)
return 0;
- commit = lookup_commit(the_repository, oid);
- if (!commit)
+ commit = lookup_commit(the_repository, maybe_peeled);
+ if (!commit || commit->object.flags & SEEN)
return 0;
+ commit->object.flags |= SEEN;
+
if (repo_parse_commit(the_repository, commit) ||
commit_graph_position(commit) != COMMIT_NOT_FROM_GRAPH)
return 0;
@@ -1132,7 +1129,7 @@ static int dfs_on_ref(const char *refname UNUSED,
if (data->num_not_in_graph >= data->limit)
return 1;
- commit_list_append(commit, &stack);
+ commit_list_insert(commit, &stack);
while (!result && stack) {
struct commit_list *parent;
@@ -1153,11 +1150,11 @@ static int dfs_on_ref(const char *refname UNUSED,
break;
}
- commit_list_append(parent->item, &stack);
+ commit_list_insert(parent->item, &stack);
}
}
- free_commit_list(stack);
+ commit_list_free(stack);
return result;
}
@@ -1188,7 +1185,8 @@ static int run_write_commit_graph(struct maintenance_run_opts *opts)
{
struct child_process child = CHILD_PROCESS_INIT;
- child.git_cmd = child.close_object_store = 1;
+ child.git_cmd = 1;
+ child.odb_to_close = the_repository->objects;
strvec_pushl(&child.args, "commit-graph", "write",
"--split", "--reachable", NULL);
@@ -1257,7 +1255,8 @@ static int maintenance_task_gc_background(struct maintenance_run_opts *opts,
{
struct child_process child = CHILD_PROCESS_INIT;
- child.git_cmd = child.close_object_store = 1;
+ child.git_cmd = 1;
+ child.odb_to_close = the_repository->objects;
strvec_push(&child.args, "gc");
if (opts->auto_flag)
@@ -1473,7 +1472,8 @@ static int multi_pack_index_expire(struct maintenance_run_opts *opts)
{
struct child_process child = CHILD_PROCESS_INIT;
- child.git_cmd = child.close_object_store = 1;
+ child.git_cmd = 1;
+ child.odb_to_close = the_repository->objects;
strvec_pushl(&child.args, "multi-pack-index", "expire", NULL);
if (opts->quiet)
@@ -1531,7 +1531,8 @@ static int multi_pack_index_repack(struct maintenance_run_opts *opts)
{
struct child_process child = CHILD_PROCESS_INIT;
- child.git_cmd = child.close_object_store = 1;
+ child.git_cmd = 1;
+ child.odb_to_close = the_repository->objects;
strvec_pushl(&child.args, "multi-pack-index", "repack", NULL);
if (opts->quiet)
@@ -1969,7 +1970,7 @@ static void initialize_task_config(struct maintenance_run_opts *opts,
strategy = none_strategy;
type = MAINTENANCE_TYPE_SCHEDULED;
} else {
- strategy = gc_strategy;
+ strategy = geometric_strategy;
type = MAINTENANCE_TYPE_MANUAL;
}
@@ -3447,7 +3448,67 @@ static int maintenance_stop(int argc, const char **argv, const char *prefix,
return update_background_schedule(NULL, 0);
}
-static const char * const builtin_maintenance_usage[] = {
+static const char *const builtin_maintenance_is_needed_usage[] = {
+ "git maintenance is-needed [--task=<task>] [--schedule]",
+ NULL
+};
+
+static int maintenance_is_needed(int argc, const char **argv, const char *prefix,
+ struct repository *repo UNUSED)
+{
+ struct maintenance_run_opts opts = MAINTENANCE_RUN_OPTS_INIT;
+ struct string_list selected_tasks = STRING_LIST_INIT_DUP;
+ struct gc_config cfg = GC_CONFIG_INIT;
+ struct option options[] = {
+ OPT_BOOL(0, "auto", &opts.auto_flag,
+ N_("run tasks based on the state of the repository")),
+ OPT_CALLBACK_F(0, "task", &selected_tasks, N_("task"),
+ N_("check a specific task"),
+ PARSE_OPT_NONEG, task_option_parse),
+ OPT_END()
+ };
+ bool is_needed = false;
+
+ argc = parse_options(argc, argv, prefix, options,
+ builtin_maintenance_is_needed_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (argc)
+ usage_with_options(builtin_maintenance_is_needed_usage, options);
+
+ gc_config(&cfg);
+ initialize_task_config(&opts, &selected_tasks);
+
+ if (opts.auto_flag) {
+ for (size_t i = 0; i < opts.tasks_nr; i++) {
+ if (tasks[opts.tasks[i]].auto_condition &&
+ tasks[opts.tasks[i]].auto_condition(&cfg)) {
+ is_needed = true;
+ break;
+ }
+ }
+ } else {
+ /*
+ * When not using --auto we always require maintenance right now.
+ *
+ * TODO: this certainly is too eager, as some maintenance tasks may
+ * decide to not do anything because the data structures are already
+ * fully optimized. We may eventually want to extend the auto
+ * condition to also cover non-auto runs so that we can detect such
+ * cases.
+ */
+ is_needed = true;
+ }
+
+ string_list_clear(&selected_tasks, 0);
+ maintenance_run_opts_release(&opts);
+ gc_config_release(&cfg);
+
+ if (is_needed)
+ return 0;
+ return 1;
+}
+
+static const char *const builtin_maintenance_usage[] = {
N_("git maintenance <subcommand> [<options>]"),
NULL,
};
@@ -3464,6 +3525,7 @@ int cmd_maintenance(int argc,
OPT_SUBCOMMAND("stop", &fn, maintenance_stop),
OPT_SUBCOMMAND("register", &fn, maintenance_register),
OPT_SUBCOMMAND("unregister", &fn, maintenance_unregister),
+ OPT_SUBCOMMAND("is-needed", &fn, maintenance_is_needed),
OPT_END(),
};