aboutsummaryrefslogtreecommitdiff
path: root/submodule.c
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2022-03-25 16:38:25 -0700
committerJunio C Hamano <gitster@pobox.com>2022-03-25 16:38:25 -0700
commitdd9ff30dffdd03c579f4d867286dac3e46e05c8d (patch)
treeeeea4590b6fb9d9d470642a1608021eb70bc63d4 /submodule.c
parent6e1a8952e90e5d125177dbdee21425d1ba2d3584 (diff)
parent5fff35d880df2bb4cfce032c54a95abadce3f881 (diff)
downloadgit-dd9ff30dffdd03c579f4d867286dac3e46e05c8d.tar.xz
Merge branch 'gc/recursive-fetch-with-unused-submodules'
When "git fetch --recurse-submodules" grabbed submodule commits that would be needed to recursively check out newly fetched commits in the superproject, it only paid attention to submodules that are in the current checkout of the superproject. We now do so for all submodules that have been run "git submodule init" on. * gc/recursive-fetch-with-unused-submodules: submodule: fix latent check_has_commit() bug fetch: fetch unpopulated, changed submodules submodule: move logic into fetch_task_create() submodule: extract get_fetch_task() submodule: store new submodule commits oid_array in a struct submodule: inline submodule_commits() into caller submodule: make static functions read submodules from commits t5526: create superproject commits with test helper t5526: stop asserting on stderr literally t5526: introduce test helper to assert on fetches
Diffstat (limited to 'submodule.c')
-rw-r--r--submodule.c457
1 files changed, 309 insertions, 148 deletions
diff --git a/submodule.c b/submodule.c
index 5ace18a7d9..93c78a4dc3 100644
--- a/submodule.c
+++ b/submodule.c
@@ -167,26 +167,6 @@ void stage_updated_gitmodules(struct index_state *istate)
static struct string_list added_submodule_odb_paths = STRING_LIST_INIT_NODUP;
-/* TODO: remove this function, use repo_submodule_init instead. */
-int add_submodule_odb(const char *path)
-{
- struct strbuf objects_directory = STRBUF_INIT;
- int ret = 0;
-
- ret = strbuf_git_path_submodule(&objects_directory, path, "objects/");
- if (ret)
- goto done;
- if (!is_directory(objects_directory.buf)) {
- ret = -1;
- goto done;
- }
- string_list_insert(&added_submodule_odb_paths,
- strbuf_detach(&objects_directory, NULL));
-done:
- strbuf_release(&objects_directory);
- return ret;
-}
-
void add_submodule_odb_by_path(const char *path)
{
string_list_insert(&added_submodule_odb_paths, xstrdup(path));
@@ -782,19 +762,6 @@ const struct submodule *submodule_from_ce(const struct cache_entry *ce)
return submodule_from_path(the_repository, null_oid(), ce->name);
}
-static struct oid_array *submodule_commits(struct string_list *submodules,
- const char *name)
-{
- struct string_list_item *item;
-
- item = string_list_insert(submodules, name);
- if (item->util)
- return (struct oid_array *) item->util;
-
- /* NEEDSWORK: should we have oid_array_init()? */
- item->util = xcalloc(1, sizeof(struct oid_array));
- return (struct oid_array *) item->util;
-}
struct collect_changed_submodules_cb_data {
struct repository *repo;
@@ -819,6 +786,52 @@ static const char *default_name_or_path(const char *path_or_name)
return path_or_name;
}
+/*
+ * Holds relevant information for a changed submodule. Used as the .util
+ * member of the changed submodule name string_list_item.
+ *
+ * (super_oid, path) allows the submodule config to be read from _some_
+ * .gitmodules file. We store this information the first time we find a
+ * superproject commit that points to the submodule, but this is
+ * arbitrary - we can choose any (super_oid, path) that matches the
+ * submodule's name.
+ *
+ * NEEDSWORK: Storing an arbitrary commit is undesirable because we can't
+ * guarantee that we're reading the commit that the user would expect. A better
+ * scheme would be to just fetch a submodule by its name. This requires two
+ * steps:
+ * - Create a function that behaves like repo_submodule_init(), but accepts a
+ * submodule name instead of treeish_name and path. This should be easy
+ * because repo_submodule_init() internally uses the submodule's name.
+ *
+ * - Replace most instances of 'struct submodule' (which is the .gitmodules
+ * config) with just the submodule name. This is OK because we expect
+ * submodule settings to be stored in .git/config (via "git submodule init"),
+ * not .gitmodules. This also lets us delete get_non_gitmodules_submodule(),
+ * which constructs a bogus 'struct submodule' for the sake of giving a
+ * placeholder name to a gitlink.
+ */
+struct changed_submodule_data {
+ /*
+ * The first superproject commit in the rev walk that points to
+ * the submodule.
+ */
+ const struct object_id *super_oid;
+ /*
+ * Path to the submodule in the superproject commit referenced
+ * by 'super_oid'.
+ */
+ char *path;
+ /* The submodule commits that have changed in the rev walk. */
+ struct oid_array new_commits;
+};
+
+static void changed_submodule_data_clear(struct changed_submodule_data *cs_data)
+{
+ oid_array_clear(&cs_data->new_commits);
+ free(cs_data->path);
+}
+
static void collect_changed_submodules_cb(struct diff_queue_struct *q,
struct diff_options *options,
void *data)
@@ -830,9 +843,10 @@ static void collect_changed_submodules_cb(struct diff_queue_struct *q,
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
- struct oid_array *commits;
const struct submodule *submodule;
const char *name;
+ struct string_list_item *item;
+ struct changed_submodule_data *cs_data;
if (!S_ISGITLINK(p->two->mode))
continue;
@@ -859,8 +873,16 @@ static void collect_changed_submodules_cb(struct diff_queue_struct *q,
if (!name)
continue;
- commits = submodule_commits(changed, name);
- oid_array_append(commits, &p->two->oid);
+ item = string_list_insert(changed, name);
+ if (item->util)
+ cs_data = item->util;
+ else {
+ item->util = xcalloc(1, sizeof(struct changed_submodule_data));
+ cs_data = item->util;
+ cs_data->super_oid = commit_oid;
+ cs_data->path = xstrdup(p->two->path);
+ }
+ oid_array_append(&cs_data->new_commits, &p->two->oid);
}
}
@@ -907,11 +929,12 @@ static void collect_changed_submodules(struct repository *r,
reset_revision_walk();
}
-static void free_submodules_oids(struct string_list *submodules)
+static void free_submodules_data(struct string_list *submodules)
{
struct string_list_item *item;
for_each_string_list_item(item, submodules)
- oid_array_clear((struct oid_array *) item->util);
+ changed_submodule_data_clear(item->util);
+
string_list_clear(submodules, 1);
}
@@ -932,6 +955,7 @@ struct has_commit_data {
struct repository *repo;
int result;
const char *path;
+ const struct object_id *super_oid;
};
static int check_has_commit(const struct object_id *oid, void *data)
@@ -940,9 +964,10 @@ static int check_has_commit(const struct object_id *oid, void *data)
struct repository subrepo;
enum object_type type;
- if (repo_submodule_init(&subrepo, cb->repo, cb->path, null_oid())) {
+ if (repo_submodule_init(&subrepo, cb->repo, cb->path, cb->super_oid)) {
cb->result = 0;
- goto cleanup;
+ /* subrepo failed to init, so don't clean it up. */
+ return 0;
}
type = oid_object_info(&subrepo, oid, NULL);
@@ -968,21 +993,15 @@ cleanup:
static int submodule_has_commits(struct repository *r,
const char *path,
+ const struct object_id *super_oid,
struct oid_array *commits)
{
- struct has_commit_data has_commit = { r, 1, path };
-
- /*
- * Perform a cheap, but incorrect check for the existence of 'commits'.
- * This is done by adding the submodule's object store to the in-core
- * object store, and then querying for each commit's existence. If we
- * do not have the commit object anywhere, there is no chance we have
- * it in the object store of the correct submodule and have it
- * reachable from a ref, so we can fail early without spawning rev-list
- * which is expensive.
- */
- if (add_submodule_odb(path))
- return 0;
+ struct has_commit_data has_commit = {
+ .repo = r,
+ .result = 1,
+ .path = path,
+ .super_oid = super_oid
+ };
oid_array_for_each_unique(commits, check_has_commit, &has_commit);
@@ -1017,7 +1036,7 @@ static int submodule_needs_pushing(struct repository *r,
const char *path,
struct oid_array *commits)
{
- if (!submodule_has_commits(r, path, commits))
+ if (!submodule_has_commits(r, path, null_oid(), commits))
/*
* NOTE: We do consider it safe to return "no" here. The
* correct answer would be "We do not know" instead of
@@ -1077,7 +1096,7 @@ int find_unpushed_submodules(struct repository *r,
collect_changed_submodules(r, &submodules, &argv);
for_each_string_list_item(name, &submodules) {
- struct oid_array *commits = name->util;
+ struct changed_submodule_data *cs_data = name->util;
const struct submodule *submodule;
const char *path = NULL;
@@ -1090,11 +1109,11 @@ int find_unpushed_submodules(struct repository *r,
if (!path)
continue;
- if (submodule_needs_pushing(r, path, commits))
+ if (submodule_needs_pushing(r, path, &cs_data->new_commits))
string_list_insert(needs_pushing, path);
}
- free_submodules_oids(&submodules);
+ free_submodules_data(&submodules);
strvec_clear(&argv);
return needs_pushing->nr;
@@ -1240,14 +1259,36 @@ void check_for_new_submodule_commits(struct object_id *oid)
oid_array_append(&ref_tips_after_fetch, oid);
}
+/*
+ * Returns 1 if there is at least one submodule gitdir in
+ * $GIT_DIR/modules and 0 otherwise. This follows
+ * submodule_name_to_gitdir(), which looks for submodules in
+ * $GIT_DIR/modules, not $GIT_COMMON_DIR.
+ *
+ * A submodule can be moved to $GIT_DIR/modules manually by running "git
+ * submodule absorbgitdirs", or it may be initialized there by "git
+ * submodule update".
+ */
+static int repo_has_absorbed_submodules(struct repository *r)
+{
+ int ret;
+ struct strbuf buf = STRBUF_INIT;
+
+ strbuf_repo_git_path(&buf, r, "modules/");
+ ret = file_exists(buf.buf) && !is_empty_dir(buf.buf);
+ strbuf_release(&buf);
+ return ret;
+}
+
static void calculate_changed_submodule_paths(struct repository *r,
struct string_list *changed_submodule_names)
{
struct strvec argv = STRVEC_INIT;
struct string_list_item *name;
- /* No need to check if there are no submodules configured */
- if (!submodule_from_path(r, NULL, NULL))
+ /* No need to check if no submodules would be fetched */
+ if (!submodule_from_path(r, NULL, NULL) &&
+ !repo_has_absorbed_submodules(r))
return;
strvec_push(&argv, "--"); /* argv[0] program name */
@@ -1264,7 +1305,7 @@ static void calculate_changed_submodule_paths(struct repository *r,
collect_changed_submodules(r, changed_submodule_names, &argv);
for_each_string_list_item(name, changed_submodule_names) {
- struct oid_array *commits = name->util;
+ struct changed_submodule_data *cs_data = name->util;
const struct submodule *submodule;
const char *path = NULL;
@@ -1277,8 +1318,8 @@ static void calculate_changed_submodule_paths(struct repository *r,
if (!path)
continue;
- if (submodule_has_commits(r, path, commits)) {
- oid_array_clear(commits);
+ if (submodule_has_commits(r, path, null_oid(), &cs_data->new_commits)) {
+ changed_submodule_data_clear(cs_data);
*name->string = '\0';
}
}
@@ -1315,12 +1356,21 @@ int submodule_touches_in_range(struct repository *r,
strvec_clear(&args);
- free_submodules_oids(&subs);
+ free_submodules_data(&subs);
return ret;
}
struct submodule_parallel_fetch {
- int count;
+ /*
+ * The index of the last index entry processed by
+ * get_fetch_task_from_index().
+ */
+ int index_count;
+ /*
+ * The index of the last string_list entry processed by
+ * get_fetch_task_from_changed().
+ */
+ int changed_count;
struct strvec args;
struct repository *r;
const char *prefix;
@@ -1329,7 +1379,16 @@ struct submodule_parallel_fetch {
int quiet;
int result;
+ /*
+ * Names of submodules that have new commits. Generated by
+ * walking the newly fetched superproject commits.
+ */
struct string_list changed_submodule_names;
+ /*
+ * Names of submodules that have already been processed. Lets us
+ * avoid fetching the same submodule more than once.
+ */
+ struct string_list seen_submodule_names;
/* Pending fetches by OIDs */
struct fetch_task **oid_fetch_tasks;
@@ -1340,6 +1399,7 @@ struct submodule_parallel_fetch {
#define SPF_INIT { \
.args = STRVEC_INIT, \
.changed_submodule_names = STRING_LIST_INIT_DUP, \
+ .seen_submodule_names = STRING_LIST_INIT_DUP, \
.submodules_with_errors = STRBUF_INIT, \
}
@@ -1376,6 +1436,8 @@ struct fetch_task {
struct repository *repo;
const struct submodule *sub;
unsigned free_sub : 1; /* Do we need to free the submodule? */
+ const char *default_argv; /* The default fetch mode. */
+ struct strvec git_args; /* Args for the child git process. */
struct oid_array *commits; /* Ensure these commits are fetched */
};
@@ -1401,31 +1463,6 @@ static const struct submodule *get_non_gitmodules_submodule(const char *path)
return (const struct submodule *) ret;
}
-static struct fetch_task *fetch_task_create(struct repository *r,
- const char *path)
-{
- struct fetch_task *task = xmalloc(sizeof(*task));
- memset(task, 0, sizeof(*task));
-
- task->sub = submodule_from_path(r, null_oid(), path);
- if (!task->sub) {
- /*
- * No entry in .gitmodules? Technically not a submodule,
- * but historically we supported repositories that happen to be
- * in-place where a gitlink is. Keep supporting them.
- */
- task->sub = get_non_gitmodules_submodule(path);
- if (!task->sub) {
- free(task);
- return NULL;
- }
-
- task->free_sub = 1;
- }
-
- return task;
-}
-
static void fetch_task_release(struct fetch_task *p)
{
if (p->free_sub)
@@ -1436,14 +1473,17 @@ static void fetch_task_release(struct fetch_task *p)
if (p->repo)
repo_clear(p->repo);
FREE_AND_NULL(p->repo);
+
+ strvec_clear(&p->git_args);
}
static struct repository *get_submodule_repo_for(struct repository *r,
- const char *path)
+ const char *path,
+ const struct object_id *treeish_name)
{
struct repository *ret = xmalloc(sizeof(*ret));
- if (repo_submodule_init(ret, r, path, null_oid())) {
+ if (repo_submodule_init(ret, r, path, treeish_name)) {
free(ret);
return NULL;
}
@@ -1451,67 +1491,83 @@ static struct repository *get_submodule_repo_for(struct repository *r,
return ret;
}
-static int get_next_submodule(struct child_process *cp,
- struct strbuf *err, void *data, void **task_cb)
+static struct fetch_task *fetch_task_create(struct submodule_parallel_fetch *spf,
+ const char *path,
+ const struct object_id *treeish_name)
{
- struct submodule_parallel_fetch *spf = data;
+ struct fetch_task *task = xmalloc(sizeof(*task));
+ memset(task, 0, sizeof(*task));
+
+ task->sub = submodule_from_path(spf->r, treeish_name, path);
- for (; spf->count < spf->r->index->cache_nr; spf->count++) {
- const struct cache_entry *ce = spf->r->index->cache[spf->count];
- const char *default_argv;
+ if (!task->sub) {
+ /*
+ * No entry in .gitmodules? Technically not a submodule,
+ * but historically we supported repositories that happen to be
+ * in-place where a gitlink is. Keep supporting them.
+ */
+ task->sub = get_non_gitmodules_submodule(path);
+ if (!task->sub)
+ goto cleanup;
+
+ task->free_sub = 1;
+ }
+
+ if (string_list_lookup(&spf->seen_submodule_names, task->sub->name))
+ goto cleanup;
+
+ switch (get_fetch_recurse_config(task->sub, spf))
+ {
+ default:
+ case RECURSE_SUBMODULES_DEFAULT:
+ case RECURSE_SUBMODULES_ON_DEMAND:
+ if (!task->sub ||
+ !string_list_lookup(
+ &spf->changed_submodule_names,
+ task->sub->name))
+ goto cleanup;
+ task->default_argv = "on-demand";
+ break;
+ case RECURSE_SUBMODULES_ON:
+ task->default_argv = "yes";
+ break;
+ case RECURSE_SUBMODULES_OFF:
+ goto cleanup;
+ }
+
+ task->repo = get_submodule_repo_for(spf->r, path, treeish_name);
+
+ return task;
+
+ cleanup:
+ fetch_task_release(task);
+ free(task);
+ return NULL;
+}
+
+static struct fetch_task *
+get_fetch_task_from_index(struct submodule_parallel_fetch *spf,
+ struct strbuf *err)
+{
+ for (; spf->index_count < spf->r->index->cache_nr; spf->index_count++) {
+ const struct cache_entry *ce =
+ spf->r->index->cache[spf->index_count];
struct fetch_task *task;
if (!S_ISGITLINK(ce->ce_mode))
continue;
- task = fetch_task_create(spf->r, ce->name);
+ task = fetch_task_create(spf, ce->name, null_oid());
if (!task)
continue;
- switch (get_fetch_recurse_config(task->sub, spf))
- {
- default:
- case RECURSE_SUBMODULES_DEFAULT:
- case RECURSE_SUBMODULES_ON_DEMAND:
- if (!task->sub ||
- !string_list_lookup(
- &spf->changed_submodule_names,
- task->sub->name))
- continue;
- default_argv = "on-demand";
- break;
- case RECURSE_SUBMODULES_ON:
- default_argv = "yes";
- break;
- case RECURSE_SUBMODULES_OFF:
- continue;
- }
-
- task->repo = get_submodule_repo_for(spf->r, task->sub->path);
if (task->repo) {
- struct strbuf submodule_prefix = STRBUF_INIT;
- child_process_init(cp);
- cp->dir = task->repo->gitdir;
- prepare_submodule_repo_env_in_gitdir(&cp->env_array);
- cp->git_cmd = 1;
if (!spf->quiet)
strbuf_addf(err, _("Fetching submodule %s%s\n"),
spf->prefix, ce->name);
- strvec_init(&cp->args);
- strvec_pushv(&cp->args, spf->args.v);
- strvec_push(&cp->args, default_argv);
- strvec_push(&cp->args, "--submodule-prefix");
- strbuf_addf(&submodule_prefix, "%s%s/",
- spf->prefix,
- task->sub->path);
- strvec_push(&cp->args, submodule_prefix.buf);
-
- spf->count++;
- *task_cb = task;
-
- strbuf_release(&submodule_prefix);
- return 1;
+ spf->index_count++;
+ return task;
} else {
struct strbuf empty_submodule_path = STRBUF_INIT;
@@ -1535,6 +1591,111 @@ static int get_next_submodule(struct child_process *cp,
strbuf_release(&empty_submodule_path);
}
}
+ return NULL;
+}
+
+static struct fetch_task *
+get_fetch_task_from_changed(struct submodule_parallel_fetch *spf,
+ struct strbuf *err)
+{
+ for (; spf->changed_count < spf->changed_submodule_names.nr;
+ spf->changed_count++) {
+ struct string_list_item item =
+ spf->changed_submodule_names.items[spf->changed_count];
+ struct changed_submodule_data *cs_data = item.util;
+ struct fetch_task *task;
+
+ if (!is_tree_submodule_active(spf->r, cs_data->super_oid,cs_data->path))
+ continue;
+
+ task = fetch_task_create(spf, cs_data->path,
+ cs_data->super_oid);
+ if (!task)
+ continue;
+
+ if (!task->repo) {
+ strbuf_addf(err, _("Could not access submodule '%s' at commit %s\n"),
+ cs_data->path,
+ find_unique_abbrev(cs_data->super_oid, DEFAULT_ABBREV));
+
+ fetch_task_release(task);
+ free(task);
+ continue;
+ }
+
+ if (!spf->quiet)
+ strbuf_addf(err,
+ _("Fetching submodule %s%s at commit %s\n"),
+ spf->prefix, task->sub->path,
+ find_unique_abbrev(cs_data->super_oid,
+ DEFAULT_ABBREV));
+
+ spf->changed_count++;
+ /*
+ * NEEDSWORK: Submodules set/unset a value for
+ * core.worktree when they are populated/unpopulated by
+ * "git checkout" (and similar commands, see
+ * submodule_move_head() and
+ * connect_work_tree_and_git_dir()), but if the
+ * submodule is unpopulated in another way (e.g. "git
+ * rm", "rm -r"), core.worktree will still be set even
+ * though the directory doesn't exist, and the child
+ * process will crash while trying to chdir into the
+ * nonexistent directory.
+ *
+ * In this case, we know that the submodule has no
+ * working tree, so we can work around this by
+ * setting "--work-tree=." (--bare does not work because
+ * worktree settings take precedence over bare-ness).
+ * However, this is not necessarily true in other cases,
+ * so a generalized solution is still necessary.
+ *
+ * Possible solutions:
+ * - teach "git [add|rm]" to unset core.worktree and
+ * discourage users from removing submodules without
+ * using a Git command.
+ * - teach submodule child processes to ignore stale
+ * core.worktree values.
+ */
+ strvec_push(&task->git_args, "--work-tree=.");
+ return task;
+ }
+ return NULL;
+}
+
+static int get_next_submodule(struct child_process *cp, struct strbuf *err,
+ void *data, void **task_cb)
+{
+ struct submodule_parallel_fetch *spf = data;
+ struct fetch_task *task =
+ get_fetch_task_from_index(spf, err);
+ if (!task)
+ task = get_fetch_task_from_changed(spf, err);
+
+ if (task) {
+ struct strbuf submodule_prefix = STRBUF_INIT;
+
+ child_process_init(cp);
+ cp->dir = task->repo->gitdir;
+ prepare_submodule_repo_env_in_gitdir(&cp->env_array);
+ cp->git_cmd = 1;
+ strvec_init(&cp->args);
+ if (task->git_args.nr)
+ strvec_pushv(&cp->args, task->git_args.v);
+ strvec_pushv(&cp->args, spf->args.v);
+ strvec_push(&cp->args, task->default_argv);
+ strvec_push(&cp->args, "--submodule-prefix");
+
+ strbuf_addf(&submodule_prefix, "%s%s/",
+ spf->prefix,
+ task->sub->path);
+ strvec_push(&cp->args, submodule_prefix.buf);
+ *task_cb = task;
+
+ strbuf_release(&submodule_prefix);
+ string_list_insert(&spf->seen_submodule_names, task->sub->name);
+ return 1;
+ }
if (spf->oid_fetch_tasks_nr) {
struct fetch_task *task =
@@ -1597,7 +1758,7 @@ static int fetch_finish(int retvalue, struct strbuf *err,
struct fetch_task *task = task_cb;
struct string_list_item *it;
- struct oid_array *commits;
+ struct changed_submodule_data *cs_data;
if (!task || !task->sub)
BUG("callback cookie bogus");
@@ -1625,14 +1786,14 @@ static int fetch_finish(int retvalue, struct strbuf *err,
/* Could be an unchanged submodule, not contained in the list */
goto out;
- commits = it->util;
- oid_array_filter(commits,
+ cs_data = it->util;
+ oid_array_filter(&cs_data->new_commits,
commit_missing_in_sub,
task->repo);
/* Are there commits we want, but do not exist? */
- if (commits->nr) {
- task->commits = commits;
+ if (cs_data->new_commits.nr) {
+ task->commits = &cs_data->new_commits;
ALLOC_GROW(spf->oid_fetch_tasks,
spf->oid_fetch_tasks_nr + 1,
spf->oid_fetch_tasks_alloc);
@@ -1647,11 +1808,11 @@ out:
return 0;
}
-int fetch_populated_submodules(struct repository *r,
- const struct strvec *options,
- const char *prefix, int command_line_option,
- int default_option,
- int quiet, int max_parallel_jobs)
+int fetch_submodules(struct repository *r,
+ const struct strvec *options,
+ const char *prefix, int command_line_option,
+ int default_option,
+ int quiet, int max_parallel_jobs)
{
int i;
struct submodule_parallel_fetch spf = SPF_INIT;
@@ -1690,7 +1851,7 @@ int fetch_populated_submodules(struct repository *r,
strvec_clear(&spf.args);
out:
- free_submodules_oids(&spf.changed_submodule_names);
+ free_submodules_data(&spf.changed_submodule_names);
return spf.result;
}