From 6e773527b6b03976cefbb0f9571bd40dd5995e6c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 30 Mar 2021 13:10:55 +0000 Subject: sparse-index: convert from full to sparse If we have a full index, then we can convert it to a sparse index by replacing directories outside of the sparse cone with sparse directory entries. The convert_to_sparse() method does this, when the situation is appropriate. For now, we avoid converting the index to a sparse index if: 1. the index is split. 2. the index is already sparse. 3. sparse-checkout is disabled. 4. sparse-checkout does not use cone mode. Finally, we currently limit the conversion to when the GIT_TEST_SPARSE_INDEX environment variable is enabled. A mode using Git config will be added in a later change. The trickiest thing about this conversion is that we might not be able to mark a directory as a sparse directory just because it is outside the sparse cone. There might be unmerged files within that directory, so we need to look for those. Also, if there is some strange reason why a file is not marked with CE_SKIP_WORKTREE, then we should give up on converting that directory. There is still hope that some of its subdirectories might be able to convert to sparse, so we keep looking deeper. The conversion process is assisted by the cache-tree extension. This is calculated from the full index if it does not already exist. We then abandon the cache-tree as it no longer applies to the newly-sparse index. Thus, this cache-tree will be recalculated in every sparse-full-sparse round-trip until we integrate the cache-tree extension with the sparse index. Some Git commands use the index after writing it. For example, 'git add' will update the index, then write it to disk, then read its entries to report information. To keep the in-memory index in a full state after writing, we re-expand it to a full one after the write. This is wasteful for commands that only write the index and do not read from it again, but that is only the case until we make those commands "sparse aware." We can compare the behavior of the sparse-index in t1092-sparse-checkout-compability.sh by using GIT_TEST_SPARSE_INDEX=1 when operating on the 'sparse-index' repo. We can also compare the two sparse repos directly, such as comparing their indexes (when expanded to full in the case of the 'sparse-index' repo). We also verify that the index is actually populated with sparse directory entries. The 'checkout and reset (mixed)' test is marked for failure when comparing a sparse repo to a full repo, but we can compare the two sparse-checkout cases directly to ensure that we are not changing the behavior when using a sparse index. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- cache-tree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'cache-tree.c') diff --git a/cache-tree.c b/cache-tree.c index 2fb483d3c0..5f07a39e50 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -6,6 +6,7 @@ #include "object-store.h" #include "replace-object.h" #include "promisor-remote.h" +#include "sparse-index.h" #ifndef DEBUG_CACHE_TREE #define DEBUG_CACHE_TREE 0 @@ -442,6 +443,8 @@ int cache_tree_update(struct index_state *istate, int flags) if (i) return i; + ensure_full_index(istate); + if (!istate->cache_tree) istate->cache_tree = cache_tree(); -- cgit v1.3 From 2de37c536d54a28a032491ad4ef97632ef6ab836 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 30 Mar 2021 13:11:02 +0000 Subject: cache-tree: integrate with sparse directory entries The cache-tree extension was previously disabled with sparse indexes. However, the cache-tree is an important performance feature for commands like 'git status' and 'git add'. Integrate it with sparse directory entries. When writing a sparse index, completely clear and recalculate the cache tree. By starting from scratch, the only integration necessary is to check if we hit a sparse directory entry and create a leaf of the cache-tree that has an entry_count of one and no subtrees. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- cache-tree.c | 18 ++++++++++++++++++ sparse-index.c | 10 +++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'cache-tree.c') diff --git a/cache-tree.c b/cache-tree.c index 5f07a39e50..950a9615db 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -256,6 +256,24 @@ static int update_one(struct cache_tree *it, *skip_count = 0; + /* + * If the first entry of this region is a sparse directory + * entry corresponding exactly to 'base', then this cache_tree + * struct is a "leaf" in the data structure, pointing to the + * tree OID specified in the entry. + */ + if (entries > 0) { + const struct cache_entry *ce = cache[0]; + + if (S_ISSPARSEDIR(ce->ce_mode) && + ce->ce_namelen == baselen && + !strncmp(ce->name, base, baselen)) { + it->entry_count = 1; + oidcpy(&it->oid, &ce->oid); + return 1; + } + } + if (0 <= it->entry_count && has_object_file(&it->oid)) return it->entry_count; diff --git a/sparse-index.c b/sparse-index.c index 4c73772c6d..95ea17174d 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -172,7 +172,11 @@ int convert_to_sparse(struct index_state *istate) istate->cache_nr = convert_to_sparse_rec(istate, 0, 0, istate->cache_nr, "", 0, istate->cache_tree); - istate->drop_cache_tree = 1; + + /* Clear and recompute the cache-tree */ + cache_tree_free(&istate->cache_tree); + cache_tree_update(istate, 0); + istate->sparse_index = 1; trace2_region_leave("index", "convert_to_sparse", istate->repo); return 0; @@ -273,5 +277,9 @@ void ensure_full_index(struct index_state *istate) strbuf_release(&base); free(full); + /* Clear and recompute the cache-tree */ + cache_tree_free(&istate->cache_tree); + cache_tree_update(istate, 0); + trace2_region_leave("index", "ensure_full_index", istate->repo); } -- cgit v1.3 From 9ad2d5ea71d1470c2fd68b76c36b2ced3612cde3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 30 Mar 2021 13:11:03 +0000 Subject: sparse-index: loose integration with cache_tree_verify() The cache_tree_verify() method is run when GIT_TEST_CHECK_CACHE_TREE is enabled, which it is by default in the test suite. The logic must be adjusted for the presence of these directory entries. For now, leave the test as a simple check for whether the directory entry is sparse. Do not go any further until needed. This allows us to re-enable GIT_TEST_CHECK_CACHE_TREE in t1092-sparse-checkout-compatibility.sh. Further, p2000-sparse-operations.sh uses the test suite and hence this is enabled for all tests. We need to integrate with it before we run our performance tests with a sparse-index. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- cache-tree.c | 19 +++++++++++++++++++ t/t1092-sparse-checkout-compatibility.sh | 3 --- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'cache-tree.c') diff --git a/cache-tree.c b/cache-tree.c index 950a9615db..11bf1fcae6 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -808,6 +808,19 @@ int cache_tree_matches_traversal(struct cache_tree *root, return 0; } +static void verify_one_sparse(struct repository *r, + struct index_state *istate, + struct cache_tree *it, + struct strbuf *path, + int pos) +{ + struct cache_entry *ce = istate->cache[pos]; + + if (!S_ISSPARSEDIR(ce->ce_mode)) + BUG("directory '%s' is present in index, but not sparse", + path->buf); +} + static void verify_one(struct repository *r, struct index_state *istate, struct cache_tree *it, @@ -830,6 +843,12 @@ static void verify_one(struct repository *r, if (path->len) { pos = index_name_pos(istate, path->buf, path->len); + + if (pos >= 0) { + verify_one_sparse(r, istate, it, path, pos); + return; + } + pos = -pos - 1; } else { pos = 0; diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index 472c5337de..12e6c45302 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -2,9 +2,6 @@ test_description='compare full workdir to sparse workdir' -# The verify_cache_tree() check is not sparse-aware (yet). -# So, disable the check until that integration is complete. -GIT_TEST_CHECK_CACHE_TREE=0 GIT_TEST_SPLIT_INDEX=0 GIT_TEST_SPARSE_INDEX= -- cgit v1.3