From c46c406ae1ee30f64a13083edfa5683d2685fd61 Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Sat, 18 Aug 2018 16:41:22 +0200 Subject: trace.h: support nested performance tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Performance measurements are listed right now as a flat list, which is fine when we measure big blocks. But when we start adding more and more measurements, some of them could be just part of a bigger measurement and a flat list gives a wrong impression that they are executed at the same level instead of nested. Add trace_performance_enter() and trace_performance_leave() to allow indent these nested measurements. For now it does not help much because the only nested thing is (lazy) name hash initialization (e.g. called in diff-index from "git status"). This will help more because I'm going to add some more tracing that's actually nested. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- read-cache.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'read-cache.c') diff --git a/read-cache.c b/read-cache.c index c5fabc844a..1c9c88c130 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1476,8 +1476,8 @@ int refresh_index(struct index_state *istate, unsigned int flags, const char *typechange_fmt; const char *added_fmt; const char *unmerged_fmt; - uint64_t start = getnanotime(); + trace_performance_enter(); modified_fmt = (in_porcelain ? "M\t%s\n" : "%s: needs update\n"); deleted_fmt = (in_porcelain ? "D\t%s\n" : "%s: needs update\n"); typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n"); @@ -1547,7 +1547,7 @@ int refresh_index(struct index_state *istate, unsigned int flags, replace_index_entry(istate, i, new_entry); } - trace_performance_since(start, "refresh index"); + trace_performance_leave("refresh index"); return has_errors; } @@ -2002,7 +2002,6 @@ static void freshen_shared_index(const char *shared_index, int warn) int read_index_from(struct index_state *istate, const char *path, const char *gitdir) { - uint64_t start = getnanotime(); struct split_index *split_index; int ret; char *base_oid_hex; @@ -2012,8 +2011,9 @@ int read_index_from(struct index_state *istate, const char *path, if (istate->initialized) return istate->cache_nr; + trace_performance_enter(); ret = do_read_index(istate, path, 0); - trace_performance_since(start, "read cache %s", path); + trace_performance_leave("read cache %s", path); split_index = istate->split_index; if (!split_index || is_null_oid(&split_index->base_oid)) { @@ -2021,6 +2021,7 @@ int read_index_from(struct index_state *istate, const char *path, return ret; } + trace_performance_enter(); if (split_index->base) discard_index(split_index->base); else @@ -2037,8 +2038,8 @@ int read_index_from(struct index_state *istate, const char *path, freshen_shared_index(base_path, 0); merge_base_index(istate); post_read_index_from(istate); - trace_performance_since(start, "read cache %s", base_path); free(base_path); + trace_performance_leave("read cache %s", base_path); return ret; } -- cgit v1.3-5-g9baa From 836ef2b69f3a8668c35a537715cf3bbc08fdcf39 Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Sat, 18 Aug 2018 16:41:26 +0200 Subject: unpack-trees: reuse (still valid) cache-tree from src_index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do n-way merge by walking the source index and n trees at the same time and add merge results to a new temporary index called o->result. The merge result for any given path could be either - keep_entry(): same old index entry in o->src_index is reused - merged_entry(): either a new entry is added, or an existing one updated - deleted_entry(): one entry from o->src_index is removed For some reason [1] we keep making sure that the source index's cache-tree is still valid if used by o->result: for all those merged/deleted entries, we invalidate the same path in o->src_index, so only cache-trees covering the "keep_entry" parts remain good. Because of this, the cache-tree from o->src_index can be perfectly reused in o->result. And in fact we already rely on this logic to reuse untracked cache in edf3b90553 (unpack-trees: preserve index extensions - 2017-05-08). Move the cache-tree to o->result before doing cache_tree_update() to reduce hashing cost. Since cache_tree_update() has risen up as one of the most expensive parts in unpack_trees() after the last few patches. This does help reduce unpack_trees() time significantly (on webkit.git): before after -------------------------------------------------------------------- 0.080394752 0.051258167 s: read cache .git/index 0.216010838 0.212106298 s: preload index 0.008534301 0.280521764 s: refresh index 0.251992198 0.218160442 s: traverse_trees 0.377031383 0.374948191 s: check_updates 0.372768105 0.037040114 s: cache_tree_update 1.045887251 0.672031609 s: unpack_trees 0.314983512 0.317456290 s: write index, changed mask = 2e 0.062572653 0.038382654 s: traverse_trees 0.000022544 0.000042731 s: check_updates 0.073795585 0.050930053 s: unpack_trees 0.073807557 0.051099735 s: diff-index 1.938191592 1.614241153 s: git command: git checkout - [1] I'm pretty sure the reason is an oversight in 34110cd4e3 (Make 'unpack_trees()' have a separate source and destination index - 2008-03-06). That patch aims to _not_ update the source index at all. The invalidation should have been done on o->result in that patch. But then there was no cache-tree on o->result even then so it's pointless to do so. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- read-cache.c | 2 ++ unpack-trees.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'read-cache.c') diff --git a/read-cache.c b/read-cache.c index 1c9c88c130..5ce40f39b3 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2940,6 +2940,8 @@ void move_index_extensions(struct index_state *dst, struct index_state *src) { dst->untracked = src->untracked; src->untracked = NULL; + dst->cache_tree = src->cache_tree; + src->cache_tree = NULL; } struct cache_entry *dup_cache_entry(const struct cache_entry *ce, diff --git a/unpack-trees.c b/unpack-trees.c index dbef6e1b8a..aa80b65ee1 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1576,6 +1576,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options ret = check_updates(o) ? (-2) : 0; if (o->dst_index) { + move_index_extensions(&o->result, o->src_index); if (!ret) { if (!o->result.cache_tree) o->result.cache_tree = cache_tree(); @@ -1584,7 +1585,6 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options WRITE_TREE_SILENT | WRITE_TREE_REPAIR); } - move_index_extensions(&o->result, o->src_index); discard_index(o->dst_index); *o->dst_index = o->result; } else { -- cgit v1.3-5-g9baa From 4592e6080ff0f9eb0218162be0e40b2d6abc979a Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Sat, 18 Aug 2018 16:41:28 +0200 Subject: cache-tree: verify valid cache-tree in the test suite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes sure that cache-tree is consistent with the index. The main purpose is to catch potential problems by saving the index in unpack_trees() but the line in write_index() would also help spot missing invalidation in other code. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- cache-tree.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ cache-tree.h | 1 + read-cache.c | 3 +++ t/test-lib.sh | 6 +++++ unpack-trees.c | 2 ++ 5 files changed, 90 insertions(+) (limited to 'read-cache.c') diff --git a/cache-tree.c b/cache-tree.c index caafbff2ff..c3c206427c 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -4,6 +4,7 @@ #include "tree-walk.h" #include "cache-tree.h" #include "object-store.h" +#include "replace-object.h" #ifndef DEBUG #define DEBUG 0 @@ -732,3 +733,80 @@ int update_main_cache_tree(int flags) the_index.cache_tree = cache_tree(); return cache_tree_update(&the_index, flags); } + +static void verify_one(struct index_state *istate, + struct cache_tree *it, + struct strbuf *path) +{ + int i, pos, len = path->len; + struct strbuf tree_buf = STRBUF_INIT; + struct object_id new_oid; + + for (i = 0; i < it->subtree_nr; i++) { + strbuf_addf(path, "%s/", it->down[i]->name); + verify_one(istate, it->down[i]->cache_tree, path); + strbuf_setlen(path, len); + } + + if (it->entry_count < 0 || + /* no verification on tests (t7003) that replace trees */ + lookup_replace_object(the_repository, &it->oid) != &it->oid) + return; + + if (path->len) { + pos = index_name_pos(istate, path->buf, path->len); + pos = -pos - 1; + } else { + pos = 0; + } + + i = 0; + while (i < it->entry_count) { + struct cache_entry *ce = istate->cache[pos + i]; + const char *slash; + struct cache_tree_sub *sub = NULL; + const struct object_id *oid; + const char *name; + unsigned mode; + int entlen; + + if (ce->ce_flags & (CE_STAGEMASK | CE_INTENT_TO_ADD | CE_REMOVE)) + BUG("%s with flags 0x%x should not be in cache-tree", + ce->name, ce->ce_flags); + name = ce->name + path->len; + slash = strchr(name, '/'); + if (slash) { + entlen = slash - name; + sub = find_subtree(it, ce->name + path->len, entlen, 0); + if (!sub || sub->cache_tree->entry_count < 0) + BUG("bad subtree '%.*s'", entlen, name); + oid = &sub->cache_tree->oid; + mode = S_IFDIR; + i += sub->cache_tree->entry_count; + } else { + oid = &ce->oid; + mode = ce->ce_mode; + entlen = ce_namelen(ce) - path->len; + i++; + } + strbuf_addf(&tree_buf, "%o %.*s%c", mode, entlen, name, '\0'); + strbuf_add(&tree_buf, oid->hash, the_hash_algo->rawsz); + } + hash_object_file(tree_buf.buf, tree_buf.len, tree_type, &new_oid); + if (oidcmp(&new_oid, &it->oid)) + BUG("cache-tree for path %.*s does not match. " + "Expected %s got %s", len, path->buf, + oid_to_hex(&new_oid), oid_to_hex(&it->oid)); + strbuf_setlen(path, len); + strbuf_release(&tree_buf); +} + +void cache_tree_verify(struct index_state *istate) +{ + struct strbuf path = STRBUF_INIT; + + if (!istate->cache_tree) + return; + verify_one(istate, istate->cache_tree, &path); + strbuf_release(&path); +} diff --git a/cache-tree.h b/cache-tree.h index 9799e894f7..c1fde531f9 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -32,6 +32,7 @@ struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); int cache_tree_fully_valid(struct cache_tree *); int cache_tree_update(struct index_state *, int); +void cache_tree_verify(struct index_state *); int update_main_cache_tree(int); diff --git a/read-cache.c b/read-cache.c index 5ce40f39b3..41f313bc9e 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2744,6 +2744,9 @@ int write_locked_index(struct index_state *istate, struct lock_file *lock, int new_shared_index, ret; struct split_index *si = istate->split_index; + if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0)) + cache_tree_verify(istate); + if ((flags & SKIP_IF_UNCHANGED) && !istate->cache_changed) { if (flags & COMMIT_LOCK) rollback_lock_file(lock); diff --git a/t/test-lib.sh b/t/test-lib.sh index 78f7097746..5b50f6e2e6 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1083,6 +1083,12 @@ else test_set_prereq C_LOCALE_OUTPUT fi +if test -z "$GIT_TEST_CHECK_CACHE_TREE" +then + GIT_TEST_CHECK_CACHE_TREE=true + export GIT_TEST_CHECK_CACHE_TREE +fi + test_lazy_prereq PIPE ' # test whether the filesystem supports FIFOs test_have_prereq !MINGW,!CYGWIN && diff --git a/unpack-trees.c b/unpack-trees.c index bc43922922..3394540842 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1578,6 +1578,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options if (o->dst_index) { move_index_extensions(&o->result, o->src_index); if (!ret) { + if (git_env_bool("GIT_TEST_CHECK_CACHE_TREE", 0)) + cache_tree_verify(&o->result); if (!o->result.cache_tree) o->result.cache_tree = cache_tree(); if (!cache_tree_fully_valid(o->result.cache_tree)) -- cgit v1.3-5-g9baa