From 08fd81c9b6495a395a527985d18aa51c4ae66cdc Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 2 Apr 2018 16:34:19 -0400 Subject: commit-graph: implement write_commit_graph() Teach Git to write a commit graph file by checking all packed objects to see if they are commits, then store the file in the given object directory. Helped-by: Jeff King Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- commit-graph.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 commit-graph.h (limited to 'commit-graph.h') diff --git a/commit-graph.h b/commit-graph.h new file mode 100644 index 0000000000..16fea993ab --- /dev/null +++ b/commit-graph.h @@ -0,0 +1,6 @@ +#ifndef COMMIT_GRAPH_H +#define COMMIT_GRAPH_H + +void write_commit_graph(const char *obj_dir); + +#endif -- cgit v1.3 From 2a2e32bdc5a80221981939e77643cec3462b4793 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 10 Apr 2018 08:56:02 -0400 Subject: commit-graph: implement git commit-graph read Teach git-commit-graph to read commit graph files and summarize their contents. Use the read subcommand to verify the contents of a commit graph file in the tests. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 12 ++++ builtin/commit-graph.c | 56 +++++++++++++++ commit-graph.c | 137 ++++++++++++++++++++++++++++++++++++- commit-graph.h | 23 +++++++ t/t5318-commit-graph.sh | 32 +++++++-- 5 files changed, 254 insertions(+), 6 deletions(-) (limited to 'commit-graph.h') diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 47996e8f89..8aad8303f5 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -9,6 +9,7 @@ git-commit-graph - Write and verify Git commit graph files SYNOPSIS -------- [verse] +'git commit-graph read' [--object-dir ] 'git commit-graph write' [--object-dir ] @@ -35,6 +36,11 @@ COMMANDS Write a commit graph file based on the commits found in packfiles. Includes all commits from the existing commit graph file. +'read':: + +Read a graph file given by the commit-graph file and output basic +details about the graph file. Used for debugging purposes. + EXAMPLES -------- @@ -45,6 +51,12 @@ EXAMPLES $ git commit-graph write ------------------------------------------------ +* Read basic information from the commit-graph file. ++ +------------------------------------------------ +$ git commit-graph read +------------------------------------------------ + GIT --- diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 26b6360289..efd39331d7 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -7,10 +7,16 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), + N_("git commit-graph read [--object-dir ]"), N_("git commit-graph write [--object-dir ]"), NULL }; +static const char * const builtin_commit_graph_read_usage[] = { + N_("git commit-graph read [--object-dir ]"), + NULL +}; + static const char * const builtin_commit_graph_write_usage[] = { N_("git commit-graph write [--object-dir ]"), NULL @@ -20,6 +26,54 @@ static struct opts_commit_graph { const char *obj_dir; } opts; +static int graph_read(int argc, const char **argv) +{ + struct commit_graph *graph = NULL; + char *graph_name; + + static struct option builtin_commit_graph_read_options[] = { + OPT_STRING(0, "object-dir", &opts.obj_dir, + N_("dir"), + N_("The object directory to store the graph")), + OPT_END(), + }; + + argc = parse_options(argc, argv, NULL, + builtin_commit_graph_read_options, + builtin_commit_graph_read_usage, 0); + + if (!opts.obj_dir) + opts.obj_dir = get_object_directory(); + + graph_name = get_commit_graph_filename(opts.obj_dir); + graph = load_commit_graph_one(graph_name); + + if (!graph) + die("graph file %s does not exist", graph_name); + FREE_AND_NULL(graph_name); + + printf("header: %08x %d %d %d %d\n", + ntohl(*(uint32_t*)graph->data), + *(unsigned char*)(graph->data + 4), + *(unsigned char*)(graph->data + 5), + *(unsigned char*)(graph->data + 6), + *(unsigned char*)(graph->data + 7)); + printf("num_commits: %u\n", graph->num_commits); + printf("chunks:"); + + if (graph->chunk_oid_fanout) + printf(" oid_fanout"); + if (graph->chunk_oid_lookup) + printf(" oid_lookup"); + if (graph->chunk_commit_data) + printf(" commit_metadata"); + if (graph->chunk_large_edges) + printf(" large_edges"); + printf("\n"); + + return 0; +} + static int graph_write(int argc, const char **argv) { static struct option builtin_commit_graph_write_options[] = { @@ -60,6 +114,8 @@ int cmd_commit_graph(int argc, const char **argv, const char *prefix) PARSE_OPT_STOP_AT_NON_OPTION); if (argc > 0) { + if (!strcmp(argv[0], "read")) + return graph_read(argc, argv); if (!strcmp(argv[0], "write")) return graph_write(argc, argv); } diff --git a/commit-graph.c b/commit-graph.c index f3f7c4f189..b1bd3a892d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -39,11 +39,146 @@ GRAPH_OID_LEN + 8) -static char *get_commit_graph_filename(const char *obj_dir) +char *get_commit_graph_filename(const char *obj_dir) { return xstrfmt("%s/info/commit-graph", obj_dir); } +static struct commit_graph *alloc_commit_graph(void) +{ + struct commit_graph *g = xcalloc(1, sizeof(*g)); + g->graph_fd = -1; + + return g; +} + +struct commit_graph *load_commit_graph_one(const char *graph_file) +{ + void *graph_map; + const unsigned char *data, *chunk_lookup; + size_t graph_size; + struct stat st; + uint32_t i; + struct commit_graph *graph; + int fd = git_open(graph_file); + uint64_t last_chunk_offset; + uint32_t last_chunk_id; + uint32_t graph_signature; + unsigned char graph_version, hash_version; + + if (fd < 0) + return NULL; + if (fstat(fd, &st)) { + close(fd); + return NULL; + } + graph_size = xsize_t(st.st_size); + + if (graph_size < GRAPH_MIN_SIZE) { + close(fd); + die("graph file %s is too small", graph_file); + } + graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0); + data = (const unsigned char *)graph_map; + + graph_signature = get_be32(data); + if (graph_signature != GRAPH_SIGNATURE) { + error("graph signature %X does not match signature %X", + graph_signature, GRAPH_SIGNATURE); + goto cleanup_fail; + } + + graph_version = *(unsigned char*)(data + 4); + if (graph_version != GRAPH_VERSION) { + error("graph version %X does not match version %X", + graph_version, GRAPH_VERSION); + goto cleanup_fail; + } + + hash_version = *(unsigned char*)(data + 5); + if (hash_version != GRAPH_OID_VERSION) { + error("hash version %X does not match version %X", + hash_version, GRAPH_OID_VERSION); + goto cleanup_fail; + } + + graph = alloc_commit_graph(); + + graph->hash_len = GRAPH_OID_LEN; + graph->num_chunks = *(unsigned char*)(data + 6); + graph->graph_fd = fd; + graph->data = graph_map; + graph->data_len = graph_size; + + last_chunk_id = 0; + last_chunk_offset = 8; + chunk_lookup = data + 8; + for (i = 0; i < graph->num_chunks; i++) { + uint32_t chunk_id = get_be32(chunk_lookup + 0); + uint64_t chunk_offset = get_be64(chunk_lookup + 4); + int chunk_repeated = 0; + + chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH; + + if (chunk_offset > graph_size - GIT_MAX_RAWSZ) { + error("improper chunk offset %08x%08x", (uint32_t)(chunk_offset >> 32), + (uint32_t)chunk_offset); + goto cleanup_fail; + } + + switch (chunk_id) { + case GRAPH_CHUNKID_OIDFANOUT: + if (graph->chunk_oid_fanout) + chunk_repeated = 1; + else + graph->chunk_oid_fanout = (uint32_t*)(data + chunk_offset); + break; + + case GRAPH_CHUNKID_OIDLOOKUP: + if (graph->chunk_oid_lookup) + chunk_repeated = 1; + else + graph->chunk_oid_lookup = data + chunk_offset; + break; + + case GRAPH_CHUNKID_DATA: + if (graph->chunk_commit_data) + chunk_repeated = 1; + else + graph->chunk_commit_data = data + chunk_offset; + break; + + case GRAPH_CHUNKID_LARGEEDGES: + if (graph->chunk_large_edges) + chunk_repeated = 1; + else + graph->chunk_large_edges = data + chunk_offset; + break; + } + + if (chunk_repeated) { + error("chunk id %08x appears multiple times", chunk_id); + goto cleanup_fail; + } + + if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP) + { + graph->num_commits = (chunk_offset - last_chunk_offset) + / graph->hash_len; + } + + last_chunk_id = chunk_id; + last_chunk_offset = chunk_offset; + } + + return graph; + +cleanup_fail: + munmap(graph_map, graph_size); + close(fd); + exit(1); +} + static void write_graph_chunk_fanout(struct hashfile *f, struct commit **commits, int nr_commits) diff --git a/commit-graph.h b/commit-graph.h index 16fea993ab..2528478f06 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -1,6 +1,29 @@ #ifndef COMMIT_GRAPH_H #define COMMIT_GRAPH_H +#include "git-compat-util.h" + +char *get_commit_graph_filename(const char *obj_dir); + +struct commit_graph { + int graph_fd; + + const unsigned char *data; + size_t data_len; + + unsigned char hash_len; + unsigned char num_chunks; + uint32_t num_commits; + struct object_id oid; + + const uint32_t *chunk_oid_fanout; + const unsigned char *chunk_oid_lookup; + const unsigned char *chunk_commit_data; + const unsigned char *chunk_large_edges; +}; + +struct commit_graph *load_commit_graph_one(const char *graph_file); + void write_commit_graph(const char *obj_dir); #endif diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index d7b635bd68..2f44f91193 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -26,10 +26,28 @@ test_expect_success 'create commits and repack' ' git repack ' +graph_read_expect() { + OPTIONAL="" + NUM_CHUNKS=3 + if test ! -z $2 + then + OPTIONAL=" $2" + NUM_CHUNKS=$((3 + $(echo "$2" | wc -w))) + fi + cat >expect <<- EOF + header: 43475048 1 1 $NUM_CHUNKS 0 + num_commits: $1 + chunks: oid_fanout oid_lookup commit_metadata$OPTIONAL + EOF + git commit-graph read >output && + test_cmp expect output +} + test_expect_success 'write graph' ' cd "$TRASH_DIRECTORY/full" && graph1=$(git commit-graph write) && - test_path_is_file $objdir/info/commit-graph + test_path_is_file $objdir/info/commit-graph && + graph_read_expect "3" ' test_expect_success 'Add more commits' ' @@ -72,7 +90,8 @@ test_expect_success 'Add more commits' ' test_expect_success 'write graph with merges' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write && - test_path_is_file $objdir/info/commit-graph + test_path_is_file $objdir/info/commit-graph && + graph_read_expect "10" "large_edges" ' test_expect_success 'Add one more commit' ' @@ -99,13 +118,15 @@ test_expect_success 'Add one more commit' ' test_expect_success 'write graph with new commit' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write && - test_path_is_file $objdir/info/commit-graph + test_path_is_file $objdir/info/commit-graph && + graph_read_expect "11" "large_edges" ' test_expect_success 'write graph with nothing new' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write && - test_path_is_file $objdir/info/commit-graph + test_path_is_file $objdir/info/commit-graph && + graph_read_expect "11" "large_edges" ' test_expect_success 'setup bare repo' ' @@ -118,7 +139,8 @@ test_expect_success 'setup bare repo' ' test_expect_success 'write graph in bare repo' ' cd "$TRASH_DIRECTORY/bare" && git commit-graph write && - test_path_is_file $baredir/info/commit-graph + test_path_is_file $baredir/info/commit-graph && + graph_read_expect "11" "large_edges" ' test_done -- cgit v1.3 From 177722b344256b84f1c97b7363d3f19c04928039 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 10 Apr 2018 08:56:05 -0400 Subject: commit: integrate commit graph with commit parsing Teach Git to inspect a commit graph file to supply the contents of a struct commit when calling parse_commit_gently(). This implementation satisfies all post-conditions on the struct commit, including loading parents, the root tree, and the commit date. If core.commitGraph is false, then do not check graph files. In test script t5318-commit-graph.sh, add output-matching conditions on read-only graph operations. By loading commits from the graph instead of parsing commit buffers, we save a lot of time on long commit walks. Here are some performance results for a copy of the Linux repository where 'master' has 678,653 reachable commits and is behind 'origin/master' by 59,929 commits. | Command | Before | After | Rel % | |----------------------------------|--------|--------|-------| | log --oneline --topo-order -1000 | 8.31s | 0.94s | -88% | | branch -vv | 1.02s | 0.14s | -86% | | rev-list --all | 5.89s | 1.07s | -81% | | rev-list --all --objects | 66.15s | 58.45s | -11% | Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- alloc.c | 1 + commit-graph.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++- commit-graph.h | 12 +++++ commit.c | 3 ++ commit.h | 3 ++ t/t5318-commit-graph.sh | 47 +++++++++++++++- 6 files changed, 205 insertions(+), 2 deletions(-) (limited to 'commit-graph.h') diff --git a/alloc.c b/alloc.c index 12afadfacd..cf4f8b61e1 100644 --- a/alloc.c +++ b/alloc.c @@ -93,6 +93,7 @@ void *alloc_commit_node(void) struct commit *c = alloc_node(&commit_state, sizeof(struct commit)); c->object.type = OBJ_COMMIT; c->index = alloc_commit_index(); + c->graph_pos = COMMIT_NOT_FROM_GRAPH; return c; } diff --git a/commit-graph.c b/commit-graph.c index ea29c5c2d8..f745186e7f 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -38,7 +38,6 @@ #define GRAPH_MIN_SIZE (5 * GRAPH_CHUNKLOOKUP_WIDTH + GRAPH_FANOUT_SIZE + \ GRAPH_OID_LEN + 8) - char *get_commit_graph_filename(const char *obj_dir) { return xstrfmt("%s/info/commit-graph", obj_dir); @@ -179,6 +178,145 @@ cleanup_fail: exit(1); } +/* global storage */ +static struct commit_graph *commit_graph = NULL; + +static void prepare_commit_graph_one(const char *obj_dir) +{ + char *graph_name; + + if (commit_graph) + return; + + graph_name = get_commit_graph_filename(obj_dir); + commit_graph = load_commit_graph_one(graph_name); + + FREE_AND_NULL(graph_name); +} + +static int prepare_commit_graph_run_once = 0; +static void prepare_commit_graph(void) +{ + struct alternate_object_database *alt; + char *obj_dir; + + if (prepare_commit_graph_run_once) + return; + prepare_commit_graph_run_once = 1; + + obj_dir = get_object_directory(); + prepare_commit_graph_one(obj_dir); + prepare_alt_odb(); + for (alt = alt_odb_list; !commit_graph && alt; alt = alt->next) + prepare_commit_graph_one(alt->path); +} + +static void close_commit_graph(void) +{ + if (!commit_graph) + return; + + if (commit_graph->graph_fd >= 0) { + munmap((void *)commit_graph->data, commit_graph->data_len); + commit_graph->data = NULL; + close(commit_graph->graph_fd); + } + + FREE_AND_NULL(commit_graph); +} + +static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos) +{ + return bsearch_hash(oid->hash, g->chunk_oid_fanout, + g->chunk_oid_lookup, g->hash_len, pos); +} + +static struct commit_list **insert_parent_or_die(struct commit_graph *g, + uint64_t pos, + struct commit_list **pptr) +{ + struct commit *c; + struct object_id oid; + hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * pos); + c = lookup_commit(&oid); + if (!c) + die("could not find commit %s", oid_to_hex(&oid)); + c->graph_pos = pos; + return &commit_list_insert(c, pptr)->next; +} + +static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos) +{ + struct object_id oid; + uint32_t edge_value; + uint32_t *parent_data_ptr; + uint64_t date_low, date_high; + struct commit_list **pptr; + const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len + 16) * pos; + + item->object.parsed = 1; + item->graph_pos = pos; + + hashcpy(oid.hash, commit_data); + item->tree = lookup_tree(&oid); + + date_high = get_be32(commit_data + g->hash_len + 8) & 0x3; + date_low = get_be32(commit_data + g->hash_len + 12); + item->date = (timestamp_t)((date_high << 32) | date_low); + + pptr = &item->parents; + + edge_value = get_be32(commit_data + g->hash_len); + if (edge_value == GRAPH_PARENT_NONE) + return 1; + pptr = insert_parent_or_die(g, edge_value, pptr); + + edge_value = get_be32(commit_data + g->hash_len + 4); + if (edge_value == GRAPH_PARENT_NONE) + return 1; + if (!(edge_value & GRAPH_OCTOPUS_EDGES_NEEDED)) { + pptr = insert_parent_or_die(g, edge_value, pptr); + return 1; + } + + parent_data_ptr = (uint32_t*)(g->chunk_large_edges + + 4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK)); + do { + edge_value = get_be32(parent_data_ptr); + pptr = insert_parent_or_die(g, + edge_value & GRAPH_EDGE_LAST_MASK, + pptr); + parent_data_ptr++; + } while (!(edge_value & GRAPH_LAST_EDGE)); + + return 1; +} + +int parse_commit_in_graph(struct commit *item) +{ + if (!core_commit_graph) + return 0; + if (item->object.parsed) + return 1; + + prepare_commit_graph(); + if (commit_graph) { + uint32_t pos; + int found; + if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) { + pos = item->graph_pos; + found = 1; + } else { + found = bsearch_graph(commit_graph, &(item->object.oid), &pos); + } + + if (found) + return fill_commit_in_graph(item, commit_graph, pos); + } + + return 0; +} + static void write_graph_chunk_fanout(struct hashfile *f, struct commit **commits, int nr_commits) @@ -530,6 +668,7 @@ void write_commit_graph(const char *obj_dir) write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr); write_graph_chunk_large_edges(f, commits.list, commits.nr); + close_commit_graph(); finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); commit_lock_file(&lk); diff --git a/commit-graph.h b/commit-graph.h index 2528478f06..73b28beed1 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -5,6 +5,18 @@ char *get_commit_graph_filename(const char *obj_dir); +/* + * Given a commit struct, try to fill the commit struct info, including: + * 1. tree object + * 2. date + * 3. parents. + * + * Returns 1 if and only if the commit was found in the packed graph. + * + * See parse_commit_buffer() for the fallback after this call. + */ +int parse_commit_in_graph(struct commit *item); + struct commit_graph { int graph_fd; diff --git a/commit.c b/commit.c index e8a49b9c84..eb61729d10 100644 --- a/commit.c +++ b/commit.c @@ -1,6 +1,7 @@ #include "cache.h" #include "tag.h" #include "commit.h" +#include "commit-graph.h" #include "pkt-line.h" #include "utf8.h" #include "diff.h" @@ -383,6 +384,8 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing) return -1; if (item->object.parsed) return 0; + if (parse_commit_in_graph(item)) + return 0; buffer = read_sha1_file(item->object.oid.hash, &type, &size); if (!buffer) return quiet_on_missing ? -1 : diff --git a/commit.h b/commit.h index 0fb8271665..e57ae4b583 100644 --- a/commit.h +++ b/commit.h @@ -9,6 +9,8 @@ #include "string-list.h" #include "pretty.h" +#define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF + struct commit_list { struct commit *item; struct commit_list *next; @@ -21,6 +23,7 @@ struct commit { timestamp_t date; struct commit_list *parents; struct tree *tree; + uint32_t graph_pos; }; extern int save_commit_buffer; diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 2f44f91193..51de9cc455 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -7,6 +7,7 @@ test_expect_success 'setup full repo' ' mkdir full && cd "$TRASH_DIRECTORY/full" && git init && + git config core.commitGraph true && objdir=".git/objects" ' @@ -26,6 +27,29 @@ test_expect_success 'create commits and repack' ' git repack ' +graph_git_two_modes() { + git -c core.graph=true $1 >output + git -c core.graph=false $1 >expect + test_cmp output expect +} + +graph_git_behavior() { + MSG=$1 + DIR=$2 + BRANCH=$3 + COMPARE=$4 + test_expect_success "check normal git operations: $MSG" ' + cd "$TRASH_DIRECTORY/$DIR" && + graph_git_two_modes "log --oneline $BRANCH" && + graph_git_two_modes "log --topo-order $BRANCH" && + graph_git_two_modes "log --graph $COMPARE..$BRANCH" && + graph_git_two_modes "branch -vv" && + graph_git_two_modes "merge-base -a $BRANCH $COMPARE" + ' +} + +graph_git_behavior 'no graph' full commits/3 commits/1 + graph_read_expect() { OPTIONAL="" NUM_CHUNKS=3 @@ -50,6 +74,8 @@ test_expect_success 'write graph' ' graph_read_expect "3" ' +graph_git_behavior 'graph exists' full commits/3 commits/1 + test_expect_success 'Add more commits' ' cd "$TRASH_DIRECTORY/full" && git reset --hard commits/1 && @@ -86,7 +112,6 @@ test_expect_success 'Add more commits' ' # |___/____/ # 1 - test_expect_success 'write graph with merges' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write && @@ -94,6 +119,10 @@ test_expect_success 'write graph with merges' ' graph_read_expect "10" "large_edges" ' +graph_git_behavior 'merge 1 vs 2' full merge/1 merge/2 +graph_git_behavior 'merge 1 vs 3' full merge/1 merge/3 +graph_git_behavior 'merge 2 vs 3' full merge/2 merge/3 + test_expect_success 'Add one more commit' ' cd "$TRASH_DIRECTORY/full" && test_commit 8 && @@ -115,6 +144,9 @@ test_expect_success 'Add one more commit' ' # |___/____/ # 1 +graph_git_behavior 'mixed mode, commit 8 vs merge 1' full commits/8 merge/1 +graph_git_behavior 'mixed mode, commit 8 vs merge 2' full commits/8 merge/2 + test_expect_success 'write graph with new commit' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write && @@ -122,6 +154,9 @@ test_expect_success 'write graph with new commit' ' graph_read_expect "11" "large_edges" ' +graph_git_behavior 'full graph, commit 8 vs merge 1' full commits/8 merge/1 +graph_git_behavior 'full graph, commit 8 vs merge 2' full commits/8 merge/2 + test_expect_success 'write graph with nothing new' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write && @@ -129,13 +164,20 @@ test_expect_success 'write graph with nothing new' ' graph_read_expect "11" "large_edges" ' +graph_git_behavior 'cleared graph, commit 8 vs merge 1' full commits/8 merge/1 +graph_git_behavior 'cleared graph, commit 8 vs merge 2' full commits/8 merge/2 + test_expect_success 'setup bare repo' ' cd "$TRASH_DIRECTORY" && git clone --bare --no-local full bare && cd bare && + git config core.commitGraph true && baredir="./objects" ' +graph_git_behavior 'bare repo, commit 8 vs merge 1' bare commits/8 merge/1 +graph_git_behavior 'bare repo, commit 8 vs merge 2' bare commits/8 merge/2 + test_expect_success 'write graph in bare repo' ' cd "$TRASH_DIRECTORY/bare" && git commit-graph write && @@ -143,4 +185,7 @@ test_expect_success 'write graph in bare repo' ' graph_read_expect "11" "large_edges" ' +graph_git_behavior 'bare repo with graph, commit 8 vs merge 1' bare commits/8 merge/1 +graph_git_behavior 'bare repo with graph, commit 8 vs merge 2' bare commits/8 merge/2 + test_done -- cgit v1.3 From 049d51a2bb9a03d2f2c2cce1ae41e57dbbf42244 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 10 Apr 2018 08:56:06 -0400 Subject: commit-graph: read only from specific pack-indexes Teach git-commit-graph to inspect the objects only in a certain list of pack-indexes within the given pack directory. This allows updating the commit graph iteratively. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 11 ++++++++++- builtin/commit-graph.c | 33 ++++++++++++++++++++++++++++++--- commit-graph.c | 26 ++++++++++++++++++++++++-- commit-graph.h | 4 +++- packfile.c | 4 ++-- packfile.h | 2 ++ t/t5318-commit-graph.sh | 10 ++++++++++ 7 files changed, 81 insertions(+), 9 deletions(-) (limited to 'commit-graph.h') diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 8aad8303f5..8143cc3f07 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -34,7 +34,9 @@ COMMANDS 'write':: Write a commit graph file based on the commits found in packfiles. -Includes all commits from the existing commit graph file. ++ +With the `--stdin-packs` option, generate the new commit graph by +walking objects only in the specified pack-indexes. 'read':: @@ -51,6 +53,13 @@ EXAMPLES $ git commit-graph write ------------------------------------------------ +* Write a graph file, extending the current graph file using commits +* in . ++ +------------------------------------------------ +$ echo | git commit-graph write --stdin-packs +------------------------------------------------ + * Read basic information from the commit-graph file. + ------------------------------------------------ diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index efd39331d7..5c70199003 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -8,7 +8,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), - N_("git commit-graph write [--object-dir ]"), + N_("git commit-graph write [--object-dir ] [--stdin-packs]"), NULL }; @@ -18,12 +18,13 @@ static const char * const builtin_commit_graph_read_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ]"), + N_("git commit-graph write [--object-dir ] [--stdin-packs]"), NULL }; static struct opts_commit_graph { const char *obj_dir; + int stdin_packs; } opts; static int graph_read(int argc, const char **argv) @@ -76,10 +77,18 @@ static int graph_read(int argc, const char **argv) static int graph_write(int argc, const char **argv) { + const char **pack_indexes = NULL; + int packs_nr = 0; + const char **lines = NULL; + int lines_nr = 0; + int lines_alloc = 0; + static struct option builtin_commit_graph_write_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, N_("dir"), N_("The object directory to store the graph")), + OPT_BOOL(0, "stdin-packs", &opts.stdin_packs, + N_("scan pack-indexes listed by stdin for commits")), OPT_END(), }; @@ -90,7 +99,25 @@ static int graph_write(int argc, const char **argv) if (!opts.obj_dir) opts.obj_dir = get_object_directory(); - write_commit_graph(opts.obj_dir); + if (opts.stdin_packs) { + struct strbuf buf = STRBUF_INIT; + lines_nr = 0; + lines_alloc = 128; + ALLOC_ARRAY(lines, lines_alloc); + + while (strbuf_getline(&buf, stdin) != EOF) { + ALLOC_GROW(lines, lines_nr + 1, lines_alloc); + lines[lines_nr++] = strbuf_detach(&buf, NULL); + } + + pack_indexes = lines; + packs_nr = lines_nr; + } + + write_commit_graph(opts.obj_dir, + pack_indexes, + packs_nr); + return 0; } diff --git a/commit-graph.c b/commit-graph.c index f745186e7f..70472840a3 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -549,7 +549,9 @@ static void close_reachable(struct packed_oid_list *oids) } } -void write_commit_graph(const char *obj_dir) +void write_commit_graph(const char *obj_dir, + const char **pack_indexes, + int nr_packs) { struct packed_oid_list oids; struct packed_commit_list commits; @@ -571,7 +573,27 @@ void write_commit_graph(const char *obj_dir) oids.alloc = 1024; ALLOC_ARRAY(oids.list, oids.alloc); - for_each_packed_object(add_packed_commits, &oids, 0); + if (pack_indexes) { + struct strbuf packname = STRBUF_INIT; + int dirlen; + strbuf_addf(&packname, "%s/pack/", obj_dir); + dirlen = packname.len; + for (i = 0; i < nr_packs; i++) { + struct packed_git *p; + strbuf_setlen(&packname, dirlen); + strbuf_addstr(&packname, pack_indexes[i]); + p = add_packed_git(packname.buf, packname.len, 1); + if (!p) + die("error adding pack %s", packname.buf); + if (open_pack_index(p)) + die("error opening index for %s", packname.buf); + for_each_object_in_pack(p, add_packed_commits, &oids); + close_pack(p); + } + strbuf_release(&packname); + } else + for_each_packed_object(add_packed_commits, &oids, 0); + close_reachable(&oids); QSORT(oids.list, oids.nr, commit_compare); diff --git a/commit-graph.h b/commit-graph.h index 73b28beed1..f065f0866f 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -36,6 +36,8 @@ struct commit_graph { struct commit_graph *load_commit_graph_one(const char *graph_file); -void write_commit_graph(const char *obj_dir); +void write_commit_graph(const char *obj_dir, + const char **pack_indexes, + int nr_packs); #endif diff --git a/packfile.c b/packfile.c index 5d07f330c8..f14179f937 100644 --- a/packfile.c +++ b/packfile.c @@ -304,7 +304,7 @@ void close_pack_index(struct packed_git *p) } } -static void close_pack(struct packed_git *p) +void close_pack(struct packed_git *p) { close_pack_windows(p); close_pack_fd(p); @@ -1850,7 +1850,7 @@ int has_pack_index(const unsigned char *sha1) return 1; } -static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data) +int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data) { uint32_t i; int r = 0; diff --git a/packfile.h b/packfile.h index a7fca598d6..b341f2bf5e 100644 --- a/packfile.h +++ b/packfile.h @@ -63,6 +63,7 @@ extern void close_pack_index(struct packed_git *); extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); extern void close_pack_windows(struct packed_git *); +extern void close_pack(struct packed_git *); extern void close_all_packs(void); extern void unuse_pack(struct pack_window **); extern void clear_delta_base_cache(void); @@ -140,6 +141,7 @@ typedef int each_packed_object_fn(const struct object_id *oid, struct packed_git *pack, uint32_t pos, void *data); +extern int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn, void *data); extern int for_each_packed_object(each_packed_object_fn, void *, unsigned flags); /* diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 51de9cc455..3bb44d0c09 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -167,6 +167,16 @@ test_expect_success 'write graph with nothing new' ' graph_git_behavior 'cleared graph, commit 8 vs merge 1' full commits/8 merge/1 graph_git_behavior 'cleared graph, commit 8 vs merge 2' full commits/8 merge/2 +test_expect_success 'build graph from latest pack with closure' ' + cd "$TRASH_DIRECTORY/full" && + cat new-idx | git commit-graph write --stdin-packs && + test_path_is_file $objdir/info/commit-graph && + graph_read_expect "9" "large_edges" +' + +graph_git_behavior 'graph from pack, commit 8 vs merge 1' full commits/8 merge/1 +graph_git_behavior 'graph from pack, commit 8 vs merge 2' full commits/8 merge/2 + test_expect_success 'setup bare repo' ' cd "$TRASH_DIRECTORY" && git clone --bare --no-local full bare && -- cgit v1.3 From 3d5df01b5e42416a59e857135e932bbdd8cc3ba0 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 10 Apr 2018 08:56:07 -0400 Subject: commit-graph: build graph from starting commits Teach git-commit-graph to read commits from stdin when the --stdin-commits flag is specified. Commits reachable from these commits are added to the graph. This is a much faster way to construct the graph than inspecting all packed objects, but is restricted to known tips. For the Linux repository, 700,000+ commits were added to the graph file starting from 'master' in 7-9 seconds, depending on the number of packfiles in the repo (1, 24, or 120). Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 14 +++++++++++++- builtin/commit-graph.c | 27 +++++++++++++++++++++------ commit-graph.c | 27 +++++++++++++++++++++++++-- commit-graph.h | 4 +++- t/t5318-commit-graph.sh | 13 +++++++++++++ 5 files changed, 75 insertions(+), 10 deletions(-) (limited to 'commit-graph.h') diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 8143cc3f07..442ac243e6 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -36,7 +36,13 @@ COMMANDS Write a commit graph file based on the commits found in packfiles. + With the `--stdin-packs` option, generate the new commit graph by -walking objects only in the specified pack-indexes. +walking objects only in the specified pack-indexes. (Cannot be combined +with --stdin-commits.) ++ +With the `--stdin-commits` option, generate the new commit graph by +walking commits starting at the commits specified in stdin as a list +of OIDs in hex, one OID per line. (Cannot be combined with +--stdin-packs.) 'read':: @@ -60,6 +66,12 @@ $ git commit-graph write $ echo | git commit-graph write --stdin-packs ------------------------------------------------ +* Write a graph file containing all reachable commits. ++ +------------------------------------------------ +$ git show-ref -s | git commit-graph write --stdin-commits +------------------------------------------------ + * Read basic information from the commit-graph file. + ------------------------------------------------ diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 5c70199003..b5c0b08905 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -8,7 +8,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), - N_("git commit-graph write [--object-dir ] [--stdin-packs]"), + N_("git commit-graph write [--object-dir ] [--stdin-packs|--stdin-commits]"), NULL }; @@ -18,13 +18,14 @@ static const char * const builtin_commit_graph_read_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--stdin-packs]"), + N_("git commit-graph write [--object-dir ] [--stdin-packs|--stdin-commits]"), NULL }; static struct opts_commit_graph { const char *obj_dir; int stdin_packs; + int stdin_commits; } opts; static int graph_read(int argc, const char **argv) @@ -79,6 +80,8 @@ static int graph_write(int argc, const char **argv) { const char **pack_indexes = NULL; int packs_nr = 0; + const char **commit_hex = NULL; + int commits_nr = 0; const char **lines = NULL; int lines_nr = 0; int lines_alloc = 0; @@ -89,6 +92,8 @@ static int graph_write(int argc, const char **argv) N_("The object directory to store the graph")), OPT_BOOL(0, "stdin-packs", &opts.stdin_packs, N_("scan pack-indexes listed by stdin for commits")), + OPT_BOOL(0, "stdin-commits", &opts.stdin_commits, + N_("start walk at commits listed by stdin")), OPT_END(), }; @@ -96,10 +101,12 @@ static int graph_write(int argc, const char **argv) builtin_commit_graph_write_options, builtin_commit_graph_write_usage, 0); + if (opts.stdin_packs && opts.stdin_commits) + die(_("cannot use both --stdin-commits and --stdin-packs")); if (!opts.obj_dir) opts.obj_dir = get_object_directory(); - if (opts.stdin_packs) { + if (opts.stdin_packs || opts.stdin_commits) { struct strbuf buf = STRBUF_INIT; lines_nr = 0; lines_alloc = 128; @@ -110,13 +117,21 @@ static int graph_write(int argc, const char **argv) lines[lines_nr++] = strbuf_detach(&buf, NULL); } - pack_indexes = lines; - packs_nr = lines_nr; + if (opts.stdin_packs) { + pack_indexes = lines; + packs_nr = lines_nr; + } + if (opts.stdin_commits) { + commit_hex = lines; + commits_nr = lines_nr; + } } write_commit_graph(opts.obj_dir, pack_indexes, - packs_nr); + packs_nr, + commit_hex, + commits_nr); return 0; } diff --git a/commit-graph.c b/commit-graph.c index 70472840a3..a59d1e387b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -551,7 +551,9 @@ static void close_reachable(struct packed_oid_list *oids) void write_commit_graph(const char *obj_dir, const char **pack_indexes, - int nr_packs) + int nr_packs, + const char **commit_hex, + int nr_commits) { struct packed_oid_list oids; struct packed_commit_list commits; @@ -591,7 +593,28 @@ void write_commit_graph(const char *obj_dir, close_pack(p); } strbuf_release(&packname); - } else + } + + if (commit_hex) { + for (i = 0; i < nr_commits; i++) { + const char *end; + struct object_id oid; + struct commit *result; + + if (commit_hex[i] && parse_oid_hex(commit_hex[i], &oid, &end)) + continue; + + result = lookup_commit_reference_gently(&oid, 1); + + if (result) { + ALLOC_GROW(oids.list, oids.nr + 1, oids.alloc); + oidcpy(&oids.list[oids.nr], &(result->object.oid)); + oids.nr++; + } + } + } + + if (!pack_indexes && !commit_hex) for_each_packed_object(add_packed_commits, &oids, 0); close_reachable(&oids); diff --git a/commit-graph.h b/commit-graph.h index f065f0866f..fd035101b2 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -38,6 +38,8 @@ struct commit_graph *load_commit_graph_one(const char *graph_file); void write_commit_graph(const char *obj_dir, const char **pack_indexes, - int nr_packs); + int nr_packs, + const char **commit_hex, + int nr_commits); #endif diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 3bb44d0c09..c28cfb5d7f 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -177,6 +177,19 @@ test_expect_success 'build graph from latest pack with closure' ' graph_git_behavior 'graph from pack, commit 8 vs merge 1' full commits/8 merge/1 graph_git_behavior 'graph from pack, commit 8 vs merge 2' full commits/8 merge/2 +test_expect_success 'build graph from commits with closure' ' + cd "$TRASH_DIRECTORY/full" && + git tag -a -m "merge" tag/merge merge/2 && + git rev-parse tag/merge >commits-in && + git rev-parse merge/1 >>commits-in && + cat commits-in | git commit-graph write --stdin-commits && + test_path_is_file $objdir/info/commit-graph && + graph_read_expect "6" +' + +graph_git_behavior 'graph from commits, commit 8 vs merge 1' full commits/8 merge/1 +graph_git_behavior 'graph from commits, commit 8 vs merge 2' full commits/8 merge/2 + test_expect_success 'setup bare repo' ' cd "$TRASH_DIRECTORY" && git clone --bare --no-local full bare && -- cgit v1.3 From 7547b95b4fbb8591726b1d9381c176cc27fc6aea Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 10 Apr 2018 08:56:08 -0400 Subject: commit-graph: implement "--append" option Teach git-commit-graph to add all commits from the existing commit-graph file to the file about to be written. This should be used when adding new commits without performing garbage collection. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- Documentation/git-commit-graph.txt | 10 ++++++++++ builtin/commit-graph.c | 10 +++++++--- commit-graph.c | 17 ++++++++++++++++- commit-graph.h | 3 ++- t/t5318-commit-graph.sh | 10 ++++++++++ 5 files changed, 45 insertions(+), 5 deletions(-) (limited to 'commit-graph.h') diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 442ac243e6..4c97b555cc 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -43,6 +43,9 @@ With the `--stdin-commits` option, generate the new commit graph by walking commits starting at the commits specified in stdin as a list of OIDs in hex, one OID per line. (Cannot be combined with --stdin-packs.) ++ +With the `--append` option, include all commits that are present in the +existing commit-graph file. 'read':: @@ -72,6 +75,13 @@ $ echo | git commit-graph write --stdin-packs $ git show-ref -s | git commit-graph write --stdin-commits ------------------------------------------------ +* Write a graph file containing all commits in the current +* commit-graph file along with those reachable from HEAD. ++ +------------------------------------------------ +$ git rev-parse HEAD | git commit-graph write --stdin-commits --append +------------------------------------------------ + * Read basic information from the commit-graph file. + ------------------------------------------------ diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index b5c0b08905..37420ae0fd 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -8,7 +8,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), - N_("git commit-graph write [--object-dir ] [--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append] [--stdin-packs|--stdin-commits]"), NULL }; @@ -18,7 +18,7 @@ static const char * const builtin_commit_graph_read_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append] [--stdin-packs|--stdin-commits]"), NULL }; @@ -26,6 +26,7 @@ static struct opts_commit_graph { const char *obj_dir; int stdin_packs; int stdin_commits; + int append; } opts; static int graph_read(int argc, const char **argv) @@ -94,6 +95,8 @@ static int graph_write(int argc, const char **argv) N_("scan pack-indexes listed by stdin for commits")), OPT_BOOL(0, "stdin-commits", &opts.stdin_commits, N_("start walk at commits listed by stdin")), + OPT_BOOL(0, "append", &opts.append, + N_("include all commits already in the commit-graph file")), OPT_END(), }; @@ -131,7 +134,8 @@ static int graph_write(int argc, const char **argv) pack_indexes, packs_nr, commit_hex, - commits_nr); + commits_nr, + opts.append); return 0; } diff --git a/commit-graph.c b/commit-graph.c index a59d1e387b..3ff8c84c0e 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -553,7 +553,8 @@ void write_commit_graph(const char *obj_dir, const char **pack_indexes, int nr_packs, const char **commit_hex, - int nr_commits) + int nr_commits, + int append) { struct packed_oid_list oids; struct packed_commit_list commits; @@ -571,10 +572,24 @@ void write_commit_graph(const char *obj_dir, oids.nr = 0; oids.alloc = approximate_object_count() / 4; + if (append) { + prepare_commit_graph_one(obj_dir); + if (commit_graph) + oids.alloc += commit_graph->num_commits; + } + if (oids.alloc < 1024) oids.alloc = 1024; ALLOC_ARRAY(oids.list, oids.alloc); + if (append && commit_graph) { + for (i = 0; i < commit_graph->num_commits; i++) { + const unsigned char *hash = commit_graph->chunk_oid_lookup + + commit_graph->hash_len * i; + hashcpy(oids.list[oids.nr++].hash, hash); + } + } + if (pack_indexes) { struct strbuf packname = STRBUF_INIT; int dirlen; diff --git a/commit-graph.h b/commit-graph.h index fd035101b2..e1d8580c98 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -40,6 +40,7 @@ void write_commit_graph(const char *obj_dir, const char **pack_indexes, int nr_packs, const char **commit_hex, - int nr_commits); + int nr_commits, + int append); #endif diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index c28cfb5d7f..a380419b65 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -190,6 +190,16 @@ test_expect_success 'build graph from commits with closure' ' graph_git_behavior 'graph from commits, commit 8 vs merge 1' full commits/8 merge/1 graph_git_behavior 'graph from commits, commit 8 vs merge 2' full commits/8 merge/2 +test_expect_success 'build graph from commits with append' ' + cd "$TRASH_DIRECTORY/full" && + git rev-parse merge/3 | git commit-graph write --stdin-commits --append && + test_path_is_file $objdir/info/commit-graph && + graph_read_expect "10" "large_edges" +' + +graph_git_behavior 'append graph, commit 8 vs merge 1' full commits/8 merge/1 +graph_git_behavior 'append graph, commit 8 vs merge 2' full commits/8 merge/2 + test_expect_success 'setup bare repo' ' cd "$TRASH_DIRECTORY" && git clone --bare --no-local full bare && -- cgit v1.3