From c8d521faf72590fd4cd9bab3d20eb3de139f69d5 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 16 Aug 2018 08:13:07 +0200 Subject: Add delta-islands.{c,h} Hosting providers that allow users to "fork" existing repos want those forks to share as much disk space as possible. Alternates are an existing solution to keep all the objects from all the forks into a unique central repo, but this can have some drawbacks. Especially when packing the central repo, deltas will be created between objects from different forks. This can make cloning or fetching a fork much slower and much more CPU intensive as Git might have to compute new deltas for many objects to avoid sending objects from a different fork. Because the inefficiency primarily arises when an object is deltified against another object that does not exist in the same fork, we partition objects into sets that appear in the same fork, and define "delta islands". When finding delta base, we do not allow an object outside the same island to be considered as its base. So "delta islands" is a way to store objects from different forks in the same repo and packfile without having deltas between objects from different forks. This patch implements the delta islands mechanism in "delta-islands.{c,h}", but does not yet make use of it. A few new fields are added in 'struct object_entry' in "pack-objects.h" though. The documentation will follow in a patch that actually uses delta islands in "builtin/pack-objects.c". Signed-off-by: Jeff King Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- pack-objects.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'pack-objects.h') diff --git a/pack-objects.h b/pack-objects.h index edf74dabdd..8eecd67991 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -100,6 +100,10 @@ struct object_entry { unsigned type_:TYPE_BITS; unsigned no_try_delta:1; unsigned in_pack_type:TYPE_BITS; /* could be delta */ + + unsigned int tree_depth; /* should be repositioned for packing? */ + unsigned char layer; + unsigned preferred_base:1; /* * we do not pack this, but is available * to be used as the base object to delta -- cgit v1.3 From 108f530385e969feab343b2b8acadeb7bb670009 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Thu, 16 Aug 2018 08:13:12 +0200 Subject: pack-objects: move tree_depth into 'struct packing_data' This reduces the size of 'struct object_entry' and therefore makes packing objects more efficient. This also renames cmp_tree_depth() into tree_depth_compare(), as it is more modern to have the name of the compare functions end with "compare". Helped-by: Jeff King Helped-by: Duy Nguyen Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 ++-- delta-islands.c | 27 ++++++++++++++++++--------- pack-objects.c | 6 ++++++ pack-objects.h | 21 ++++++++++++++++++++- 4 files changed, 46 insertions(+), 12 deletions(-) (limited to 'pack-objects.h') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 30ef48dc43..fd3e514b31 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2716,8 +2716,8 @@ static void show_object(struct object *obj, const char *name, void *data) depth++; ent = packlist_find(&to_pack, obj->oid.hash, NULL); - if (ent && depth > ent->tree_depth) - ent->tree_depth = depth; + if (ent && depth > oe_tree_depth(&to_pack, ent)) + oe_set_tree_depth(&to_pack, ent, depth); } } diff --git a/delta-islands.c b/delta-islands.c index e7123d44a3..b0b9157c85 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -224,17 +224,23 @@ static void mark_remote_island_1(struct remote_island *rl, int is_core_island) island_counter++; } -static int cmp_tree_depth(const void *va, const void *vb) +struct tree_islands_todo { + struct object_entry *entry; + unsigned int depth; +}; + +static int tree_depth_compare(const void *a, const void *b) { - struct object_entry *a = *(struct object_entry **)va; - struct object_entry *b = *(struct object_entry **)vb; - return a->tree_depth - b->tree_depth; + const struct tree_islands_todo *todo_a = a; + const struct tree_islands_todo *todo_b = b; + + return todo_a->depth - todo_b->depth; } void resolve_tree_islands(int progress, struct packing_data *to_pack) { struct progress *progress_state = NULL; - struct object_entry **todo; + struct tree_islands_todo *todo; int nr = 0; int i; @@ -250,16 +256,19 @@ void resolve_tree_islands(int progress, struct packing_data *to_pack) */ ALLOC_ARRAY(todo, to_pack->nr_objects); for (i = 0; i < to_pack->nr_objects; i++) { - if (oe_type(&to_pack->objects[i]) == OBJ_TREE) - todo[nr++] = &to_pack->objects[i]; + if (oe_type(&to_pack->objects[i]) == OBJ_TREE) { + todo[nr].entry = &to_pack->objects[i]; + todo[nr].depth = oe_tree_depth(to_pack, &to_pack->objects[i]); + nr++; + } } - QSORT(todo, nr, cmp_tree_depth); + QSORT(todo, nr, tree_depth_compare); if (progress) progress_state = start_progress(_("Propagating island marks"), nr); for (i = 0; i < nr; i++) { - struct object_entry *ent = todo[i]; + struct object_entry *ent = todo[i].entry; struct island_bitmap *root_marks; struct tree *tree; struct tree_desc desc; diff --git a/pack-objects.c b/pack-objects.c index 92708522e7..30314572e6 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -160,6 +160,9 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, if (!pdata->in_pack_by_idx) REALLOC_ARRAY(pdata->in_pack, pdata->nr_alloc); + + if (pdata->tree_depth) + REALLOC_ARRAY(pdata->tree_depth, pdata->nr_alloc); } new_entry = pdata->objects + pdata->nr_objects++; @@ -175,5 +178,8 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, if (pdata->in_pack) pdata->in_pack[pdata->nr_objects - 1] = NULL; + if (pdata->tree_depth) + pdata->tree_depth[pdata->nr_objects - 1] = 0; + return new_entry; } diff --git a/pack-objects.h b/pack-objects.h index 8eecd67991..3cb5527eeb 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -101,7 +101,6 @@ struct object_entry { unsigned no_try_delta:1; unsigned in_pack_type:TYPE_BITS; /* could be delta */ - unsigned int tree_depth; /* should be repositioned for packing? */ unsigned char layer; unsigned preferred_base:1; /* @@ -145,6 +144,9 @@ struct packing_data { struct packed_git **in_pack; uintmax_t oe_size_limit; + + /* delta islands */ + unsigned int *tree_depth; }; void prepare_packing_data(struct packing_data *pdata); @@ -350,4 +352,21 @@ static inline void oe_set_delta_size(struct packing_data *pack, "where delta size is the same as entry size"); } +static inline unsigned int oe_tree_depth(struct packing_data *pack, + struct object_entry *e) +{ + if (!pack->tree_depth) + return 0; + return pack->tree_depth[e - pack->objects]; +} + +static inline void oe_set_tree_depth(struct packing_data *pack, + struct object_entry *e, + unsigned int tree_depth) +{ + if (!pack->tree_depth) + ALLOC_ARRAY(pack->tree_depth, pack->nr_objects); + pack->tree_depth[e - pack->objects] = tree_depth; +} + #endif -- cgit v1.3 From fe0ac2fb7f8e87d37ef83dcee2d93901d58d8277 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Thu, 16 Aug 2018 08:13:13 +0200 Subject: pack-objects: move 'layer' into 'struct packing_data' This reduces the size of 'struct object_entry' from 88 bytes to 80 and therefore makes packing objects more efficient. For example on a Linux repo with 12M objects, `git pack-objects --all` needs extra 96MB memory even if the layer feature is not used. Helped-by: Jeff King Helped-by: Duy Nguyen Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 4 ++-- delta-islands.c | 4 ++-- pack-objects.c | 6 ++++++ pack-objects.h | 20 ++++++++++++++++++-- 4 files changed, 28 insertions(+), 6 deletions(-) (limited to 'pack-objects.h') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index fd3e514b31..d5d91eeed5 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -611,7 +611,7 @@ static inline void add_to_write_order(struct object_entry **wo, unsigned int *endp, struct object_entry *e) { - if (e->filled || e->layer != write_layer) + if (e->filled || oe_layer(&to_pack, e) != write_layer) return; wo[(*endp)++] = e; e->filled = 1; @@ -714,7 +714,7 @@ static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end) * Finally all the rest in really tight order */ for (i = last_untagged; i < to_pack.nr_objects; i++) { - if (!objects[i].filled && objects[i].layer == write_layer) + if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer) add_family_to_write_order(wo, wo_end, &objects[i]); } } diff --git a/delta-islands.c b/delta-islands.c index b0b9157c85..8e5018e406 100644 --- a/delta-islands.c +++ b/delta-islands.c @@ -488,13 +488,13 @@ int compute_pack_layers(struct packing_data *to_pack) struct object_entry *entry = &to_pack->objects[i]; khiter_t pos = kh_get_sha1(island_marks, entry->idx.oid.hash); - entry->layer = 1; + oe_set_layer(to_pack, entry, 1); if (pos < kh_end(island_marks)) { struct island_bitmap *bitmap = kh_value(island_marks, pos); if (island_bitmap_get(bitmap, island_counter_core)) - entry->layer = 0; + oe_set_layer(to_pack, entry, 0); } } diff --git a/pack-objects.c b/pack-objects.c index 30314572e6..98389460c2 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -163,6 +163,9 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, if (pdata->tree_depth) REALLOC_ARRAY(pdata->tree_depth, pdata->nr_alloc); + + if (pdata->layer) + REALLOC_ARRAY(pdata->layer, pdata->nr_alloc); } new_entry = pdata->objects + pdata->nr_objects++; @@ -181,5 +184,8 @@ struct object_entry *packlist_alloc(struct packing_data *pdata, if (pdata->tree_depth) pdata->tree_depth[pdata->nr_objects - 1] = 0; + if (pdata->layer) + pdata->layer[pdata->nr_objects - 1] = 0; + return new_entry; } diff --git a/pack-objects.h b/pack-objects.h index 3cb5527eeb..ad3c208764 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -101,8 +101,6 @@ struct object_entry { unsigned no_try_delta:1; unsigned in_pack_type:TYPE_BITS; /* could be delta */ - unsigned char layer; - unsigned preferred_base:1; /* * we do not pack this, but is available * to be used as the base object to delta @@ -147,6 +145,7 @@ struct packing_data { /* delta islands */ unsigned int *tree_depth; + unsigned char *layer; }; void prepare_packing_data(struct packing_data *pdata); @@ -369,4 +368,21 @@ static inline void oe_set_tree_depth(struct packing_data *pack, pack->tree_depth[e - pack->objects] = tree_depth; } +static inline unsigned char oe_layer(struct packing_data *pack, + struct object_entry *e) +{ + if (!pack->layer) + return 0; + return pack->layer[e - pack->objects]; +} + +static inline void oe_set_layer(struct packing_data *pack, + struct object_entry *e, + unsigned char layer) +{ + if (!pack->layer) + ALLOC_ARRAY(pack->layer, pack->nr_objects); + pack->layer[e - pack->objects] = layer; +} + #endif -- cgit v1.3