From aeb4a51ef82c71c9a65d11f77aeedb53bea0e983 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 25 May 2013 11:08:08 +0200 Subject: object_array: add function object_array_filter() Add a function that allows unwanted entries in an object_array to be removed. This encapsulation is a step towards giving object_array ownership of its entries' name memory. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- object.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'object.c') diff --git a/object.c b/object.c index 20703f52ed..fcd4a82c13 100644 --- a/object.c +++ b/object.c @@ -278,6 +278,22 @@ void add_object_array_with_mode(struct object *obj, const char *name, struct obj array->nr = ++nr; } +void object_array_filter(struct object_array *array, + object_array_each_func_t want, void *cb_data) +{ + unsigned nr = array->nr, src, dst; + struct object_array_entry *objects = array->objects; + + for (src = dst = 0; src < nr; src++) { + if (want(&objects[src], cb_data)) { + if (src != dst) + objects[dst] = objects[src]; + dst++; + } + } + array->nr = dst; +} + void object_array_remove_duplicates(struct object_array *array) { unsigned int ref, src, dst; -- cgit v1.3-5-g9baa From 1506510c170d23b8f769757dc81904f334c40281 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 25 May 2013 11:08:10 +0200 Subject: object_array_remove_duplicates(): rewrite to reduce copying The old version copied one entry to its destination position, then deleted any matching entries from the tail of the array. This required the tail of the array to be copied multiple times. It didn't affect the complexity of the algorithm because the whole tail has to be searched through anyway. But all the copying was unnecessary. Instead, check for the existence of an entry with the same name in the *head* of the list before copying an entry to its final position. This way each entry has to be copied at most one time. Extract a helper function contains_name() to do a bit of the work. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- object.c | 32 +++++++++++++++++++++----------- object.h | 6 +++++- 2 files changed, 26 insertions(+), 12 deletions(-) (limited to 'object.c') diff --git a/object.c b/object.c index fcd4a82c13..10b5349461 100644 --- a/object.c +++ b/object.c @@ -294,22 +294,32 @@ void object_array_filter(struct object_array *array, array->nr = dst; } +/* + * Return true iff array already contains an entry with name. + */ +static int contains_name(struct object_array *array, const char *name) +{ + unsigned nr = array->nr, i; + struct object_array_entry *object = array->objects; + + for (i = 0; i < nr; i++, object++) + if (!strcmp(object->name, name)) + return 1; + return 0; +} + void object_array_remove_duplicates(struct object_array *array) { - unsigned int ref, src, dst; + unsigned nr = array->nr, src; struct object_array_entry *objects = array->objects; - for (ref = 0; ref + 1 < array->nr; ref++) { - for (src = ref + 1, dst = src; - src < array->nr; - src++) { - if (!strcmp(objects[ref].name, objects[src].name)) - continue; - if (src != dst) - objects[dst] = objects[src]; - dst++; + array->nr = 0; + for (src = 0; src < nr; src++) { + if (!contains_name(array, objects[src].name)) { + if (src != array->nr) + objects[array->nr] = objects[src]; + array->nr++; } - array->nr = dst; } } diff --git a/object.h b/object.h index 0d39ff4f9d..6c1c27fba6 100644 --- a/object.h +++ b/object.h @@ -96,7 +96,11 @@ typedef int (*object_array_each_func_t)(struct object_array_entry *, void *); void object_array_filter(struct object_array *array, object_array_each_func_t want, void *cb_data); -void object_array_remove_duplicates(struct object_array *); +/* + * Remove from array all but the first entry with a given name. + * Warning: this function uses an O(N^2) algorithm. + */ +void object_array_remove_duplicates(struct object_array *array); void clear_object_flags(unsigned flags); -- cgit v1.3-5-g9baa From 31faeb2088ef35d0108ad81df3550513d6cec798 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Sat, 25 May 2013 11:08:14 +0200 Subject: object_array_entry: fix memory handling of the name field Previously, the memory management of the object_array_entry::name field was inconsistent and undocumented. object_array_entries are ultimately created by a single function, add_object_array_with_mode(), which has an argument "const char *name". This function used to simply set the name field to reference the string pointed to by the name parameter, and nobody on the object_array side ever freed the memory. Thus, it assumed that the memory for the name field would be managed by the caller, and that the lifetime of that string would be at least as long as the lifetime of the object_array_entry. But callers were inconsistent: * Some passed pointers to constant strings or argv entries, which was OK. * Some passed pointers to newly-allocated memory, but didn't arrange for the memory ever to be freed. * Some passed the return value of sha1_to_hex(), which is a pointer to a statically-allocated buffer that can be overwritten at any time. * Some passed pointers to refnames that they received from a for_each_ref()-type iteration, but the lifetimes of such refnames is not guaranteed by the refs API. Bring consistency to this mess by changing object_array to make its own copy for the object_array_entry::name field and free this memory when an object_array_entry is deleted from the array. Many callers were passing the empty string as the name parameter, so as a performance optimization, treat the empty string specially. Instead of making a copy, store a pointer to a statically-allocated empty string to object_array_entry::name. When deleting such an entry, skip the free(). Change the callers that were already passing copies to add_object_array_with_mode() to either skip the copy, or (if the memory needed to be allocated anyway) freeing the memory itself. A part of this commit effectively reverts 70d26c6e76 read_revisions_from_stdin: make copies for handle_revision_arg because the copying introduced by that commit (which is still necessary) is now done at a deeper level. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- bundle.c | 2 +- object.c | 26 +++++++++++++++++++++++--- object.h | 8 +++++++- revision.c | 6 ++++-- 4 files changed, 35 insertions(+), 7 deletions(-) (limited to 'object.c') diff --git a/bundle.c b/bundle.c index 4b0e5cd51b..3d64311373 100644 --- a/bundle.c +++ b/bundle.c @@ -281,7 +281,7 @@ int create_bundle(struct bundle_header *header, const char *path, if (!get_sha1_hex(buf.buf + 1, sha1)) { struct object *object = parse_object_or_die(sha1, buf.buf); object->flags |= UNINTERESTING; - add_pending_object(&revs, object, xstrdup(buf.buf)); + add_pending_object(&revs, object, buf.buf); } } else if (!get_sha1_hex(buf.buf, sha1)) { struct object *object = parse_object_or_die(sha1, buf.buf); diff --git a/object.c b/object.c index 10b5349461..243d694a58 100644 --- a/object.c +++ b/object.c @@ -260,11 +260,18 @@ void add_object_array(struct object *obj, const char *name, struct object_array add_object_array_with_mode(obj, name, array, S_IFINVALID); } +/* + * A zero-length string to which object_array_entry::name can be + * initialized without requiring a malloc/free. + */ +static char object_array_slopbuf[1]; + void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode) { unsigned nr = array->nr; unsigned alloc = array->alloc; struct object_array_entry *objects = array->objects; + struct object_array_entry *entry; if (nr >= alloc) { alloc = (alloc + 32) * 2; @@ -272,9 +279,16 @@ void add_object_array_with_mode(struct object *obj, const char *name, struct obj array->alloc = alloc; array->objects = objects; } - objects[nr].item = obj; - objects[nr].name = name; - objects[nr].mode = mode; + entry = &objects[nr]; + entry->item = obj; + if (!name) + entry->name = NULL; + else if (!*name) + /* Use our own empty string instead of allocating one: */ + entry->name = object_array_slopbuf; + else + entry->name = xstrdup(name); + entry->mode = mode; array->nr = ++nr; } @@ -289,6 +303,9 @@ void object_array_filter(struct object_array *array, if (src != dst) objects[dst] = objects[src]; dst++; + } else { + if (objects[src].name != object_array_slopbuf) + free(objects[src].name); } } array->nr = dst; @@ -319,6 +336,9 @@ void object_array_remove_duplicates(struct object_array *array) if (src != array->nr) objects[array->nr] = objects[src]; array->nr++; + } else { + if (objects[src].name != object_array_slopbuf) + free(objects[src].name); } } } diff --git a/object.h b/object.h index 6c1c27fba6..2ff68c52dd 100644 --- a/object.h +++ b/object.h @@ -11,7 +11,13 @@ struct object_array { unsigned int alloc; struct object_array_entry { struct object *item; - const char *name; + /* + * name or NULL. If non-NULL, the memory pointed to + * is owned by this object *except* if it points at + * object_array_slopbuf, which is a static copy of the + * empty string. + */ + char *name; unsigned mode; } *objects; }; diff --git a/revision.c b/revision.c index be73cb46c9..4aeda334db 100644 --- a/revision.c +++ b/revision.c @@ -88,7 +88,9 @@ void add_object(struct object *obj, struct name_path *path, const char *name) { - add_object_array(obj, path_name(path, name), p); + char *pn = path_name(path, name); + add_object_array(obj, pn, p); + free(pn); } static void mark_blob_uninteresting(struct blob *blob) @@ -1288,7 +1290,7 @@ static void read_revisions_from_stdin(struct rev_info *revs, } die("options not supported in --stdin mode"); } - if (handle_revision_arg(xstrdup(sb.buf), revs, 0, + if (handle_revision_arg(sb.buf, revs, 0, REVARG_CANNOT_BE_FILENAME)) die("bad revision '%s'", sb.buf); } -- cgit v1.3-5-g9baa