From 4318d3ab6589af78cef344afbab100d639219468 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:14 -0400 Subject: dir.c: free strings in sparse cone pattern hashmaps The pattern_list structs used for cone-mode sparse lookups use a few extra hashmaps. These store pattern_entry structs, each of which has its own heap-allocated pattern string. When we clean up the hashmaps, we free the individual pattern_entry structs, but forget to clean up the embedded strings, causing memory leaks. We can fix this by iterating over the hashmaps to free the extra strings. This reduces the numbers of leaks in t7002 from 22 to 9. One alternative here would be to make the string a FLEX_ARRAY member of the pattern_entry. Then there's no extra free() required, and as a bonus it would be a little more efficient. However, some of the refactoring gets awkward, as we are often assigning strings allocated by helper functions. So let's just fix the leak for now, and we can explore bigger refactoring separately. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- dir.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'dir.c') diff --git a/dir.c b/dir.c index 2d83f3311a..9cc4a68fbc 100644 --- a/dir.c +++ b/dir.c @@ -726,6 +726,17 @@ static char *dup_and_filter_pattern(const char *pattern) return result; } +static void clear_pattern_entry_hashmap(struct hashmap *map) +{ + struct hashmap_iter iter; + struct pattern_entry *entry; + + hashmap_for_each_entry(map, &iter, entry, ent) { + free(entry->pattern); + } + hashmap_clear_and_free(map, struct pattern_entry, ent); +} + static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given) { struct pattern_entry *translated; @@ -855,8 +866,8 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern clear_hashmaps: warning(_("disabling cone pattern matching")); - hashmap_clear_and_free(&pl->parent_hashmap, struct pattern_entry, ent); - hashmap_clear_and_free(&pl->recursive_hashmap, struct pattern_entry, ent); + clear_pattern_entry_hashmap(&pl->recursive_hashmap); + clear_pattern_entry_hashmap(&pl->parent_hashmap); pl->use_cone_patterns = 0; } @@ -956,8 +967,8 @@ void clear_pattern_list(struct pattern_list *pl) free(pl->patterns[i]); free(pl->patterns); free(pl->filebuf); - hashmap_clear_and_free(&pl->recursive_hashmap, struct pattern_entry, ent); - hashmap_clear_and_free(&pl->parent_hashmap, struct pattern_entry, ent); + clear_pattern_entry_hashmap(&pl->recursive_hashmap); + clear_pattern_entry_hashmap(&pl->parent_hashmap); memset(pl, 0, sizeof(*pl)); } -- cgit v1.3-5-g45d5 From 4c844c2f499a573e5801bd3cb83b8d42d3f1566a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:20 -0400 Subject: dir.c: free removed sparse-pattern hashmap entries In add_pattern_to_hashsets(), we remove entries from the recursive_hashmap when adding similar ones to the parent_hashmap. I won't pretend to understand all of what's going on here, but there's an obvious leak: whatever we removed from recursive_hashmap is not referenced anywhere else, and is never free()d. We can easily fix this by asking the hashmap to return a pointer to the old entry. This makes t7002 now completely leak-free. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- dir.c | 8 +++++++- t/t7002-mv-sparse-checkout.sh | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'dir.c') diff --git a/dir.c b/dir.c index 9cc4a68fbc..d812d521b0 100644 --- a/dir.c +++ b/dir.c @@ -810,6 +810,8 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern if (given->patternlen > 2 && !strcmp(given->pattern + given->patternlen - 2, "/*")) { + struct pattern_entry *old; + if (!(given->flags & PATTERN_FLAG_NEGATIVE)) { /* Not a cone pattern. */ warning(_("unrecognized pattern: '%s'"), given->pattern); @@ -835,7 +837,11 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern } hashmap_add(&pl->parent_hashmap, &translated->ent); - hashmap_remove(&pl->recursive_hashmap, &translated->ent, &data); + old = hashmap_remove_entry(&pl->recursive_hashmap, translated, ent, &data); + if (old) { + free(old->pattern); + free(old); + } free(data); return; } diff --git a/t/t7002-mv-sparse-checkout.sh b/t/t7002-mv-sparse-checkout.sh index 26582ae4e5..57969ce805 100755 --- a/t/t7002-mv-sparse-checkout.sh +++ b/t/t7002-mv-sparse-checkout.sh @@ -2,6 +2,7 @@ test_description='git mv in sparse working trees' +TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh setup_sparse_checkout () { -- cgit v1.3-5-g45d5 From eed1fbe73b46e5c7f87d724f97fba6d8a4391fc9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 4 Jun 2024 06:13:22 -0400 Subject: dir.c: always copy input to add_pattern() The add_pattern() function has a subtle and undocumented gotcha: the pattern string you pass in must remain valid as long as the pattern_list is in use (and nor do we take ownership of it). This is easy to get wrong, causing either subtle bugs (because you free or reuse the string buffer) or leaks (because you copy the string, but don't track ownership separately). All of this "pattern" code was originally the "exclude" mechanism. So this _usually_ works OK because you add entries in one of two ways: 1. From the command-line (e.g., "--exclude"), in which case we're pointing to an argv entry which remains valid for the lifetime of the program. 2. From a file (e.g., ".gitignore"), in which case we read the whole file into a buffer, attach it to the pattern_list's "filebuf" entry, then parse the buffer in-place (adding NULs). The strings point into the filebuf, which is cleaned up when the whole pattern_list goes away. But other code, like sparse-checkout, reads individual lines from stdin and passes them one by one to add_pattern(), leaking each. We could fix this by refactoring it to take in the whole buffer at once, like (2) above, and stuff it in "filebuf". But given how subtle the interface is, let's just fix it to always copy the string. That seems at first like we'd be wasting extra memory, but we can mitigate that: a. The path_pattern struct already uses a FLEXPTR, since we sometimes make a copy (when we see "foo/", we strip off the trailing slash, requiring a modifiable copy of the string). Since we'll now always embed the string inside the struct, we can switch to the regular FLEX_ARRAY pattern, saving us 8 bytes of pointer. So patterns with a trailing slash and ones under 8 bytes actually get smaller. b. Now that we don't need the original string to hang around, we can get rid of the "filebuf" mechanism entirely, and just free the file contents after parsing. Since files are the sources we'd expect to have the largest pattern sets, we should mostly break even on stuffing the same data into the individual structs. This patch just adjusts the add_pattern() interface; it doesn't fix any leaky callers yet. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- dir.c | 15 +++++---------- dir.h | 6 ++---- 2 files changed, 7 insertions(+), 14 deletions(-) (limited to 'dir.c') diff --git a/dir.c b/dir.c index d812d521b0..8308d167c8 100644 --- a/dir.c +++ b/dir.c @@ -925,12 +925,7 @@ void add_pattern(const char *string, const char *base, int nowildcardlen; parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen); - if (flags & PATTERN_FLAG_MUSTBEDIR) { - FLEXPTR_ALLOC_MEM(pattern, pattern, string, patternlen); - } else { - pattern = xmalloc(sizeof(*pattern)); - pattern->pattern = string; - } + FLEX_ALLOC_MEM(pattern, pattern, string, patternlen); pattern->patternlen = patternlen; pattern->nowildcardlen = nowildcardlen; pattern->base = base; @@ -972,7 +967,6 @@ void clear_pattern_list(struct pattern_list *pl) for (i = 0; i < pl->nr; i++) free(pl->patterns[i]); free(pl->patterns); - free(pl->filebuf); clear_pattern_entry_hashmap(&pl->recursive_hashmap); clear_pattern_entry_hashmap(&pl->parent_hashmap); @@ -1166,6 +1160,7 @@ static int add_patterns(const char *fname, const char *base, int baselen, } add_patterns_from_buffer(buf, size, base, baselen, pl); + free(buf); return 0; } @@ -1173,16 +1168,15 @@ static int add_patterns_from_buffer(char *buf, size_t size, const char *base, int baselen, struct pattern_list *pl) { + char *orig = buf; int i, lineno = 1; char *entry; hashmap_init(&pl->recursive_hashmap, pl_hashmap_cmp, NULL, 0); hashmap_init(&pl->parent_hashmap, pl_hashmap_cmp, NULL, 0); - pl->filebuf = buf; - if (skip_utf8_bom(&buf, size)) - size -= buf - pl->filebuf; + size -= buf - orig; entry = buf; @@ -1222,6 +1216,7 @@ int add_patterns_from_blob_to_list( return r; add_patterns_from_buffer(buf, size, base, baselen, pl); + free(buf); return 0; } diff --git a/dir.h b/dir.h index b9e8e96128..1398a53fb4 100644 --- a/dir.h +++ b/dir.h @@ -62,7 +62,6 @@ struct path_pattern { */ struct pattern_list *pl; - const char *pattern; int patternlen; int nowildcardlen; const char *base; @@ -74,6 +73,8 @@ struct path_pattern { * and from -1 decrementing for patterns from CLI args. */ int srcpos; + + char pattern[FLEX_ARRAY]; }; /* used for hashmaps for cone patterns */ @@ -94,9 +95,6 @@ struct pattern_list { int nr; int alloc; - /* remember pointer to exclude file contents so we can free() */ - char *filebuf; - /* origin of list, e.g. path to filename, or descriptive string */ const char *src; -- cgit v1.3-5-g45d5