From 2c389fc8ec57722aa1e8f49d316401e2e21d1b05 Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Wed, 15 Dec 2010 22:02:40 +0700 Subject: Move tree_entry_interesting() to tree-walk.c and export it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- tree-walk.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) (limited to 'tree-walk.c') diff --git a/tree-walk.c b/tree-walk.c index a9bbf4e235..522bb6b8f6 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -455,3 +455,117 @@ int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned ch free(tree); return retval; } + +/* + * Is a tree entry interesting given the pathspec we have? + * + * Pre-condition: baselen == 0 || base[baselen-1] == '/' + * + * Return: + * - 2 for "yes, and all subsequent entries will be" + * - 1 for yes + * - zero for no + * - negative for "no, and no subsequent entries will be either" + */ +int tree_entry_interesting(const struct name_entry *entry, + const char *base, int baselen, + const struct pathspec *ps) +{ + int i; + int pathlen; + int never_interesting = -1; + + if (!ps || !ps->nr) + return 2; + + pathlen = tree_entry_len(entry->path, entry->sha1); + + for (i = 0; i < ps->nr; i++) { + const struct pathspec_item *item = ps->items+i; + const char *match = item->match; + int matchlen = item->len; + int m = -1; /* signals that we haven't called strncmp() */ + + if (baselen >= matchlen) { + /* If it doesn't match, move along... */ + if (strncmp(base, match, matchlen)) + continue; + + /* + * If the base is a subdirectory of a path which + * was specified, all of them are interesting. + */ + if (!matchlen || + base[matchlen] == '/' || + match[matchlen - 1] == '/') + return 2; + + /* Just a random prefix match */ + continue; + } + + /* Does the base match? */ + if (strncmp(base, match, baselen)) + continue; + + match += baselen; + matchlen -= baselen; + + if (never_interesting) { + /* + * We have not seen any match that sorts later + * than the current path. + */ + + /* + * Does match sort strictly earlier than path + * with their common parts? + */ + m = strncmp(match, entry->path, + (matchlen < pathlen) ? matchlen : pathlen); + if (m < 0) + continue; + + /* + * If we come here even once, that means there is at + * least one pathspec that would sort equal to or + * later than the path we are currently looking at. + * In other words, if we have never reached this point + * after iterating all pathspecs, it means all + * pathspecs are either outside of base, or inside the + * base but sorts strictly earlier than the current + * one. In either case, they will never match the + * subsequent entries. In such a case, we initialized + * the variable to -1 and that is what will be + * returned, allowing the caller to terminate early. + */ + never_interesting = 0; + } + + if (pathlen > matchlen) + continue; + + if (matchlen > pathlen) { + if (match[pathlen] != '/') + continue; + if (!S_ISDIR(entry->mode)) + continue; + } + + if (m == -1) + /* + * we cheated and did not do strncmp(), so we do + * that here. + */ + m = strncmp(match, entry->path, pathlen); + + /* + * If common part matched earlier then it is a hit, + * because we rejected the case where path is not a + * leading directory and is shorter than match. + */ + if (!m) + return 1; + } + return never_interesting; /* No matches */ +} -- cgit v1.3 From 48932677d62e426b3f26ac236384cb5195fb9dfd Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Wed, 15 Dec 2010 22:02:42 +0700 Subject: diff-tree: convert base+baselen to writable strbuf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In traversing trees, a full path is splitted into two parts: base directory and entry. They are however quite often concatenated whenever a full path is needed. Current code allocates a new buffer, do two memcpy(), use it, then release. Instead this patch turns "base" to a writable, extendable buffer. When a concatenation is needed, the callee only needs to append "entry" to base, use it, then truncate the entry out again. "base" must remain unchanged before and after entering a function. This avoids quite a bit of malloc() and memcpy(). Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- tree-diff.c | 120 ++++++++++++++++++++++++++---------------------------------- tree-walk.c | 5 ++- tree-walk.h | 2 +- 3 files changed, 56 insertions(+), 71 deletions(-) (limited to 'tree-walk.c') diff --git a/tree-diff.c b/tree-diff.c index 23f4478cee..45a3845c0a 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -6,34 +6,18 @@ #include "diffcore.h" #include "tree.h" -static char *malloc_base(const char *base, int baselen, const char *path, int pathlen) -{ - char *newbase = xmalloc(baselen + pathlen + 2); - memcpy(newbase, base, baselen); - memcpy(newbase + baselen, path, pathlen); - memcpy(newbase + baselen + pathlen, "/", 2); - return newbase; -} +static void show_entry(struct diff_options *opt, const char *prefix, + struct tree_desc *desc, struct strbuf *base); -static char *malloc_fullname(const char *base, int baselen, const char *path, int pathlen) -{ - char *fullname = xmalloc(baselen + pathlen + 1); - memcpy(fullname, base, baselen); - memcpy(fullname + baselen, path, pathlen); - fullname[baselen + pathlen] = 0; - return fullname; -} - -static void show_entry(struct diff_options *opt, const char *prefix, struct tree_desc *desc, - const char *base, int baselen); - -static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, const char *base, int baselen, struct diff_options *opt) +static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, + struct strbuf *base, struct diff_options *opt) { unsigned mode1, mode2; const char *path1, *path2; const unsigned char *sha1, *sha2; int cmp, pathlen1, pathlen2; - char *fullname; + int old_baselen = base->len; + int retval = 0; sha1 = tree_entry_extract(t1, &path1, &mode1); sha2 = tree_entry_extract(t2, &path2, &mode2); @@ -42,11 +26,11 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, const pathlen2 = tree_entry_len(path2, sha2); cmp = base_name_compare(path1, pathlen1, mode1, path2, pathlen2, mode2); if (cmp < 0) { - show_entry(opt, "-", t1, base, baselen); + show_entry(opt, "-", t1, base); return -1; } if (cmp > 0) { - show_entry(opt, "+", t2, base, baselen); + show_entry(opt, "+", t2, base); return 1; } if (!DIFF_OPT_TST(opt, FIND_COPIES_HARDER) && !hashcmp(sha1, sha2) && mode1 == mode2) @@ -57,33 +41,29 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, const * file, we need to consider it a remove and an add. */ if (S_ISDIR(mode1) != S_ISDIR(mode2)) { - show_entry(opt, "-", t1, base, baselen); - show_entry(opt, "+", t2, base, baselen); + show_entry(opt, "-", t1, base); + show_entry(opt, "+", t2, base); return 0; } + strbuf_add(base, path1, pathlen1); if (DIFF_OPT_TST(opt, RECURSIVE) && S_ISDIR(mode1)) { - int retval; - char *newbase = malloc_base(base, baselen, path1, pathlen1); if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) { - newbase[baselen + pathlen1] = 0; opt->change(opt, mode1, mode2, - sha1, sha2, newbase, 0, 0); - newbase[baselen + pathlen1] = '/'; + sha1, sha2, base->buf, 0, 0); } - retval = diff_tree_sha1(sha1, sha2, newbase, opt); - free(newbase); - return retval; + strbuf_addch(base, '/'); + retval = diff_tree_sha1(sha1, sha2, base->buf, opt); + } else { + opt->change(opt, mode1, mode2, sha1, sha2, base->buf, 0, 0); } - - fullname = malloc_fullname(base, baselen, path1, pathlen1); - opt->change(opt, mode1, mode2, sha1, sha2, fullname, 0, 0); - free(fullname); + strbuf_setlen(base, old_baselen); return 0; } /* A whole sub-tree went away or appeared */ -static void show_tree(struct diff_options *opt, const char *prefix, struct tree_desc *desc, const char *base, int baselen) +static void show_tree(struct diff_options *opt, const char *prefix, + struct tree_desc *desc, struct strbuf *base) { int all_interesting = 0; while (desc->size) { @@ -92,30 +72,32 @@ static void show_tree(struct diff_options *opt, const char *prefix, struct tree_ if (all_interesting) show = 1; else { - show = tree_entry_interesting(&desc->entry, base, baselen, &opt->pathspec); + show = tree_entry_interesting(&desc->entry, base, + &opt->pathspec); if (show == 2) all_interesting = 1; } if (show < 0) break; if (show) - show_entry(opt, prefix, desc, base, baselen); + show_entry(opt, prefix, desc, base); update_tree_entry(desc); } } /* A file entry went away or appeared */ -static void show_entry(struct diff_options *opt, const char *prefix, struct tree_desc *desc, - const char *base, int baselen) +static void show_entry(struct diff_options *opt, const char *prefix, + struct tree_desc *desc, struct strbuf *base) { unsigned mode; const char *path; const unsigned char *sha1 = tree_entry_extract(desc, &path, &mode); int pathlen = tree_entry_len(path, sha1); + int old_baselen = base->len; + strbuf_add(base, path, pathlen); if (DIFF_OPT_TST(opt, RECURSIVE) && S_ISDIR(mode)) { enum object_type type; - char *newbase = malloc_base(base, baselen, path, pathlen); struct tree_desc inner; void *tree; unsigned long size; @@ -124,28 +106,25 @@ static void show_entry(struct diff_options *opt, const char *prefix, struct tree if (!tree || type != OBJ_TREE) die("corrupt tree sha %s", sha1_to_hex(sha1)); - if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) { - newbase[baselen + pathlen] = 0; - opt->add_remove(opt, *prefix, mode, sha1, newbase, 0); - newbase[baselen + pathlen] = '/'; - } + if (DIFF_OPT_TST(opt, TREE_IN_RECURSIVE)) + opt->add_remove(opt, *prefix, mode, sha1, base->buf, 0); - init_tree_desc(&inner, tree, size); - show_tree(opt, prefix, &inner, newbase, baselen + 1 + pathlen); + strbuf_addch(base, '/'); + init_tree_desc(&inner, tree, size); + show_tree(opt, prefix, &inner, base); free(tree); - free(newbase); - } else { - char *fullname = malloc_fullname(base, baselen, path, pathlen); - opt->add_remove(opt, prefix[0], mode, sha1, fullname, 0); - free(fullname); - } + } else + opt->add_remove(opt, prefix[0], mode, sha1, base->buf, 0); + + strbuf_setlen(base, old_baselen); } -static void skip_uninteresting(struct tree_desc *t, const char *base, int baselen, struct diff_options *opt, int *all_interesting) +static void skip_uninteresting(struct tree_desc *t, struct strbuf *base, + struct diff_options *opt, int *all_interesting) { while (t->size) { - int show = tree_entry_interesting(&t->entry, base, baselen, &opt->pathspec); + int show = tree_entry_interesting(&t->entry, base, &opt->pathspec); if (show == 2) *all_interesting = 1; if (!show) { @@ -159,37 +138,40 @@ static void skip_uninteresting(struct tree_desc *t, const char *base, int basele } } -int diff_tree(struct tree_desc *t1, struct tree_desc *t2, const char *base, struct diff_options *opt) +int diff_tree(struct tree_desc *t1, struct tree_desc *t2, + const char *base_str, struct diff_options *opt) { - int baselen = strlen(base); + struct strbuf base; + int baselen = strlen(base_str); int all_t1_interesting = 0; int all_t2_interesting = 0; + strbuf_init(&base, PATH_MAX); + strbuf_add(&base, base_str, baselen); + for (;;) { if (DIFF_OPT_TST(opt, QUICK) && DIFF_OPT_TST(opt, HAS_CHANGES)) break; if (opt->pathspec.nr) { if (!all_t1_interesting) - skip_uninteresting(t1, base, baselen, opt, - &all_t1_interesting); + skip_uninteresting(t1, &base, opt, &all_t1_interesting); if (!all_t2_interesting) - skip_uninteresting(t2, base, baselen, opt, - &all_t2_interesting); + skip_uninteresting(t2, &base, opt, &all_t2_interesting); } if (!t1->size) { if (!t2->size) break; - show_entry(opt, "+", t2, base, baselen); + show_entry(opt, "+", t2, &base); update_tree_entry(t2); continue; } if (!t2->size) { - show_entry(opt, "-", t1, base, baselen); + show_entry(opt, "-", t1, &base); update_tree_entry(t1); continue; } - switch (compare_tree_entry(t1, t2, base, baselen, opt)) { + switch (compare_tree_entry(t1, t2, &base, opt)) { case -1: update_tree_entry(t1); continue; @@ -202,6 +184,8 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, const char *base, stru } die("git diff-tree: internal error"); } + + strbuf_release(&base); return 0; } diff --git a/tree-walk.c b/tree-walk.c index 522bb6b8f6..21028d08dd 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -468,12 +468,13 @@ int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned ch * - negative for "no, and no subsequent entries will be either" */ int tree_entry_interesting(const struct name_entry *entry, - const char *base, int baselen, + const struct strbuf *base_buf, const struct pathspec *ps) { int i; - int pathlen; + int pathlen, baselen = base_buf->len; int never_interesting = -1; + const char *base = base_buf->buf; if (!ps || !ps->nr) return 2; diff --git a/tree-walk.h b/tree-walk.h index c12f0a2978..f81c232b5a 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -60,6 +60,6 @@ static inline int traverse_path_len(const struct traverse_info *info, const stru return info->pathlen + tree_entry_len(n->path, n->sha1); } -extern int tree_entry_interesting(const struct name_entry *, const char *, int, const struct pathspec *ps); +extern int tree_entry_interesting(const struct name_entry *, const struct strbuf *, const struct pathspec *ps); #endif -- cgit v1.3 From 58c4d66619eb21fa23a1305401e0ec4988c1c17e Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Wed, 15 Dec 2010 22:02:43 +0700 Subject: tree_entry_interesting(): refactor into separate smaller functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- tree-walk.c | 170 +++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 93 insertions(+), 77 deletions(-) (limited to 'tree-walk.c') diff --git a/tree-walk.c b/tree-walk.c index 21028d08dd..83bede9527 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -456,6 +456,90 @@ int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned ch return retval; } +static int match_entry(const struct name_entry *entry, int pathlen, + const char *match, int matchlen, + int *never_interesting) +{ + int m = -1; /* signals that we haven't called strncmp() */ + + if (*never_interesting) { + /* + * We have not seen any match that sorts later + * than the current path. + */ + + /* + * Does match sort strictly earlier than path + * with their common parts? + */ + m = strncmp(match, entry->path, + (matchlen < pathlen) ? matchlen : pathlen); + if (m < 0) + return 0; + + /* + * If we come here even once, that means there is at + * least one pathspec that would sort equal to or + * later than the path we are currently looking at. + * In other words, if we have never reached this point + * after iterating all pathspecs, it means all + * pathspecs are either outside of base, or inside the + * base but sorts strictly earlier than the current + * one. In either case, they will never match the + * subsequent entries. In such a case, we initialized + * the variable to -1 and that is what will be + * returned, allowing the caller to terminate early. + */ + *never_interesting = 0; + } + + if (pathlen > matchlen) + return 0; + + if (matchlen > pathlen) { + if (match[pathlen] != '/') + return 0; + if (!S_ISDIR(entry->mode)) + return 0; + } + + if (m == -1) + /* + * we cheated and did not do strncmp(), so we do + * that here. + */ + m = strncmp(match, entry->path, pathlen); + + /* + * If common part matched earlier then it is a hit, + * because we rejected the case where path is not a + * leading directory and is shorter than match. + */ + if (!m) + return 1; + + return 0; +} + +static int match_dir_prefix(const char *base, int baselen, + const char *match, int matchlen) +{ + if (strncmp(base, match, matchlen)) + return 0; + + /* + * If the base is a subdirectory of a path which + * was specified, all of them are interesting. + */ + if (!matchlen || + base[matchlen] == '/' || + match[matchlen - 1] == '/') + return 1; + + /* Just a random prefix match */ + return 0; +} + /* * Is a tree entry interesting given the pathspec we have? * @@ -468,13 +552,12 @@ int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned ch * - negative for "no, and no subsequent entries will be either" */ int tree_entry_interesting(const struct name_entry *entry, - const struct strbuf *base_buf, + const struct strbuf *base, const struct pathspec *ps) { int i; - int pathlen, baselen = base_buf->len; + int pathlen, baselen = base->len; int never_interesting = -1; - const char *base = base_buf->buf; if (!ps || !ps->nr) return 2; @@ -485,88 +568,21 @@ int tree_entry_interesting(const struct name_entry *entry, const struct pathspec_item *item = ps->items+i; const char *match = item->match; int matchlen = item->len; - int m = -1; /* signals that we haven't called strncmp() */ if (baselen >= matchlen) { /* If it doesn't match, move along... */ - if (strncmp(base, match, matchlen)) + if (!match_dir_prefix(base->buf, baselen, match, matchlen)) continue; - - /* - * If the base is a subdirectory of a path which - * was specified, all of them are interesting. - */ - if (!matchlen || - base[matchlen] == '/' || - match[matchlen - 1] == '/') - return 2; - - /* Just a random prefix match */ - continue; + return 2; } /* Does the base match? */ - if (strncmp(base, match, baselen)) - continue; - - match += baselen; - matchlen -= baselen; - - if (never_interesting) { - /* - * We have not seen any match that sorts later - * than the current path. - */ - - /* - * Does match sort strictly earlier than path - * with their common parts? - */ - m = strncmp(match, entry->path, - (matchlen < pathlen) ? matchlen : pathlen); - if (m < 0) - continue; - - /* - * If we come here even once, that means there is at - * least one pathspec that would sort equal to or - * later than the path we are currently looking at. - * In other words, if we have never reached this point - * after iterating all pathspecs, it means all - * pathspecs are either outside of base, or inside the - * base but sorts strictly earlier than the current - * one. In either case, they will never match the - * subsequent entries. In such a case, we initialized - * the variable to -1 and that is what will be - * returned, allowing the caller to terminate early. - */ - never_interesting = 0; + if (!strncmp(base->buf, match, baselen)) { + if (match_entry(entry, pathlen, + match + baselen, matchlen - baselen, + &never_interesting)) + return 1; } - - if (pathlen > matchlen) - continue; - - if (matchlen > pathlen) { - if (match[pathlen] != '/') - continue; - if (!S_ISDIR(entry->mode)) - continue; - } - - if (m == -1) - /* - * we cheated and did not do strncmp(), so we do - * that here. - */ - m = strncmp(match, entry->path, pathlen); - - /* - * If common part matched earlier then it is a hit, - * because we rejected the case where path is not a - * leading directory and is shorter than match. - */ - if (!m) - return 1; } return never_interesting; /* No matches */ } -- cgit v1.3 From bc96cc87dbb229cbdabfd93391e24ef168713a74 Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Wed, 15 Dec 2010 22:02:44 +0700 Subject: tree_entry_interesting(): support depth limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is needed to replace pathspec_matches() in builtin/grep.c. max_depth == -1 means infinite depth. Depth limit is only effective when pathspec.recursive == 1. When pathspec.recursive == 0, the behavior depends on match functions: non-recursive for tree_entry_interesting() and recursive for match_pathspec{,_depth} Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- cache.h | 2 ++ dir.c | 15 +++++++++++++++ dir.h | 1 + tree-diff.c | 4 ++++ tree-walk.c | 19 ++++++++++++++++--- 5 files changed, 38 insertions(+), 3 deletions(-) (limited to 'tree-walk.c') diff --git a/cache.h b/cache.h index a6456143b2..0cf0bac893 100644 --- a/cache.h +++ b/cache.h @@ -503,6 +503,8 @@ extern int ie_modified(const struct index_state *, struct cache_entry *, struct struct pathspec { const char **raw; /* get_pathspec() result, not freed by free_pathspec() */ int nr; + int recursive:1; + int max_depth; struct pathspec_item { const char *match; int len; diff --git a/dir.c b/dir.c index 70d10bc3da..c3bddb60c8 100644 --- a/dir.c +++ b/dir.c @@ -87,6 +87,21 @@ int fill_directory(struct dir_struct *dir, const char **pathspec) return len; } +int within_depth(const char *name, int namelen, + int depth, int max_depth) +{ + const char *cp = name, *cpe = name + namelen; + + while (cp < cpe) { + if (*cp++ != '/') + continue; + depth++; + if (depth > max_depth) + return 0; + } + return 1; +} + /* * Does 'match' match the given name? * A match is found if diff --git a/dir.h b/dir.h index 72a764ed84..5fa3fbe4c5 100644 --- a/dir.h +++ b/dir.h @@ -65,6 +65,7 @@ struct dir_struct { #define MATCHED_FNMATCH 2 #define MATCHED_EXACTLY 3 extern int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen); +extern int within_depth(const char *name, int namelen, int depth, int max_depth); extern int fill_directory(struct dir_struct *dir, const char **pathspec); extern int read_directory(struct dir_struct *, const char *path, int len, const char **pathspec); diff --git a/tree-diff.c b/tree-diff.c index 45a3845c0a..03dc5c8e70 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -146,6 +146,10 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, int all_t1_interesting = 0; int all_t2_interesting = 0; + /* Enable recursion indefinitely */ + opt->pathspec.recursive = DIFF_OPT_TST(opt, RECURSIVE); + opt->pathspec.max_depth = -1; + strbuf_init(&base, PATH_MAX); strbuf_add(&base, base_str, baselen); diff --git a/tree-walk.c b/tree-walk.c index 83bede9527..33feafa964 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -1,6 +1,7 @@ #include "cache.h" #include "tree-walk.h" #include "unpack-trees.h" +#include "dir.h" #include "tree.h" static const char *get_mode(const char *str, unsigned int *modep) @@ -559,8 +560,13 @@ int tree_entry_interesting(const struct name_entry *entry, int pathlen, baselen = base->len; int never_interesting = -1; - if (!ps || !ps->nr) - return 2; + if (!ps->nr) { + if (!ps->recursive || ps->max_depth == -1) + return 2; + return !!within_depth(base->buf, baselen, + !!S_ISDIR(entry->mode), + ps->max_depth); + } pathlen = tree_entry_len(entry->path, entry->sha1); @@ -573,7 +579,14 @@ int tree_entry_interesting(const struct name_entry *entry, /* If it doesn't match, move along... */ if (!match_dir_prefix(base->buf, baselen, match, matchlen)) continue; - return 2; + + if (!ps->recursive || ps->max_depth == -1) + return 2; + + return !!within_depth(base->buf + matchlen + 1, + baselen - matchlen - 1, + !!S_ISDIR(entry->mode), + ps->max_depth); } /* Does the base match? */ -- cgit v1.3 From 86e4ca69e358836599d4eb0c0e217caa9c9e455b Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Wed, 15 Dec 2010 22:02:45 +0700 Subject: tree_entry_interesting(): fix depth limit with overlapping pathspecs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suppose we have two pathspecs 'a' and 'a/b' (both are dirs) and depth limit 1. In current code, pathspecs are checked in input order. When 'a/b' is checked against pathspec 'a', it fails depth limit and therefore is excluded, although it should match 'a/b' pathspec. This patch reorders all pathspecs alphabetically, then teaches tree_entry_interesting() to check against the deepest pathspec first, so depth limit of a shallower pathspec won't affect a deeper one. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 13 +++++++++++++ tree-walk.c | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'tree-walk.c') diff --git a/dir.c b/dir.c index c3bddb60c8..5b4e2b1cb3 100644 --- a/dir.c +++ b/dir.c @@ -1166,6 +1166,15 @@ int remove_path(const char *name) return 0; } +static int pathspec_item_cmp(const void *a_, const void *b_) +{ + struct pathspec_item *a, *b; + + a = (struct pathspec_item *)a_; + b = (struct pathspec_item *)b_; + return strcmp(a->match, b->match); +} + int init_pathspec(struct pathspec *pathspec, const char **paths) { const char **p = paths; @@ -1189,6 +1198,10 @@ int init_pathspec(struct pathspec *pathspec, const char **paths) item->match = path; item->len = strlen(path); } + + qsort(pathspec->items, pathspec->nr, + sizeof(struct pathspec_item), pathspec_item_cmp); + return 0; } diff --git a/tree-walk.c b/tree-walk.c index 33feafa964..be8182c72f 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -570,7 +570,7 @@ int tree_entry_interesting(const struct name_entry *entry, pathlen = tree_entry_len(entry->path, entry->sha1); - for (i = 0; i < ps->nr; i++) { + for (i = ps->nr-1; i >= 0; i--) { const struct pathspec_item *item = ps->items+i; const char *match = item->match; int matchlen = item->len; -- cgit v1.3 From d38f28093ef795bef13d2fda6621b4952afb42db Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Wed, 15 Dec 2010 22:02:46 +0700 Subject: tree_entry_interesting(): support wildcard matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit never_interesting optimization is disabled if there is any wildcard pathspec, even if it only matches exactly on trees. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- cache.h | 2 ++ dir.c | 3 +++ t/t4010-diff-pathspec.sh | 14 ++++++++++++++ tree-walk.c | 30 +++++++++++++++++++++++++++--- tree-walk.h | 2 +- 5 files changed, 47 insertions(+), 4 deletions(-) (limited to 'tree-walk.c') diff --git a/cache.h b/cache.h index 0cf0bac893..800efa2328 100644 --- a/cache.h +++ b/cache.h @@ -503,11 +503,13 @@ extern int ie_modified(const struct index_state *, struct cache_entry *, struct struct pathspec { const char **raw; /* get_pathspec() result, not freed by free_pathspec() */ int nr; + int has_wildcard:1; int recursive:1; int max_depth; struct pathspec_item { const char *match; int len; + int has_wildcard:1; } *items; }; diff --git a/dir.c b/dir.c index 5b4e2b1cb3..b6ccaf3703 100644 --- a/dir.c +++ b/dir.c @@ -1197,6 +1197,9 @@ int init_pathspec(struct pathspec *pathspec, const char **paths) item->match = path; item->len = strlen(path); + item->has_wildcard = !no_wildcard(path); + if (item->has_wildcard) + pathspec->has_wildcard = 1; } qsort(pathspec->items, pathspec->nr, diff --git a/t/t4010-diff-pathspec.sh b/t/t4010-diff-pathspec.sh index 94df7ae53a..4b120f8e23 100755 --- a/t/t4010-diff-pathspec.sh +++ b/t/t4010-diff-pathspec.sh @@ -70,4 +70,18 @@ test_expect_success 'diff-tree pathspec' ' test_cmp expected current ' +EMPTY_TREE=4b825dc642cb6eb9a060e54bf8d69288fbee4904 + +test_expect_success 'diff-tree with wildcard shows dir also matches' ' + git diff-tree --name-only $EMPTY_TREE $tree -- "f*" >result && + echo file0 >expected && + test_cmp expected result +' + +test_expect_success 'diff-tree -r with wildcard' ' + git diff-tree -r --name-only $EMPTY_TREE $tree -- "*file1" >result && + echo path1/file1 >expected && + test_cmp expected result +' + test_done diff --git a/tree-walk.c b/tree-walk.c index be8182c72f..ae7ac1a9f2 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -553,12 +553,12 @@ static int match_dir_prefix(const char *base, int baselen, * - negative for "no, and no subsequent entries will be either" */ int tree_entry_interesting(const struct name_entry *entry, - const struct strbuf *base, + struct strbuf *base, const struct pathspec *ps) { int i; int pathlen, baselen = base->len; - int never_interesting = -1; + int never_interesting = ps->has_wildcard ? 0 : -1; if (!ps->nr) { if (!ps->recursive || ps->max_depth == -1) @@ -578,7 +578,7 @@ int tree_entry_interesting(const struct name_entry *entry, if (baselen >= matchlen) { /* If it doesn't match, move along... */ if (!match_dir_prefix(base->buf, baselen, match, matchlen)) - continue; + goto match_wildcards; if (!ps->recursive || ps->max_depth == -1) return 2; @@ -596,6 +596,30 @@ int tree_entry_interesting(const struct name_entry *entry, &never_interesting)) return 1; } + +match_wildcards: + if (!ps->items[i].has_wildcard) + continue; + + /* + * Concatenate base and entry->path into one and do + * fnmatch() on it. + */ + + strbuf_add(base, entry->path, pathlen); + + if (!fnmatch(match, base->buf, 0)) { + strbuf_setlen(base, baselen); + return 1; + } + strbuf_setlen(base, baselen); + + /* + * Match all directories. We'll try to match files + * later on. + */ + if (ps->recursive && S_ISDIR(entry->mode)) + return 1; } return never_interesting; /* No matches */ } diff --git a/tree-walk.h b/tree-walk.h index f81c232b5a..6589ee27e4 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -60,6 +60,6 @@ static inline int traverse_path_len(const struct traverse_info *info, const stru return info->pathlen + tree_entry_len(n->path, n->sha1); } -extern int tree_entry_interesting(const struct name_entry *, const struct strbuf *, const struct pathspec *ps); +extern int tree_entry_interesting(const struct name_entry *, struct strbuf *, const struct pathspec *ps); #endif -- cgit v1.3 From f1a2ddbbc2df4e81e005a7e82f9e23fb6a9f95e3 Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Wed, 15 Dec 2010 22:02:47 +0700 Subject: tree_entry_interesting(): optimize wildcard matching when base is matched MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If base is already matched, skip that part when calling fnmatch(). This happens quite often if users start a command from worktree's subdirectory and prefix is usually prepended to all pathspecs. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- t/t4010-diff-pathspec.sh | 18 ++++++++++++++++++ tree-walk.c | 14 ++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'tree-walk.c') diff --git a/t/t4010-diff-pathspec.sh b/t/t4010-diff-pathspec.sh index 4b120f8e23..fbc8cd8f05 100755 --- a/t/t4010-diff-pathspec.sh +++ b/t/t4010-diff-pathspec.sh @@ -84,4 +84,22 @@ test_expect_success 'diff-tree -r with wildcard' ' test_cmp expected result ' +test_expect_success 'diff-tree with wildcard shows dir also matches' ' + git diff-tree --name-only $tree $tree2 -- "path1/f*" >result && + echo path1 >expected && + test_cmp expected result +' + +test_expect_success 'diff-tree -r with wildcard from beginning' ' + git diff-tree -r --name-only $tree $tree2 -- "path1/*file1" >result && + echo path1/file1 >expected && + test_cmp expected result +' + +test_expect_success 'diff-tree -r with wildcard' ' + git diff-tree -r --name-only $tree $tree2 -- "path1/f*" >result && + echo path1/file1 >expected && + test_cmp expected result +' + test_done diff --git a/tree-walk.c b/tree-walk.c index ae7ac1a9f2..7596716cf0 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -595,6 +595,20 @@ int tree_entry_interesting(const struct name_entry *entry, match + baselen, matchlen - baselen, &never_interesting)) return 1; + + if (ps->items[i].has_wildcard) { + if (!fnmatch(match + baselen, entry->path, 0)) + return 1; + + /* + * Match all directories. We'll try to + * match files later on. + */ + if (ps->recursive && S_ISDIR(entry->mode)) + return 1; + } + + continue; } match_wildcards: -- cgit v1.3 From 1376e50723228fc21b7183fe86d71ee484a70dd7 Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Fri, 17 Dec 2010 19:45:33 +0700 Subject: grep: drop pathspec_matches() in favor of tree_entry_interesting() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- builtin/grep.c | 125 +++++++-------------------------------------------------- tree-diff.c | 4 +- tree-walk.c | 24 ++++++----- tree-walk.h | 2 +- 4 files changed, 30 insertions(+), 125 deletions(-) (limited to 'tree-walk.c') diff --git a/builtin/grep.c b/builtin/grep.c index c9622d6aa9..c3af8760cc 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -329,106 +329,6 @@ static int grep_config(const char *var, const char *value, void *cb) return 0; } -/* - * Return non-zero if max_depth is negative or path has no more then max_depth - * slashes. - */ -static int accept_subdir(const char *path, int max_depth) -{ - if (max_depth < 0) - return 1; - - while ((path = strchr(path, '/')) != NULL) { - max_depth--; - if (max_depth < 0) - return 0; - path++; - } - return 1; -} - -/* - * Return non-zero if name is a subdirectory of match and is not too deep. - */ -static int is_subdir(const char *name, int namelen, - const char *match, int matchlen, int max_depth) -{ - if (matchlen > namelen || strncmp(name, match, matchlen)) - return 0; - - if (name[matchlen] == '\0') /* exact match */ - return 1; - - if (!matchlen || match[matchlen-1] == '/' || name[matchlen] == '/') - return accept_subdir(name + matchlen + 1, max_depth); - - return 0; -} - -/* - * git grep pathspecs are somewhat different from diff-tree pathspecs; - * pathname wildcards are allowed. - */ -static int pathspec_matches(const char **paths, const char *name, int max_depth) -{ - int namelen, i; - if (!paths || !*paths) - return accept_subdir(name, max_depth); - namelen = strlen(name); - for (i = 0; paths[i]; i++) { - const char *match = paths[i]; - int matchlen = strlen(match); - const char *cp, *meta; - - if (is_subdir(name, namelen, match, matchlen, max_depth)) - return 1; - if (!fnmatch(match, name, 0)) - return 1; - if (name[namelen-1] != '/') - continue; - - /* We are being asked if the directory ("name") is worth - * descending into. - * - * Find the longest leading directory name that does - * not have metacharacter in the pathspec; the name - * we are looking at must overlap with that directory. - */ - for (cp = match, meta = NULL; cp - match < matchlen; cp++) { - char ch = *cp; - if (ch == '*' || ch == '[' || ch == '?') { - meta = cp; - break; - } - } - if (!meta) - meta = cp; /* fully literal */ - - if (namelen <= meta - match) { - /* Looking at "Documentation/" and - * the pattern says "Documentation/howto/", or - * "Documentation/diff*.txt". The name we - * have should match prefix. - */ - if (!memcmp(match, name, namelen)) - return 1; - continue; - } - - if (meta - match < namelen) { - /* Looking at "Documentation/howto/" and - * the pattern says "Documentation/h*"; - * match up to "Do.../h"; this avoids descending - * into "Documentation/technical/". - */ - if (!memcmp(match, name, meta - match)) - return 1; - continue; - } - } - return 0; -} - static void *lock_and_read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size) { void *data; @@ -621,25 +521,24 @@ static int grep_cache(struct grep_opt *opt, const struct pathspec *pathspec, int static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, struct tree_desc *tree, struct strbuf *base, int tn_len) { - int hit = 0; + int hit = 0, matched = 0; struct name_entry entry; int old_baselen = base->len; while (tree_entry(tree, &entry)) { int te_len = tree_entry_len(entry.path, entry.sha1); - strbuf_add(base, entry.path, te_len); + if (matched != 2) { + matched = tree_entry_interesting(&entry, base, tn_len, pathspec); + if (matched == -1) + break; /* no more matches */ + if (!matched) + continue; + } - if (S_ISDIR(entry.mode)) - /* Match "abc/" against pathspec to - * decide if we want to descend into "abc" - * directory. - */ - strbuf_addch(base, '/'); + strbuf_add(base, entry.path, te_len); - if (!pathspec_matches(pathspec->raw, base->buf + tn_len, opt->max_depth)) - ; - else if (S_ISREG(entry.mode)) { + if (S_ISREG(entry.mode)) { hit |= grep_sha1(opt, entry.sha1, base->buf, tn_len); } else if (S_ISDIR(entry.mode)) { @@ -652,6 +551,8 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, if (!data) die("unable to read tree (%s)", sha1_to_hex(entry.sha1)); + + strbuf_addch(base, '/'); init_tree_desc(&sub, data, size); hit |= grep_tree(opt, pathspec, &sub, base, tn_len); free(data); @@ -1058,6 +959,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) paths[1] = NULL; } init_pathspec(&pathspec, paths); + pathspec.max_depth = opt.max_depth; + pathspec.recursive = 1; if (show_in_pager && (cached || list.nr)) die("--open-files-in-pager only works on the worktree"); diff --git a/tree-diff.c b/tree-diff.c index 03dc5c8e70..3954281f50 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -72,7 +72,7 @@ static void show_tree(struct diff_options *opt, const char *prefix, if (all_interesting) show = 1; else { - show = tree_entry_interesting(&desc->entry, base, + show = tree_entry_interesting(&desc->entry, base, 0, &opt->pathspec); if (show == 2) all_interesting = 1; @@ -124,7 +124,7 @@ static void skip_uninteresting(struct tree_desc *t, struct strbuf *base, struct diff_options *opt, int *all_interesting) { while (t->size) { - int show = tree_entry_interesting(&t->entry, base, &opt->pathspec); + int show = tree_entry_interesting(&t->entry, base, 0, &opt->pathspec); if (show == 2) *all_interesting = 1; if (!show) { diff --git a/tree-walk.c b/tree-walk.c index 7596716cf0..322becc3b4 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -544,7 +544,8 @@ static int match_dir_prefix(const char *base, int baselen, /* * Is a tree entry interesting given the pathspec we have? * - * Pre-condition: baselen == 0 || base[baselen-1] == '/' + * Pre-condition: either baselen == base_offset (i.e. empty path) + * or base[baselen-1] == '/' (i.e. with trailing slash). * * Return: * - 2 for "yes, and all subsequent entries will be" @@ -553,44 +554,45 @@ static int match_dir_prefix(const char *base, int baselen, * - negative for "no, and no subsequent entries will be either" */ int tree_entry_interesting(const struct name_entry *entry, - struct strbuf *base, + struct strbuf *base, int base_offset, const struct pathspec *ps) { int i; - int pathlen, baselen = base->len; + int pathlen, baselen = base->len - base_offset; int never_interesting = ps->has_wildcard ? 0 : -1; if (!ps->nr) { if (!ps->recursive || ps->max_depth == -1) return 2; - return !!within_depth(base->buf, baselen, + return !!within_depth(base->buf + base_offset, baselen, !!S_ISDIR(entry->mode), ps->max_depth); } pathlen = tree_entry_len(entry->path, entry->sha1); - for (i = ps->nr-1; i >= 0; i--) { + for (i = ps->nr - 1; i >= 0; i--) { const struct pathspec_item *item = ps->items+i; const char *match = item->match; + const char *base_str = base->buf + base_offset; int matchlen = item->len; if (baselen >= matchlen) { /* If it doesn't match, move along... */ - if (!match_dir_prefix(base->buf, baselen, match, matchlen)) + if (!match_dir_prefix(base_str, baselen, match, matchlen)) goto match_wildcards; if (!ps->recursive || ps->max_depth == -1) return 2; - return !!within_depth(base->buf + matchlen + 1, + return !!within_depth(base_str + matchlen + 1, baselen - matchlen - 1, !!S_ISDIR(entry->mode), ps->max_depth); } /* Does the base match? */ - if (!strncmp(base->buf, match, baselen)) { + if (!strncmp(base_str, match, baselen)) { if (match_entry(entry, pathlen, match + baselen, matchlen - baselen, &never_interesting)) @@ -622,11 +624,11 @@ match_wildcards: strbuf_add(base, entry->path, pathlen); - if (!fnmatch(match, base->buf, 0)) { - strbuf_setlen(base, baselen); + if (!fnmatch(match, base->buf + base_offset, 0)) { + strbuf_setlen(base, base_offset + baselen); return 1; } - strbuf_setlen(base, baselen); + strbuf_setlen(base, base_offset + baselen); /* * Match all directories. We'll try to match files diff --git a/tree-walk.h b/tree-walk.h index 6589ee27e4..39524b7dba 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -60,6 +60,6 @@ static inline int traverse_path_len(const struct traverse_info *info, const stru return info->pathlen + tree_entry_len(n->path, n->sha1); } -extern int tree_entry_interesting(const struct name_entry *, struct strbuf *, const struct pathspec *ps); +extern int tree_entry_interesting(const struct name_entry *, struct strbuf *, int, const struct pathspec *ps); #endif -- cgit v1.3