From 64acde94efd2906c3e20560c31c2957ac0b242c4 Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Sun, 14 Jul 2013 15:35:25 +0700 Subject: move struct pathspec and related functions to pathspec.[ch] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- tree-walk.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tree-walk.c') diff --git a/tree-walk.c b/tree-walk.c index 6e30ef9d04..72a96139f5 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -3,6 +3,7 @@ #include "unpack-trees.h" #include "dir.h" #include "tree.h" +#include "pathspec.h" static const char *get_mode(const char *str, unsigned int *modep) { -- cgit v1.3-5-g45d5 From 6330a171996abbec7dac48788de851aea7d0a18f Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Sun, 14 Jul 2013 15:35:32 +0700 Subject: parse_pathspec: add special flag for max_depth feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit match_pathspec_depth() and tree_entry_interesting() check max_depth field in order to support "git grep --max-depth". The feature activation is tied to "recursive" field, which led to some unwanted activation, e.g. 5c8eeb8 (diff-index: enable recursive pathspec matching in unpack_trees - 2012-01-15). This patch decouples the activation from "recursive" field, puts it in "magic" field instead. This makes sure that only "git grep" can activate this feature. And because parse_pathspec knows when the feature is not used, it does not need to sort pathspec (required for max_depth to work correctly). A small win for non-grep cases. Even though a new magic flag is introduced, no magic syntax is. The magic can be only enabled by parse_pathspec() caller. We might someday want to support ":(maxdepth:10)src." It all depends on actual use cases. max_depth feature cannot be enabled via init_pathspec() anymore. But that's ok because init_pathspec() is on its way to /dev/null. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- builtin/grep.c | 3 ++- diff-lib.c | 1 - dir.c | 8 ++++++-- pathspec.c | 8 ++++++-- pathspec.h | 6 +++++- tree-diff.c | 1 - tree-walk.c | 8 ++++++-- 7 files changed, 25 insertions(+), 10 deletions(-) (limited to 'tree-walk.c') diff --git a/builtin/grep.c b/builtin/grep.c index 1a6c02804c..4bc075422e 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -858,7 +858,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) } parse_pathspec(&pathspec, 0, - PATHSPEC_PREFER_CWD, + PATHSPEC_PREFER_CWD | + (opt.max_depth != -1 ? PATHSPEC_MAXDEPTH_VALID : 0), prefix, argv + i); pathspec.max_depth = opt.max_depth; pathspec.recursive = 1; diff --git a/diff-lib.c b/diff-lib.c index b6f4b21637..d4e17f7662 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -476,7 +476,6 @@ static int diff_cache(struct rev_info *revs, opts.dst_index = NULL; opts.pathspec = &revs->diffopt.pathspec; opts.pathspec->recursive = 1; - opts.pathspec->max_depth = -1; init_tree_desc(&t, tree->buffer, tree->size); return unpack_trees(1, &t, &opts); diff --git a/dir.c b/dir.c index 308028e5b1..e28bc0d636 100644 --- a/dir.c +++ b/dir.c @@ -341,7 +341,9 @@ int match_pathspec_depth(const struct pathspec *ps, int i, retval = 0; if (!ps->nr) { - if (!ps->recursive || ps->max_depth == -1) + if (!ps->recursive || + !(ps->magic & PATHSPEC_MAXDEPTH) || + ps->max_depth == -1) return MATCHED_RECURSIVELY; if (within_depth(name, namelen, 0, ps->max_depth)) @@ -358,7 +360,9 @@ int match_pathspec_depth(const struct pathspec *ps, if (seen && seen[i] == MATCHED_EXACTLY) continue; how = match_pathspec_item(ps->items+i, prefix, name, namelen); - if (ps->recursive && ps->max_depth != -1 && + if (ps->recursive && + (ps->magic & PATHSPEC_MAXDEPTH) && + ps->max_depth != -1 && how && how != MATCHED_FNMATCH) { int len = ps->items[i].len; if (name[len] == '/') diff --git a/pathspec.c b/pathspec.c index 6d99a3dced..06778fc756 100644 --- a/pathspec.c +++ b/pathspec.c @@ -267,6 +267,9 @@ void parse_pathspec(struct pathspec *pathspec, memset(pathspec, 0, sizeof(*pathspec)); + if (flags & PATHSPEC_MAXDEPTH_VALID) + pathspec->magic |= PATHSPEC_MAXDEPTH; + /* No arguments, no prefix -> no pathspec */ if (!entry && !prefix) return; @@ -322,8 +325,9 @@ void parse_pathspec(struct pathspec *pathspec, pathspec->magic |= item[i].magic; } - qsort(pathspec->items, pathspec->nr, - sizeof(struct pathspec_item), pathspec_item_cmp); + if (pathspec->magic & PATHSPEC_MAXDEPTH) + qsort(pathspec->items, pathspec->nr, + sizeof(struct pathspec_item), pathspec_item_cmp); } /* diff --git a/pathspec.h b/pathspec.h index d630e8b1f9..aa98597bda 100644 --- a/pathspec.h +++ b/pathspec.h @@ -3,7 +3,10 @@ /* Pathspec magic */ #define PATHSPEC_FROMTOP (1<<0) -#define PATHSPEC_ALL_MAGIC PATHSPEC_FROMTOP +#define PATHSPEC_MAXDEPTH (1<<1) +#define PATHSPEC_ALL_MAGIC \ + (PATHSPEC_FROMTOP | \ + PATHSPEC_MAXDEPTH) #define PATHSPEC_ONESTAR 1 /* the pathspec pattern sastisfies GFNM_ONESTAR */ @@ -27,6 +30,7 @@ struct pathspec { /* parse_pathspec flags */ #define PATHSPEC_PREFER_CWD (1<<0) /* No args means match cwd */ #define PATHSPEC_PREFER_FULL (1<<1) /* No args means match everything */ +#define PATHSPEC_MAXDEPTH_VALID (1<<2) /* max_depth field is valid */ extern int init_pathspec(struct pathspec *, const char **); extern void parse_pathspec(struct pathspec *pathspec, diff --git a/tree-diff.c b/tree-diff.c index ba01563a02..826512e84d 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -138,7 +138,6 @@ int diff_tree(struct tree_desc *t1, struct tree_desc *t2, /* Enable recursion indefinitely */ opt->pathspec.recursive = DIFF_OPT_TST(opt, RECURSIVE); - opt->pathspec.max_depth = -1; strbuf_init(&base, PATH_MAX); strbuf_add(&base, base_str, baselen); diff --git a/tree-walk.c b/tree-walk.c index 72a96139f5..d399ca9dcc 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -637,7 +637,9 @@ enum interesting tree_entry_interesting(const struct name_entry *entry, entry_not_interesting : all_entries_not_interesting; if (!ps->nr) { - if (!ps->recursive || ps->max_depth == -1) + if (!ps->recursive || + !(ps->magic & PATHSPEC_MAXDEPTH) || + ps->max_depth == -1) return all_entries_interesting; return within_depth(base->buf + base_offset, baselen, !!S_ISDIR(entry->mode), @@ -658,7 +660,9 @@ enum interesting tree_entry_interesting(const struct name_entry *entry, if (!match_dir_prefix(base_str, match, matchlen)) goto match_wildcards; - if (!ps->recursive || ps->max_depth == -1) + if (!ps->recursive || + !(ps->magic & PATHSPEC_MAXDEPTH) || + ps->max_depth == -1) return all_entries_interesting; return within_depth(base_str + matchlen + 1, -- cgit v1.3-5-g45d5 From 8f4f8f4579fe8cc630e118cc736de2b8d5cf8e34 Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Sun, 14 Jul 2013 15:35:36 +0700 Subject: guard against new pathspec magic in pathspec matching code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GUARD_PATHSPEC() marks pathspec-sensitive code, basically all those that touch anything in 'struct pathspec' except fields "nr" and "original". GUARD_PATHSPEC() is not supposed to fail. It's mainly to help the designers catch unsupported codepaths. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/technical/api-setup.txt | 19 +++++++++++++++++++ builtin/diff.c | 2 ++ dir.c | 2 ++ pathspec.h | 7 +++++++ tree-diff.c | 19 +++++++++++++++++++ tree-walk.c | 2 ++ 6 files changed, 51 insertions(+) (limited to 'tree-walk.c') diff --git a/Documentation/technical/api-setup.txt b/Documentation/technical/api-setup.txt index 90d1aff625..540e455689 100644 --- a/Documentation/technical/api-setup.txt +++ b/Documentation/technical/api-setup.txt @@ -28,3 +28,22 @@ parse_pathspec(). This function takes several arguments: - prefix and args come from cmd_* functions get_pathspec() is obsolete and should never be used in new code. + +parse_pathspec() helps catch unsupported features and reject them +politely. At a lower level, different pathspec-related functions may +not support the same set of features. Such pathspec-sensitive +functions are guarded with GUARD_PATHSPEC(), which will die in an +unfriendly way when an unsupported feature is requested. + +The command designers are supposed to make sure that GUARD_PATHSPEC() +never dies. They have to make sure all unsupported features are caught +by parse_pathspec(), not by GUARD_PATHSPEC. grepping GUARD_PATHSPEC() +should give the designers all pathspec-sensitive codepaths and what +features they support. + +A similar process is applied when a new pathspec magic is added. The +designer lifts the GUARD_PATHSPEC restriction in the functions that +support the new magic. At the same time (s)he has to make sure this +new feature will be caught at parse_pathspec() in commands that cannot +handle the new magic in some cases. grepping parse_pathspec() should +help. diff --git a/builtin/diff.c b/builtin/diff.c index 9fc273d8cd..6bb41aff5d 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -367,6 +367,8 @@ int cmd_diff(int argc, const char **argv, const char *prefix) } } if (rev.prune_data.nr) { + /* builtin_diff_b_f() */ + GUARD_PATHSPEC(&rev.prune_data, PATHSPEC_FROMTOP); if (!path) path = rev.prune_data.items[0].match; paths += rev.prune_data.nr; diff --git a/dir.c b/dir.c index e28bc0d636..19978d3d59 100644 --- a/dir.c +++ b/dir.c @@ -340,6 +340,8 @@ int match_pathspec_depth(const struct pathspec *ps, { int i, retval = 0; + GUARD_PATHSPEC(ps, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH); + if (!ps->nr) { if (!ps->recursive || !(ps->magic & PATHSPEC_MAXDEPTH) || diff --git a/pathspec.h b/pathspec.h index 2e427d7f4c..6baf205862 100644 --- a/pathspec.h +++ b/pathspec.h @@ -27,6 +27,13 @@ struct pathspec { } *items; }; +#define GUARD_PATHSPEC(ps, mask) \ + do { \ + if ((ps)->magic & ~(mask)) \ + die("BUG:%s:%d: unsupported magic %x", \ + __FILE__, __LINE__, (ps)->magic & ~(mask)); \ + } while (0) + /* parse_pathspec flags */ #define PATHSPEC_PREFER_CWD (1<<0) /* No args means match cwd */ #define PATHSPEC_PREFER_FULL (1<<1) /* No args means match everything */ diff --git a/tree-diff.c b/tree-diff.c index 826512e84d..5a876148bb 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -198,6 +198,25 @@ static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, co const char *paths[1]; int i; + /* + * follow-rename code is very specific, we need exactly one + * path. Magic that matches more than one path is not + * supported. + */ + GUARD_PATHSPEC(&opt->pathspec, PATHSPEC_FROMTOP); +#if 0 + /* + * We should reject wildcards as well. Unfortunately we + * haven't got a reliable way to detect that 'foo\*bar' in + * fact has no wildcards. nowildcard_len is merely a hint for + * optimization. Let it slip for now until wildmatch is taught + * about dry-run mode and returns wildcard info. + */ + if (opt->pathspec.has_wildcard) + die("BUG:%s:%d: wildcards are not supported", + __FILE__, __LINE__); +#endif + /* Remove the file creation entry from the diff queue, and remember it */ choice = q->queue[0]; q->nr = 0; diff --git a/tree-walk.c b/tree-walk.c index d399ca9dcc..37b157ed6e 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -636,6 +636,8 @@ enum interesting tree_entry_interesting(const struct name_entry *entry, enum interesting never_interesting = ps->has_wildcard ? entry_not_interesting : all_entries_not_interesting; + GUARD_PATHSPEC(ps, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH); + if (!ps->nr) { if (!ps->recursive || !(ps->magic & PATHSPEC_MAXDEPTH) || -- cgit v1.3-5-g45d5 From 5c6933d201fab183a9779dca0fe43bf2f1eca098 Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Sun, 14 Jul 2013 15:36:06 +0700 Subject: pathspec: support :(literal) syntax for noglob pathspec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/glossary-content.txt | 20 ++++++++++++++++---- builtin/add.c | 2 +- builtin/diff.c | 2 +- dir.c | 15 ++++++++++++--- pathspec.c | 11 ++++++++--- pathspec.h | 4 +++- t/t6130-pathspec-noglob.sh | 18 ++++++++++++++++++ tree-diff.c | 2 +- tree-walk.c | 5 ++++- 9 files changed, 64 insertions(+), 15 deletions(-) (limited to 'tree-walk.c') diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt index dba5062b37..ca9f20f25f 100644 --- a/Documentation/glossary-content.txt +++ b/Documentation/glossary-content.txt @@ -322,10 +322,22 @@ and a close parentheses `)`, and the remainder is the pattern to match against the path. + The "magic signature" consists of an ASCII symbol that is not -alphanumeric. Currently only the slash `/` is recognized as a -"magic signature": it makes the pattern match from the root of -the working tree, even when you are running the command from -inside a subdirectory. +alphanumeric. ++ +-- +top `/`;; + The magic word `top` (mnemonic: `/`) makes the pattern match + from the root of the working tree, even when you are running + the command from inside a subdirectory. + +literal;; + Wildcards in the pattern such as `*` or `?` are treated + as literal characters. +-- ++ +Currently only the slash `/` is recognized as the "magic signature", +but it is envisioned that we will support more types of magic in later +versions of Git. + A pathspec with only a colon means "there is no pathspec". This form should not be combined with other pathspec. diff --git a/builtin/add.c b/builtin/add.c index 0b80fa8956..663ddd122c 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -541,7 +541,7 @@ int cmd_add(int argc, const char **argv, const char *prefix) /* * file_exists() assumes exact match */ - GUARD_PATHSPEC(&pathspec, PATHSPEC_FROMTOP); + GUARD_PATHSPEC(&pathspec, PATHSPEC_FROMTOP | PATHSPEC_LITERAL); for (i = 0; i < pathspec.nr; i++) { const char *path = pathspec.items[i].match; diff --git a/builtin/diff.c b/builtin/diff.c index bb84ba0e46..2fb8c5dc0b 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -368,7 +368,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix) } if (rev.prune_data.nr) { /* builtin_diff_b_f() */ - GUARD_PATHSPEC(&rev.prune_data, PATHSPEC_FROMTOP); + GUARD_PATHSPEC(&rev.prune_data, PATHSPEC_FROMTOP | PATHSPEC_LITERAL); if (!path) path = rev.prune_data.items[0].match; paths += rev.prune_data.nr; diff --git a/dir.c b/dir.c index 79465e7f4c..50ec2f5478 100644 --- a/dir.c +++ b/dir.c @@ -108,7 +108,10 @@ static size_t common_prefix_len(const struct pathspec *pathspec) int n; size_t max = 0; - GUARD_PATHSPEC(pathspec, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH); + GUARD_PATHSPEC(pathspec, + PATHSPEC_FROMTOP | + PATHSPEC_MAXDEPTH | + PATHSPEC_LITERAL); for (n = 0; n < pathspec->nr; n++) { size_t i = 0, len = 0; @@ -232,7 +235,10 @@ int match_pathspec_depth(const struct pathspec *ps, { int i, retval = 0; - GUARD_PATHSPEC(ps, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH); + GUARD_PATHSPEC(ps, + PATHSPEC_FROMTOP | + PATHSPEC_MAXDEPTH | + PATHSPEC_LITERAL); if (!ps->nr) { if (!ps->recursive || @@ -1288,7 +1294,10 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru * Check out create_simplify() */ if (pathspec) - GUARD_PATHSPEC(pathspec, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH); + GUARD_PATHSPEC(pathspec, + PATHSPEC_FROMTOP | + PATHSPEC_MAXDEPTH | + PATHSPEC_LITERAL); if (has_symlink_leading_path(path, len)) return dir->nr; diff --git a/pathspec.c b/pathspec.c index b2e3a8778c..6a16938cb6 100644 --- a/pathspec.c +++ b/pathspec.c @@ -70,6 +70,7 @@ static struct pathspec_magic { const char *name; } pathspec_magic[] = { { PATHSPEC_FROMTOP, '/', "top" }, + { PATHSPEC_LITERAL, 0, "literal" }, }; /* @@ -92,13 +93,15 @@ static unsigned prefix_pathspec(struct pathspec_item *item, const char *elt) { static int literal_global = -1; - unsigned magic = 0, short_magic = 0; + unsigned magic = 0, short_magic = 0, global_magic = 0; const char *copyfrom = elt, *long_magic_end = NULL; char *match; int i, pathspec_prefix = -1; if (literal_global < 0) literal_global = git_env_bool(GIT_LITERAL_PATHSPECS_ENVIRONMENT, 0); + if (literal_global) + global_magic |= PATHSPEC_LITERAL; if (elt[0] != ':') { ; /* nothing to do */ @@ -164,6 +167,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, magic |= short_magic; *p_short_magic = short_magic; + magic |= global_magic; if (pathspec_prefix >= 0 && (prefixlen || (prefix && *prefix))) @@ -236,7 +240,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, elt, ce_len, ce->name); } - if (literal_global) + if (magic & PATHSPEC_LITERAL) item->nowildcard_len = item->len; else { item->nowildcard_len = simple_length(item->match); @@ -402,7 +406,8 @@ const char **get_pathspec(const char *prefix, const char **pathspec) { struct pathspec ps; parse_pathspec(&ps, - PATHSPEC_ALL_MAGIC & ~PATHSPEC_FROMTOP, + PATHSPEC_ALL_MAGIC & + ~(PATHSPEC_FROMTOP | PATHSPEC_LITERAL), PATHSPEC_PREFER_CWD, prefix, pathspec); return ps._raw; diff --git a/pathspec.h b/pathspec.h index 7ef9896edb..987d70c55a 100644 --- a/pathspec.h +++ b/pathspec.h @@ -4,9 +4,11 @@ /* Pathspec magic */ #define PATHSPEC_FROMTOP (1<<0) #define PATHSPEC_MAXDEPTH (1<<1) +#define PATHSPEC_LITERAL (1<<2) #define PATHSPEC_ALL_MAGIC \ (PATHSPEC_FROMTOP | \ - PATHSPEC_MAXDEPTH) + PATHSPEC_MAXDEPTH | \ + PATHSPEC_LITERAL) #define PATHSPEC_ONESTAR 1 /* the pathspec pattern sastisfies GFNM_ONESTAR */ diff --git a/t/t6130-pathspec-noglob.sh b/t/t6130-pathspec-noglob.sh index 39ef61994f..49c148e17e 100755 --- a/t/t6130-pathspec-noglob.sh +++ b/t/t6130-pathspec-noglob.sh @@ -47,18 +47,36 @@ test_expect_success 'no-glob option matches literally (vanilla)' ' test_cmp expect actual ' +test_expect_success 'no-glob option matches literally (vanilla)' ' + echo vanilla >expect && + git log --format=%s -- ":(literal)foo" >actual && + test_cmp expect actual +' + test_expect_success 'no-glob option matches literally (star)' ' echo star >expect && git --literal-pathspecs log --format=%s -- "f*" >actual && test_cmp expect actual ' +test_expect_success 'no-glob option matches literally (star)' ' + echo star >expect && + git log --format=%s -- ":(literal)f*" >actual && + test_cmp expect actual +' + test_expect_success 'no-glob option matches literally (bracket)' ' echo bracket >expect && git --literal-pathspecs log --format=%s -- "f[o][o]" >actual && test_cmp expect actual ' +test_expect_success 'no-glob option matches literally (bracket)' ' + echo bracket >expect && + git log --format=%s -- ":(literal)f[o][o]" >actual && + test_cmp expect actual +' + test_expect_success 'no-glob environment variable works' ' echo star >expect && GIT_LITERAL_PATHSPECS=1 git log --format=%s -- "f*" >actual && diff --git a/tree-diff.c b/tree-diff.c index 21a50d8ed3..ccf9d7c8fd 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -202,7 +202,7 @@ static void try_to_follow_renames(struct tree_desc *t1, struct tree_desc *t2, co * path. Magic that matches more than one path is not * supported. */ - GUARD_PATHSPEC(&opt->pathspec, PATHSPEC_FROMTOP); + GUARD_PATHSPEC(&opt->pathspec, PATHSPEC_FROMTOP | PATHSPEC_LITERAL); #if 0 /* * We should reject wildcards as well. Unfortunately we diff --git a/tree-walk.c b/tree-walk.c index 37b157ed6e..676bd7f3c8 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -636,7 +636,10 @@ enum interesting tree_entry_interesting(const struct name_entry *entry, enum interesting never_interesting = ps->has_wildcard ? entry_not_interesting : all_entries_not_interesting; - GUARD_PATHSPEC(ps, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH); + GUARD_PATHSPEC(ps, + PATHSPEC_FROMTOP | + PATHSPEC_MAXDEPTH | + PATHSPEC_LITERAL); if (!ps->nr) { if (!ps->recursive || -- cgit v1.3-5-g45d5 From bd30c2e48432c692f9e77d3529c9cf25117066bb Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Sun, 14 Jul 2013 15:36:08 +0700 Subject: pathspec: support :(glob) syntax MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit :(glob)path differs from plain pathspec that it uses wildmatch with WM_PATHNAME while the other uses fnmatch without FNM_PATHNAME. The difference lies in how '*' (and '**') is processed. With the introduction of :(glob) and :(literal) and their global options --[no]glob-pathspecs, the user can: - make everything literal by default via --noglob-pathspecs --literal-pathspecs cannot be used for this purpose as it disables _all_ pathspec magic. - individually turn on globbing with :(glob) - make everything globbing by default via --glob-pathspecs - individually turn off globbing with :(literal) The implication behind this is, there is no way to gain the default matching behavior (i.e. fnmatch without FNM_PATHNAME). You either get new globbing or literal. The old fnmatch behavior is considered deprecated and discouraged to use. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/git.txt | 19 ++++++++++++ Documentation/glossary-content.txt | 29 ++++++++++++++++++ builtin/add.c | 9 ++++-- builtin/ls-tree.c | 2 +- cache.h | 2 ++ dir.c | 28 +++++++++-------- dir.h | 9 +++--- git.c | 8 +++++ pathspec.c | 47 +++++++++++++++++++++++++--- pathspec.h | 4 ++- t/t6130-pathspec-noglob.sh | 63 ++++++++++++++++++++++++++++++++++++++ tree-walk.c | 9 +++--- 12 files changed, 198 insertions(+), 31 deletions(-) (limited to 'tree-walk.c') diff --git a/Documentation/git.txt b/Documentation/git.txt index 80139ae222..3571a1b7ce 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -454,6 +454,17 @@ help ...`. This is equivalent to setting the `GIT_LITERAL_PATHSPECS` environment variable to `1`. +--glob-pathspecs: + Add "glob" magic to all pathspec. This is equivalent to setting + the `GIT_GLOB_PATHSPECS` environment variable to `1`. Disabling + globbing on individual pathspecs can be done using pathspec + magic ":(literal)" + +--noglob-pathspecs: + Add "literal" magic to all pathspec. This is equivalent to setting + the `GIT_NOGLOB_PATHSPECS` environment variable to `1`. Enabling + globbing on individual pathspecs can be done using pathspec + magic ":(glob)" GIT COMMANDS ------------ @@ -860,6 +871,14 @@ GIT_LITERAL_PATHSPECS:: literal paths to Git (e.g., paths previously given to you by `git ls-tree`, `--raw` diff output, etc). +GIT_GLOB_PATHSPECS:: + Setting this variable to `1` will cause Git to treat all + pathspecs as glob patterns (aka "glob" magic). + +GIT_NOGLOB_PATHSPECS:: + Setting this variable to `1` will cause Git to treat all + pathspecs as literal (aka "literal" magic). + Discussion[[Discussion]] ------------------------ diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt index ca9f20f25f..a3d9029ce7 100644 --- a/Documentation/glossary-content.txt +++ b/Documentation/glossary-content.txt @@ -333,6 +333,35 @@ top `/`;; literal;; Wildcards in the pattern such as `*` or `?` are treated as literal characters. + +glob;; + Git treats the pattern as a shell glob suitable for + consumption by fnmatch(3) with the FNM_PATHNAME flag: + wildcards in the pattern will not match a / in the pathname. + For example, "Documentation/{asterisk}.html" matches + "Documentation/git.html" but not "Documentation/ppc/ppc.html" + or "tools/perf/Documentation/perf.html". ++ +Two consecutive asterisks ("`**`") in patterns matched against +full pathname may have special meaning: + + - A leading "`**`" followed by a slash means match in all + directories. For example, "`**/foo`" matches file or directory + "`foo`" anywhere, the same as pattern "`foo`". "**/foo/bar" + matches file or directory "`bar`" anywhere that is directly + under directory "`foo`". + + - A trailing "/**" matches everything inside. For example, + "abc/**" matches all files inside directory "abc", relative + to the location of the `.gitignore` file, with infinite depth. + + - A slash followed by two consecutive asterisks then a slash + matches zero or more directories. For example, "`a/**/b`" + matches "`a/b`", "`a/x/b`", "`a/x/y/b`" and so on. + + - Other consecutive asterisks are considered invalid. ++ +Glob magic is incompatible with literal magic. -- + Currently only the slash `/` is recognized as the "magic signature", diff --git a/builtin/add.c b/builtin/add.c index 663ddd122c..1dab2464f6 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -541,11 +541,16 @@ int cmd_add(int argc, const char **argv, const char *prefix) /* * file_exists() assumes exact match */ - GUARD_PATHSPEC(&pathspec, PATHSPEC_FROMTOP | PATHSPEC_LITERAL); + GUARD_PATHSPEC(&pathspec, + PATHSPEC_FROMTOP | + PATHSPEC_LITERAL | + PATHSPEC_GLOB); for (i = 0; i < pathspec.nr; i++) { const char *path = pathspec.items[i].match; - if (!seen[i] && !file_exists(path)) { + if (!seen[i] && + ((pathspec.items[i].magic & PATHSPEC_GLOB) || + !file_exists(path))) { if (ignore_missing) { int dtype = DT_UNKNOWN; if (is_excluded(&dir, path, &dtype)) diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c index 1c4f48ea8f..7882352a9b 100644 --- a/builtin/ls-tree.c +++ b/builtin/ls-tree.c @@ -173,7 +173,7 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix) * cannot be lifted until it is converted to use * match_pathspec_depth() or tree_entry_interesting() */ - parse_pathspec(&pathspec, 0, + parse_pathspec(&pathspec, PATHSPEC_GLOB, PATHSPEC_PREFER_CWD, prefix, argv + 1); for (i = 0; i < pathspec.nr; i++) diff --git a/cache.h b/cache.h index 13e3c94bf0..dc4d2ee22d 100644 --- a/cache.h +++ b/cache.h @@ -367,6 +367,8 @@ static inline enum object_type object_type(unsigned int mode) #define GIT_NOTES_REWRITE_REF_ENVIRONMENT "GIT_NOTES_REWRITE_REF" #define GIT_NOTES_REWRITE_MODE_ENVIRONMENT "GIT_NOTES_REWRITE_MODE" #define GIT_LITERAL_PATHSPECS_ENVIRONMENT "GIT_LITERAL_PATHSPECS" +#define GIT_GLOB_PATHSPECS_ENVIRONMENT "GIT_GLOB_PATHSPECS" +#define GIT_NOGLOB_PATHSPECS_ENVIRONMENT "GIT_NOGLOB_PATHSPECS" /* * This environment variable is expected to contain a boolean indicating diff --git a/dir.c b/dir.c index 50ec2f5478..076bd462e5 100644 --- a/dir.c +++ b/dir.c @@ -52,26 +52,28 @@ int fnmatch_icase(const char *pattern, const char *string, int flags) return fnmatch(pattern, string, flags | (ignore_case ? FNM_CASEFOLD : 0)); } -inline int git_fnmatch(const char *pattern, const char *string, - int flags, int prefix) +inline int git_fnmatch(const struct pathspec_item *item, + const char *pattern, const char *string, + int prefix) { - int fnm_flags = 0; - if (flags & GFNM_PATHNAME) - fnm_flags |= FNM_PATHNAME; if (prefix > 0) { if (strncmp(pattern, string, prefix)) return FNM_NOMATCH; pattern += prefix; string += prefix; } - if (flags & GFNM_ONESTAR) { + if (item->flags & PATHSPEC_ONESTAR) { int pattern_len = strlen(++pattern); int string_len = strlen(string); return string_len < pattern_len || strcmp(pattern, string + string_len - pattern_len); } - return fnmatch(pattern, string, fnm_flags); + if (item->magic & PATHSPEC_GLOB) + return wildmatch(pattern, string, WM_PATHNAME, NULL); + else + /* wildmatch has not learned no FNM_PATHNAME mode yet */ + return fnmatch(pattern, string, 0); } static int fnmatch_icase_mem(const char *pattern, int patternlen, @@ -111,7 +113,8 @@ static size_t common_prefix_len(const struct pathspec *pathspec) GUARD_PATHSPEC(pathspec, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH | - PATHSPEC_LITERAL); + PATHSPEC_LITERAL | + PATHSPEC_GLOB); for (n = 0; n < pathspec->nr; n++) { size_t i = 0, len = 0; @@ -206,8 +209,7 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix, } if (item->nowildcard_len < item->len && - !git_fnmatch(match, name, - item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0, + !git_fnmatch(item, match, name, item->nowildcard_len - prefix)) return MATCHED_FNMATCH; @@ -238,7 +240,8 @@ int match_pathspec_depth(const struct pathspec *ps, GUARD_PATHSPEC(ps, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH | - PATHSPEC_LITERAL); + PATHSPEC_LITERAL | + PATHSPEC_GLOB); if (!ps->nr) { if (!ps->recursive || @@ -1297,7 +1300,8 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru GUARD_PATHSPEC(pathspec, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH | - PATHSPEC_LITERAL); + PATHSPEC_LITERAL | + PATHSPEC_GLOB); if (has_symlink_leading_path(path, len)) return dir->nr; diff --git a/dir.h b/dir.h index 7d051e368a..343ec7aa31 100644 --- a/dir.h +++ b/dir.h @@ -199,10 +199,9 @@ extern int fnmatch_icase(const char *pattern, const char *string, int flags); /* * The prefix part of pattern must not contains wildcards. */ -#define GFNM_PATHNAME 1 /* similar to FNM_PATHNAME */ -#define GFNM_ONESTAR 2 /* there is only _one_ wildcard, a star */ - -extern int git_fnmatch(const char *pattern, const char *string, - int flags, int prefix); +struct pathspec_item; +extern int git_fnmatch(const struct pathspec_item *item, + const char *pattern, const char *string, + int prefix); #endif diff --git a/git.c b/git.c index 4359086fd6..25096755f4 100644 --- a/git.c +++ b/git.c @@ -147,6 +147,14 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) setenv(GIT_LITERAL_PATHSPECS_ENVIRONMENT, "0", 1); if (envchanged) *envchanged = 1; + } else if (!strcmp(cmd, "--glob-pathspecs")) { + setenv(GIT_GLOB_PATHSPECS_ENVIRONMENT, "1", 1); + if (envchanged) + *envchanged = 1; + } else if (!strcmp(cmd, "--noglob-pathspecs")) { + setenv(GIT_NOGLOB_PATHSPECS_ENVIRONMENT, "1", 1); + if (envchanged) + *envchanged = 1; } else if (!strcmp(cmd, "--shallow-file")) { (*argv)++; (*argc)--; diff --git a/pathspec.c b/pathspec.c index b6d8e74277..c1e6917897 100644 --- a/pathspec.c +++ b/pathspec.c @@ -57,7 +57,6 @@ char *find_pathspecs_matching_against_index(const struct pathspec *pathspec) * * Possible future magic semantics include stuff like: * - * { PATHSPEC_NOGLOB, '!', "noglob" }, * { PATHSPEC_ICASE, '\0', "icase" }, * { PATHSPEC_RECURSIVE, '*', "recursive" }, * { PATHSPEC_REGEXP, '\0', "regexp" }, @@ -71,6 +70,7 @@ static struct pathspec_magic { } pathspec_magic[] = { { PATHSPEC_FROMTOP, '/', "top" }, { PATHSPEC_LITERAL, 0, "literal" }, + { PATHSPEC_GLOB, '\0', "glob" }, }; /* @@ -93,6 +93,8 @@ static unsigned prefix_pathspec(struct pathspec_item *item, const char *elt) { static int literal_global = -1; + static int glob_global = -1; + static int noglob_global = -1; unsigned magic = 0, short_magic = 0, global_magic = 0; const char *copyfrom = elt, *long_magic_end = NULL; char *match; @@ -103,6 +105,22 @@ static unsigned prefix_pathspec(struct pathspec_item *item, if (literal_global) global_magic |= PATHSPEC_LITERAL; + if (glob_global < 0) + glob_global = git_env_bool(GIT_GLOB_PATHSPECS_ENVIRONMENT, 0); + if (glob_global) + global_magic |= PATHSPEC_GLOB; + + if (noglob_global < 0) + noglob_global = git_env_bool(GIT_NOGLOB_PATHSPECS_ENVIRONMENT, 0); + + if (glob_global && noglob_global) + die(_("global 'glob' and 'noglob' pathspec settings are incompatible")); + + if ((global_magic & PATHSPEC_LITERAL) && + (global_magic & ~PATHSPEC_LITERAL)) + die(_("global 'literal' pathspec setting is incompatible " + "with all other global pathspec settings")); + if (elt[0] != ':' || literal_global) { ; /* nothing to do */ } else if (elt[1] == '(') { @@ -167,12 +185,24 @@ static unsigned prefix_pathspec(struct pathspec_item *item, magic |= short_magic; *p_short_magic = short_magic; + + /* --noglob-pathspec adds :(literal) _unless_ :(glob) is specifed */ + if (noglob_global && !(magic & PATHSPEC_GLOB)) + global_magic |= PATHSPEC_LITERAL; + + /* --glob-pathspec is overriden by :(literal) */ + if ((global_magic & PATHSPEC_GLOB) && (magic & PATHSPEC_LITERAL)) + global_magic &= ~PATHSPEC_GLOB; + magic |= global_magic; if (pathspec_prefix >= 0 && (prefixlen || (prefix && *prefix))) die("BUG: 'prefix' magic is supposed to be used at worktree's root"); + if ((magic & PATHSPEC_LITERAL) && (magic & PATHSPEC_GLOB)) + die(_("%s: 'literal' and 'glob' are incompatible"), elt); + if (pathspec_prefix >= 0) { match = xstrdup(copyfrom); prefixlen = pathspec_prefix; @@ -248,10 +278,17 @@ static unsigned prefix_pathspec(struct pathspec_item *item, item->nowildcard_len = prefixlen; } item->flags = 0; - if (item->nowildcard_len < item->len && - item->match[item->nowildcard_len] == '*' && - no_wildcard(item->match + item->nowildcard_len + 1)) - item->flags |= PATHSPEC_ONESTAR; + if (magic & PATHSPEC_GLOB) { + /* + * FIXME: should we enable ONESTAR in _GLOB for + * pattern "* * / * . c"? + */ + } else { + if (item->nowildcard_len < item->len && + item->match[item->nowildcard_len] == '*' && + no_wildcard(item->match + item->nowildcard_len + 1)) + item->flags |= PATHSPEC_ONESTAR; + } /* sanity checks, pathspec matchers assume these are sane */ assert(item->nowildcard_len <= item->len && diff --git a/pathspec.h b/pathspec.h index 987d70c55a..cdf2fa39f6 100644 --- a/pathspec.h +++ b/pathspec.h @@ -5,10 +5,12 @@ #define PATHSPEC_FROMTOP (1<<0) #define PATHSPEC_MAXDEPTH (1<<1) #define PATHSPEC_LITERAL (1<<2) +#define PATHSPEC_GLOB (1<<3) #define PATHSPEC_ALL_MAGIC \ (PATHSPEC_FROMTOP | \ PATHSPEC_MAXDEPTH | \ - PATHSPEC_LITERAL) + PATHSPEC_LITERAL | \ + PATHSPEC_GLOB) #define PATHSPEC_ONESTAR 1 /* the pathspec pattern sastisfies GFNM_ONESTAR */ diff --git a/t/t6130-pathspec-noglob.sh b/t/t6130-pathspec-noglob.sh index 8551b026de..ea00d71e77 100755 --- a/t/t6130-pathspec-noglob.sh +++ b/t/t6130-pathspec-noglob.sh @@ -32,6 +32,16 @@ test_expect_success 'star pathspec globs' ' test_cmp expect actual ' +test_expect_success 'star pathspec globs' ' + cat >expect <<-\EOF && + bracket + star + vanilla + EOF + git log --format=%s -- ":(glob)f*" >actual && + test_cmp expect actual +' + test_expect_success 'bracket pathspec globs and matches literal brackets' ' cat >expect <<-\EOF && bracket @@ -41,6 +51,15 @@ test_expect_success 'bracket pathspec globs and matches literal brackets' ' test_cmp expect actual ' +test_expect_success 'bracket pathspec globs and matches literal brackets' ' + cat >expect <<-\EOF && + bracket + vanilla + EOF + git log --format=%s -- ":(glob)f[o][o]" >actual && + test_cmp expect actual +' + test_expect_success 'no-glob option matches literally (vanilla)' ' echo vanilla >expect && git --literal-pathspecs log --format=%s -- foo >actual && @@ -89,4 +108,48 @@ test_expect_success 'no-glob environment variable works' ' test_cmp expect actual ' +test_expect_success 'setup xxx/bar' ' + mkdir xxx && + test_commit xxx xxx/bar +' + +test_expect_success '**/ works with :(glob)' ' + cat >expect <<-\EOF && + xxx + unrelated + EOF + git log --format=%s -- ":(glob)**/bar" >actual && + test_cmp expect actual +' + +test_expect_success '**/ does not work with --noglob-pathspecs' ' + : >expect && + git --noglob-pathspecs log --format=%s -- "**/bar" >actual && + test_cmp expect actual +' + +test_expect_success '**/ works with :(glob) and --noglob-pathspecs' ' + cat >expect <<-\EOF && + xxx + unrelated + EOF + git --noglob-pathspecs log --format=%s -- ":(glob)**/bar" >actual && + test_cmp expect actual +' + +test_expect_success '**/ works with --glob-pathspecs' ' + cat >expect <<-\EOF && + xxx + unrelated + EOF + git --glob-pathspecs log --format=%s -- "**/bar" >actual && + test_cmp expect actual +' + +test_expect_success '**/ does not work with :(literal) and --glob-pathspecs' ' + : >expect && + git --glob-pathspecs log --format=%s -- ":(literal)**/bar" >actual && + test_cmp expect actual +' + test_done diff --git a/tree-walk.c b/tree-walk.c index 676bd7f3c8..a44f528b3b 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -639,7 +639,8 @@ enum interesting tree_entry_interesting(const struct name_entry *entry, GUARD_PATHSPEC(ps, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH | - PATHSPEC_LITERAL); + PATHSPEC_LITERAL | + PATHSPEC_GLOB); if (!ps->nr) { if (!ps->recursive || @@ -685,8 +686,7 @@ enum interesting tree_entry_interesting(const struct name_entry *entry, return entry_interesting; if (item->nowildcard_len < item->len) { - if (!git_fnmatch(match + baselen, entry->path, - item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0, + if (!git_fnmatch(item, match + baselen, entry->path, item->nowildcard_len - baselen)) return entry_interesting; @@ -727,8 +727,7 @@ match_wildcards: strbuf_add(base, entry->path, pathlen); - if (!git_fnmatch(match, base->buf + base_offset, - item->flags & PATHSPEC_ONESTAR ? GFNM_ONESTAR : 0, + if (!git_fnmatch(item, match, base->buf + base_offset, item->nowildcard_len)) { strbuf_setlen(base, base_offset + baselen); return entry_interesting; -- cgit v1.3-5-g45d5 From 93d935371654faf2956a4c37c1ca46f3195ee832 Mon Sep 17 00:00:00 2001 From: Nguyễn Thái Ngọc Duy Date: Sun, 14 Jul 2013 15:36:09 +0700 Subject: parse_pathspec: accept :(icase)path syntax MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/git.txt | 8 ++++ Documentation/glossary-content.txt | 3 ++ builtin/add.c | 6 ++- builtin/ls-tree.c | 2 +- cache.h | 1 + dir.c | 74 ++++++++++++++++++++++++----- git.c | 4 ++ pathspec.c | 9 +++- pathspec.h | 22 ++++++++- t/t6131-pathspec-icase.sh | 97 ++++++++++++++++++++++++++++++++++++++ tree-walk.c | 59 ++++++++++++++++++----- 11 files changed, 257 insertions(+), 28 deletions(-) create mode 100755 t/t6131-pathspec-icase.sh (limited to 'tree-walk.c') diff --git a/Documentation/git.txt b/Documentation/git.txt index 3571a1b7ce..2c1f6f5f53 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -466,6 +466,10 @@ help ...`. globbing on individual pathspecs can be done using pathspec magic ":(glob)" +--icase-pathspecs: + Add "icase" magic to all pathspec. This is equivalent to setting + the `GIT_ICASE_PATHSPECS` environment variable to `1`. + GIT COMMANDS ------------ @@ -879,6 +883,10 @@ GIT_NOGLOB_PATHSPECS:: Setting this variable to `1` will cause Git to treat all pathspecs as literal (aka "literal" magic). +GIT_ICASE_PATHSPECS:: + Setting this variable to `1` will cause Git to treat all + pathspecs as case-insensitive. + Discussion[[Discussion]] ------------------------ diff --git a/Documentation/glossary-content.txt b/Documentation/glossary-content.txt index a3d9029ce7..13a64d3aac 100644 --- a/Documentation/glossary-content.txt +++ b/Documentation/glossary-content.txt @@ -334,6 +334,9 @@ literal;; Wildcards in the pattern such as `*` or `?` are treated as literal characters. +icase;; + Case insensitive match. + glob;; Git treats the pattern as a shell glob suitable for consumption by fnmatch(3) with the FNM_PATHNAME flag: diff --git a/builtin/add.c b/builtin/add.c index 1dab2464f6..9d52fc7915 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -544,12 +544,14 @@ int cmd_add(int argc, const char **argv, const char *prefix) GUARD_PATHSPEC(&pathspec, PATHSPEC_FROMTOP | PATHSPEC_LITERAL | - PATHSPEC_GLOB); + PATHSPEC_GLOB | + PATHSPEC_ICASE); for (i = 0; i < pathspec.nr; i++) { const char *path = pathspec.items[i].match; if (!seen[i] && - ((pathspec.items[i].magic & PATHSPEC_GLOB) || + ((pathspec.items[i].magic & + (PATHSPEC_GLOB | PATHSPEC_ICASE)) || !file_exists(path))) { if (ignore_missing) { int dtype = DT_UNKNOWN; diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c index 7882352a9b..f6d8215181 100644 --- a/builtin/ls-tree.c +++ b/builtin/ls-tree.c @@ -173,7 +173,7 @@ int cmd_ls_tree(int argc, const char **argv, const char *prefix) * cannot be lifted until it is converted to use * match_pathspec_depth() or tree_entry_interesting() */ - parse_pathspec(&pathspec, PATHSPEC_GLOB, + parse_pathspec(&pathspec, PATHSPEC_GLOB | PATHSPEC_ICASE, PATHSPEC_PREFER_CWD, prefix, argv + 1); for (i = 0; i < pathspec.nr; i++) diff --git a/cache.h b/cache.h index dc4d2ee22d..3cff825d5c 100644 --- a/cache.h +++ b/cache.h @@ -369,6 +369,7 @@ static inline enum object_type object_type(unsigned int mode) #define GIT_LITERAL_PATHSPECS_ENVIRONMENT "GIT_LITERAL_PATHSPECS" #define GIT_GLOB_PATHSPECS_ENVIRONMENT "GIT_GLOB_PATHSPECS" #define GIT_NOGLOB_PATHSPECS_ENVIRONMENT "GIT_NOGLOB_PATHSPECS" +#define GIT_ICASE_PATHSPECS_ENVIRONMENT "GIT_ICASE_PATHSPECS" /* * This environment variable is expected to contain a boolean indicating diff --git a/dir.c b/dir.c index 076bd462e5..8543736deb 100644 --- a/dir.c +++ b/dir.c @@ -57,7 +57,7 @@ inline int git_fnmatch(const struct pathspec_item *item, int prefix) { if (prefix > 0) { - if (strncmp(pattern, string, prefix)) + if (ps_strncmp(item, pattern, string, prefix)) return FNM_NOMATCH; pattern += prefix; string += prefix; @@ -66,14 +66,18 @@ inline int git_fnmatch(const struct pathspec_item *item, int pattern_len = strlen(++pattern); int string_len = strlen(string); return string_len < pattern_len || - strcmp(pattern, - string + string_len - pattern_len); + ps_strcmp(item, pattern, + string + string_len - pattern_len); } if (item->magic & PATHSPEC_GLOB) - return wildmatch(pattern, string, WM_PATHNAME, NULL); + return wildmatch(pattern, string, + WM_PATHNAME | + (item->magic & PATHSPEC_ICASE ? WM_CASEFOLD : 0), + NULL); else /* wildmatch has not learned no FNM_PATHNAME mode yet */ - return fnmatch(pattern, string, 0); + return fnmatch(pattern, string, + item->magic & PATHSPEC_ICASE ? FNM_CASEFOLD : 0); } static int fnmatch_icase_mem(const char *pattern, int patternlen, @@ -110,16 +114,27 @@ static size_t common_prefix_len(const struct pathspec *pathspec) int n; size_t max = 0; + /* + * ":(icase)path" is treated as a pathspec full of + * wildcard. In other words, only prefix is considered common + * prefix. If the pathspec is abc/foo abc/bar, running in + * subdir xyz, the common prefix is still xyz, not xuz/abc as + * in non-:(icase). + */ GUARD_PATHSPEC(pathspec, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH | PATHSPEC_LITERAL | - PATHSPEC_GLOB); + PATHSPEC_GLOB | + PATHSPEC_ICASE); for (n = 0; n < pathspec->nr; n++) { - size_t i = 0, len = 0; - while (i < pathspec->items[n].nowildcard_len && - (n == 0 || i < max)) { + size_t i = 0, len = 0, item_len; + if (pathspec->items[n].magic & PATHSPEC_ICASE) + item_len = pathspec->items[n].prefix; + else + item_len = pathspec->items[n].nowildcard_len; + while (i < item_len && (n == 0 || i < max)) { char c = pathspec->items[n].match[i]; if (c != pathspec->items[0].match[i]) break; @@ -196,11 +211,44 @@ static int match_pathspec_item(const struct pathspec_item *item, int prefix, const char *match = item->match + prefix; int matchlen = item->len - prefix; + /* + * The normal call pattern is: + * 1. prefix = common_prefix_len(ps); + * 2. prune something, or fill_directory + * 3. match_pathspec_depth() + * + * 'prefix' at #1 may be shorter than the command's prefix and + * it's ok for #2 to match extra files. Those extras will be + * trimmed at #3. + * + * Suppose the pathspec is 'foo' and '../bar' running from + * subdir 'xyz'. The common prefix at #1 will be empty, thanks + * to "../". We may have xyz/foo _and_ XYZ/foo after #2. The + * user does not want XYZ/foo, only the "foo" part should be + * case-insensitive. We need to filter out XYZ/foo here. In + * other words, we do not trust the caller on comparing the + * prefix part when :(icase) is involved. We do exact + * comparison ourselves. + * + * Normally the caller (common_prefix_len() in fact) does + * _exact_ matching on name[-prefix+1..-1] and we do not need + * to check that part. Be defensive and check it anyway, in + * case common_prefix_len is changed, or a new caller is + * introduced that does not use common_prefix_len. + * + * If the penalty turns out too high when prefix is really + * long, maybe change it to + * strncmp(match, name, item->prefix - prefix) + */ + if (item->prefix && (item->magic & PATHSPEC_ICASE) && + strncmp(item->match, name - prefix, item->prefix)) + return 0; + /* If the match was just the prefix, we matched */ if (!*match) return MATCHED_RECURSIVELY; - if (matchlen <= namelen && !strncmp(match, name, matchlen)) { + if (matchlen <= namelen && !ps_strncmp(item, match, name, matchlen)) { if (matchlen == namelen) return MATCHED_EXACTLY; @@ -241,7 +289,8 @@ int match_pathspec_depth(const struct pathspec *ps, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH | PATHSPEC_LITERAL | - PATHSPEC_GLOB); + PATHSPEC_GLOB | + PATHSPEC_ICASE); if (!ps->nr) { if (!ps->recursive || @@ -1301,7 +1350,8 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH | PATHSPEC_LITERAL | - PATHSPEC_GLOB); + PATHSPEC_GLOB | + PATHSPEC_ICASE); if (has_symlink_leading_path(path, len)) return dir->nr; diff --git a/git.c b/git.c index 25096755f4..cebf8827da 100644 --- a/git.c +++ b/git.c @@ -155,6 +155,10 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) setenv(GIT_NOGLOB_PATHSPECS_ENVIRONMENT, "1", 1); if (envchanged) *envchanged = 1; + } else if (!strcmp(cmd, "--icase-pathspecs")) { + setenv(GIT_ICASE_PATHSPECS_ENVIRONMENT, "1", 1); + if (envchanged) + *envchanged = 1; } else if (!strcmp(cmd, "--shallow-file")) { (*argv)++; (*argc)--; diff --git a/pathspec.c b/pathspec.c index c1e6917897..d9f4143222 100644 --- a/pathspec.c +++ b/pathspec.c @@ -57,7 +57,6 @@ char *find_pathspecs_matching_against_index(const struct pathspec *pathspec) * * Possible future magic semantics include stuff like: * - * { PATHSPEC_ICASE, '\0', "icase" }, * { PATHSPEC_RECURSIVE, '*', "recursive" }, * { PATHSPEC_REGEXP, '\0', "regexp" }, * @@ -71,6 +70,7 @@ static struct pathspec_magic { { PATHSPEC_FROMTOP, '/', "top" }, { PATHSPEC_LITERAL, 0, "literal" }, { PATHSPEC_GLOB, '\0', "glob" }, + { PATHSPEC_ICASE, '\0', "icase" }, }; /* @@ -95,6 +95,7 @@ static unsigned prefix_pathspec(struct pathspec_item *item, static int literal_global = -1; static int glob_global = -1; static int noglob_global = -1; + static int icase_global = -1; unsigned magic = 0, short_magic = 0, global_magic = 0; const char *copyfrom = elt, *long_magic_end = NULL; char *match; @@ -116,6 +117,12 @@ static unsigned prefix_pathspec(struct pathspec_item *item, if (glob_global && noglob_global) die(_("global 'glob' and 'noglob' pathspec settings are incompatible")); + + if (icase_global < 0) + icase_global = git_env_bool(GIT_ICASE_PATHSPECS_ENVIRONMENT, 0); + if (icase_global) + global_magic |= PATHSPEC_ICASE; + if ((global_magic & PATHSPEC_LITERAL) && (global_magic & ~PATHSPEC_LITERAL)) die(_("global 'literal' pathspec setting is incompatible " diff --git a/pathspec.h b/pathspec.h index cdf2fa39f6..04b632fa33 100644 --- a/pathspec.h +++ b/pathspec.h @@ -6,11 +6,13 @@ #define PATHSPEC_MAXDEPTH (1<<1) #define PATHSPEC_LITERAL (1<<2) #define PATHSPEC_GLOB (1<<3) +#define PATHSPEC_ICASE (1<<4) #define PATHSPEC_ALL_MAGIC \ (PATHSPEC_FROMTOP | \ PATHSPEC_MAXDEPTH | \ PATHSPEC_LITERAL | \ - PATHSPEC_GLOB) + PATHSPEC_GLOB | \ + PATHSPEC_ICASE) #define PATHSPEC_ONESTAR 1 /* the pathspec pattern sastisfies GFNM_ONESTAR */ @@ -65,6 +67,24 @@ extern void parse_pathspec(struct pathspec *pathspec, extern void copy_pathspec(struct pathspec *dst, const struct pathspec *src); extern void free_pathspec(struct pathspec *); +static inline int ps_strncmp(const struct pathspec_item *item, + const char *s1, const char *s2, size_t n) +{ + if (item->magic & PATHSPEC_ICASE) + return strncasecmp(s1, s2, n); + else + return strncmp(s1, s2, n); +} + +static inline int ps_strcmp(const struct pathspec_item *item, + const char *s1, const char *s2) +{ + if (item->magic & PATHSPEC_ICASE) + return strcasecmp(s1, s2); + else + return strcmp(s1, s2); +} + extern char *find_pathspecs_matching_against_index(const struct pathspec *pathspec); extern void add_pathspec_matches_against_index(const struct pathspec *pathspec, char *seen); extern const char *check_path_for_gitlink(const char *path); diff --git a/t/t6131-pathspec-icase.sh b/t/t6131-pathspec-icase.sh new file mode 100755 index 0000000000..3215eef525 --- /dev/null +++ b/t/t6131-pathspec-icase.sh @@ -0,0 +1,97 @@ +#!/bin/sh + +test_description='test case insensitive pathspec limiting' +. ./test-lib.sh + +test_expect_success 'create commits with glob characters' ' + test_commit bar bar && + test_commit bAr bAr && + test_commit BAR BAR && + mkdir foo && + test_commit foo/bar foo/bar && + test_commit foo/bAr foo/bAr && + test_commit foo/BAR foo/BAR && + mkdir fOo && + test_commit fOo/bar fOo/bar && + test_commit fOo/bAr fOo/bAr && + test_commit fOo/BAR fOo/BAR && + mkdir FOO && + test_commit FOO/bar FOO/bar && + test_commit FOO/bAr FOO/bAr && + test_commit FOO/BAR FOO/BAR +' + +test_expect_success 'tree_entry_interesting matches bar' ' + echo bar >expect && + git log --format=%s -- "bar" >actual && + test_cmp expect actual +' + +test_expect_success 'tree_entry_interesting matches :(icase)bar' ' + cat <<-EOF >expect && + BAR + bAr + bar + EOF + git log --format=%s -- ":(icase)bar" >actual && + test_cmp expect actual +' + +test_expect_success 'tree_entry_interesting matches :(icase)bar with prefix' ' + cat <<-EOF >expect && + fOo/BAR + fOo/bAr + fOo/bar + EOF + ( cd fOo && git log --format=%s -- ":(icase)bar" ) >actual && + test_cmp expect actual +' + +test_expect_success 'tree_entry_interesting matches :(icase)bar with empty prefix' ' + cat <<-EOF >expect && + FOO/BAR + FOO/bAr + FOO/bar + fOo/BAR + fOo/bAr + fOo/bar + foo/BAR + foo/bAr + foo/bar + EOF + ( cd fOo && git log --format=%s -- ":(icase)../foo/bar" ) >actual && + test_cmp expect actual +' + +test_expect_success 'match_pathspec_depth matches :(icase)bar' ' + cat <<-EOF >expect && + BAR + bAr + bar + EOF + git ls-files ":(icase)bar" >actual && + test_cmp expect actual +' + +test_expect_success 'match_pathspec_depth matches :(icase)bar with prefix' ' + cat <<-EOF >expect && + fOo/BAR + fOo/bAr + fOo/bar + EOF + ( cd fOo && git ls-files --full-name ":(icase)bar" ) >actual && + test_cmp expect actual +' + +test_expect_success 'match_pathspec_depth matches :(icase)bar with empty prefix' ' + cat <<-EOF >expect && + bar + fOo/BAR + fOo/bAr + fOo/bar + EOF + ( cd fOo && git ls-files --full-name ":(icase)bar" ../bar ) >actual && + test_cmp expect actual +' + +test_done diff --git a/tree-walk.c b/tree-walk.c index a44f528b3b..c366852553 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -489,13 +489,25 @@ int get_tree_entry(const unsigned char *tree_sha1, const char *name, unsigned ch return retval; } -static int match_entry(const struct name_entry *entry, int pathlen, +static int match_entry(const struct pathspec_item *item, + const struct name_entry *entry, int pathlen, const char *match, int matchlen, enum interesting *never_interesting) { int m = -1; /* signals that we haven't called strncmp() */ - if (*never_interesting != entry_not_interesting) { + if (item->magic & PATHSPEC_ICASE) + /* + * "Never interesting" trick requires exact + * matching. We could do something clever with inexact + * matching, but it's trickier (and not to forget that + * strcasecmp is locale-dependent, at least in + * glibc). Just disable it for now. It can't be worse + * than the wildcard's codepath of '[Tt][Hi][Is][Ss]' + * pattern. + */ + *never_interesting = entry_not_interesting; + else if (*never_interesting != entry_not_interesting) { /* * We have not seen any match that sorts later * than the current path. @@ -541,7 +553,7 @@ static int match_entry(const struct name_entry *entry, int pathlen, * we cheated and did not do strncmp(), so we do * that here. */ - m = strncmp(match, entry->path, pathlen); + m = ps_strncmp(item, match, entry->path, pathlen); /* * If common part matched earlier then it is a hit, @@ -549,15 +561,39 @@ static int match_entry(const struct name_entry *entry, int pathlen, * leading directory and is shorter than match. */ if (!m) + /* + * match_entry does not check if the prefix part is + * matched case-sensitively. If the entry is a + * directory and part of prefix, it'll be rematched + * eventually by basecmp with special treatment for + * the prefix. + */ return 1; return 0; } -static int match_dir_prefix(const char *base, +/* :(icase)-aware string compare */ +static int basecmp(const struct pathspec_item *item, + const char *base, const char *match, int len) +{ + if (item->magic & PATHSPEC_ICASE) { + int ret, n = len > item->prefix ? item->prefix : len; + ret = strncmp(base, match, n); + if (ret) + return ret; + base += n; + match += n; + len -= n; + } + return ps_strncmp(item, base, match, len); +} + +static int match_dir_prefix(const struct pathspec_item *item, + const char *base, const char *match, int matchlen) { - if (strncmp(base, match, matchlen)) + if (basecmp(item, base, match, matchlen)) return 0; /* @@ -594,7 +630,7 @@ static int match_wildcard_base(const struct pathspec_item *item, */ if (baselen >= matchlen) { *matched = matchlen; - return !strncmp(base, match, matchlen); + return !basecmp(item, base, match, matchlen); } dirlen = matchlen; @@ -607,7 +643,7 @@ static int match_wildcard_base(const struct pathspec_item *item, * base ends with '/' so we are sure it really matches * directory */ - if (strncmp(base, match, baselen)) + if (basecmp(item, base, match, baselen)) return 0; *matched = baselen; } else @@ -640,7 +676,8 @@ enum interesting tree_entry_interesting(const struct name_entry *entry, PATHSPEC_FROMTOP | PATHSPEC_MAXDEPTH | PATHSPEC_LITERAL | - PATHSPEC_GLOB); + PATHSPEC_GLOB | + PATHSPEC_ICASE); if (!ps->nr) { if (!ps->recursive || @@ -663,7 +700,7 @@ enum interesting tree_entry_interesting(const struct name_entry *entry, if (baselen >= matchlen) { /* If it doesn't match, move along... */ - if (!match_dir_prefix(base_str, match, matchlen)) + if (!match_dir_prefix(item, base_str, match, matchlen)) goto match_wildcards; if (!ps->recursive || @@ -679,8 +716,8 @@ enum interesting tree_entry_interesting(const struct name_entry *entry, } /* Either there must be no base, or the base must match. */ - if (baselen == 0 || !strncmp(base_str, match, baselen)) { - if (match_entry(entry, pathlen, + if (baselen == 0 || !basecmp(item, base_str, match, baselen)) { + if (match_entry(item, entry, pathlen, match + baselen, matchlen - baselen, &never_interesting)) return entry_interesting; -- cgit v1.3-5-g45d5