From 41de0c6fbcc3d2544ebada3a9f26dec0f32f42de Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 24 Jan 2020 21:19:36 +0000 Subject: sparse-checkout: cone mode does not recognize "**" When core.sparseCheckoutCone is enabled, the 'git sparse-checkout set' command creates a restricted set of possible patterns that are used by a custom algorithm to quickly match those patterns. If a user manually edits the sparse-checkout file, then they could create patterns that do not match these expectations. The cone-mode matching algorithm can return incorrect results. The solution is to detect these incorrect patterns, warn that we do not recognize them, and revert to the standard algorithm. Check each pattern for the "**" substring, and revert to the old logic if seen. While technically a "//**" pattern matches the meaning of "//", it is not one that would be written by the sparse-checkout builtin in cone mode. Attempting to accept that pattern change complicates the logic and instead we punt and do not accept any instance of "**". Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- dir.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'dir.c') diff --git a/dir.c b/dir.c index 22d08e61c2..40fed73a94 100644 --- a/dir.c +++ b/dir.c @@ -651,11 +651,16 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern return; } + if (strstr(given->pattern, "**")) { + /* Not a cone pattern. */ + warning(_("unrecognized pattern: '%s'"), given->pattern); + goto clear_hashmaps; + } + if (given->patternlen > 2 && !strcmp(given->pattern + given->patternlen - 2, "/*")) { if (!(given->flags & PATTERN_FLAG_NEGATIVE)) { /* Not a cone pattern. */ - pl->use_cone_patterns = 0; warning(_("unrecognized pattern: '%s'"), given->pattern); goto clear_hashmaps; } -- cgit v1.3 From 9e6d3e64175713bc0007f3012ea288f4dfc0a399 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 24 Jan 2020 21:19:37 +0000 Subject: sparse-checkout: detect short patterns In cone mode, the shortest pattern the sparse-checkout command will write into the sparse-checkout file is "/*". This is handled carefully in add_pattern_to_hashsets(), so warn if any other pattern is this short. This will assist future pattern checks by allowing us to assume there are at least three characters in the pattern. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- dir.c | 3 ++- t/t1091-sparse-checkout-builtin.sh | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'dir.c') diff --git a/dir.c b/dir.c index 40fed73a94..c2e585607e 100644 --- a/dir.c +++ b/dir.c @@ -651,7 +651,8 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern return; } - if (strstr(given->pattern, "**")) { + if (given->patternlen <= 2 || + strstr(given->pattern, "**")) { /* Not a cone pattern. */ warning(_("unrecognized pattern: '%s'"), given->pattern); goto clear_hashmaps; diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index e2e45dc7fd..2e57534799 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -339,4 +339,13 @@ test_expect_success 'pattern-checks: /A/**/B/' ' check_files repo/deep/deeper1 "deepest" ' +test_expect_success 'pattern-checks: too short' ' + cat >repo/.git/info/sparse-checkout <<-\EOF && + /* + !/*/ + /a + EOF + check_read_tree_errors repo "a" "disabling cone pattern matching" +' + test_done -- cgit v1.3 From 9abc60f8015d060d3f3433b105648a4725c97bd1 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 31 Jan 2020 20:16:08 +0000 Subject: sparse-checkout: warn on globs in cone patterns In cone mode, the sparse-checkout commmand will write patterns that allow faster pattern matching. This matching only works if the patterns in the sparse-checkout file are those written by that command. Users can edit the sparse-checkout file and create patterns that cause the cone mode matching to fail. The cone mode patterns may end in "/*" but otherwise an un-escaped asterisk or other glob character is invalid. Add checks to disable cone mode when seeing these values. A later change will properly handle escaped globs. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- dir.c | 36 +++++++++++++++++++++++++++++++++++ t/t1091-sparse-checkout-builtin.sh | 39 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) (limited to 'dir.c') diff --git a/dir.c b/dir.c index c2e585607e..71d28331f3 100644 --- a/dir.c +++ b/dir.c @@ -635,6 +635,7 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern struct pattern_entry *translated; char *truncated; char *data = NULL; + const char *prev, *cur, *next; if (!pl->use_cone_patterns) return; @@ -652,12 +653,47 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern } if (given->patternlen <= 2 || + *given->pattern == '*' || strstr(given->pattern, "**")) { /* Not a cone pattern. */ warning(_("unrecognized pattern: '%s'"), given->pattern); goto clear_hashmaps; } + prev = given->pattern; + cur = given->pattern + 1; + next = given->pattern + 2; + + while (*cur) { + /* Watch for glob characters '*', '\', '[', '?' */ + if (!is_glob_special(*cur)) + goto increment; + + /* But only if *prev != '\\' */ + if (*prev == '\\') + goto increment; + + /* But allow the initial '\' */ + if (*cur == '\\' && + is_glob_special(*next)) + goto increment; + + /* But a trailing '/' then '*' is fine */ + if (*prev == '/' && + *cur == '*' && + *next == 0) + goto increment; + + /* Not a cone pattern. */ + warning(_("unrecognized pattern: '%s'"), given->pattern); + goto clear_hashmaps; + + increment: + prev++; + cur++; + next++; + } + if (given->patternlen > 2 && !strcmp(given->pattern + given->patternlen - 2, "/*")) { if (!(given->flags & PATTERN_FLAG_NEGATIVE)) { diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index 2e57534799..c732abeacd 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -348,4 +348,43 @@ test_expect_success 'pattern-checks: too short' ' check_read_tree_errors repo "a" "disabling cone pattern matching" ' +test_expect_success 'pattern-checks: trailing "*"' ' + cat >repo/.git/info/sparse-checkout <<-\EOF && + /* + !/*/ + /a* + EOF + check_read_tree_errors repo "a" "disabling cone pattern matching" +' + +test_expect_success 'pattern-checks: starting "*"' ' + cat >repo/.git/info/sparse-checkout <<-\EOF && + /* + !/*/ + *eep/ + EOF + check_read_tree_errors repo "a deep" "disabling cone pattern matching" +' + +test_expect_success 'pattern-checks: contained glob characters' ' + for c in "[a]" "\\" "?" "*" + do + cat >repo/.git/info/sparse-checkout <<-EOF && + /* + !/*/ + something$c-else/ + EOF + check_read_tree_errors repo "a" "disabling cone pattern matching" + done +' + +test_expect_success 'pattern-checks: escaped "*"' ' + cat >repo/.git/info/sparse-checkout <<-\EOF && + /* + !/*/ + /does\*not\*exist/ + EOF + check_read_tree_errors repo "a" "" +' + test_done -- cgit v1.3 From 4f52c2ce6c578896964e960f6017510f0efd3f46 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 31 Jan 2020 20:16:09 +0000 Subject: sparse-checkout: properly match escaped characters In cone mode, the sparse-checkout feature uses hashset containment queries to match paths. Make this algorithm respect escaped asterisk (*) and backslash (\) characters. Create dup_and_filter_pattern() method to convert a pattern by removing escape characters and dropping an optional "/*" at the end. This method is available in dir.h as we will use it in builtin/sparse-checkout.c in a later change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- dir.c | 35 ++++++++++++++++++++++++++++++++--- t/t1091-sparse-checkout-builtin.sh | 23 +++++++++++++++++++---- 2 files changed, 51 insertions(+), 7 deletions(-) (limited to 'dir.c') diff --git a/dir.c b/dir.c index 71d28331f3..7ac0920b71 100644 --- a/dir.c +++ b/dir.c @@ -630,6 +630,36 @@ int pl_hashmap_cmp(const void *unused_cmp_data, return strncmp(ee1->pattern, ee2->pattern, min_len); } +static char *dup_and_filter_pattern(const char *pattern) +{ + char *set, *read; + size_t count = 0; + char *result = xstrdup(pattern); + + set = result; + read = result; + + while (*read) { + /* skip escape characters (once) */ + if (*read == '\\') + read++; + + *set = *read; + + set++; + read++; + count++; + } + *set = 0; + + if (count > 2 && + *(set - 1) == '*' && + *(set - 2) == '/') + *(set - 2) = 0; + + return result; +} + static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given) { struct pattern_entry *translated; @@ -702,8 +732,7 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern goto clear_hashmaps; } - truncated = xstrdup(given->pattern); - truncated[given->patternlen - 2] = 0; + truncated = dup_and_filter_pattern(given->pattern); translated = xmalloc(sizeof(struct pattern_entry)); translated->pattern = truncated; @@ -737,7 +766,7 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern translated = xmalloc(sizeof(struct pattern_entry)); - translated->pattern = xstrdup(given->pattern); + translated->pattern = dup_and_filter_pattern(given->pattern); translated->patternlen = given->patternlen; hashmap_entry_init(&translated->ent, ignore_case ? diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index c732abeacd..9ea700896d 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -378,13 +378,28 @@ test_expect_success 'pattern-checks: contained glob characters' ' done ' -test_expect_success 'pattern-checks: escaped "*"' ' - cat >repo/.git/info/sparse-checkout <<-\EOF && +test_expect_success BSLASHPSPEC 'pattern-checks: escaped "*"' ' + git clone repo escaped && + TREEOID=$(git -C escaped rev-parse HEAD:folder1) && + NEWTREE=$(git -C escaped mktree <<-EOF + $(git -C escaped ls-tree HEAD) + 040000 tree $TREEOID zbad\\dir + 040000 tree $TREEOID zdoes*exist + EOF + ) && + COMMIT=$(git -C escaped commit-tree $NEWTREE -p HEAD) && + git -C escaped reset --hard $COMMIT && + check_files escaped "a deep folder1 folder2 zbad\\dir zdoes*exist" && + git -C escaped sparse-checkout init --cone && + cat >escaped/.git/info/sparse-checkout <<-\EOF && /* !/*/ - /does\*not\*exist/ + /zbad\\dir/ + !/zbad\\dir/*/ + /zdoes\*not\*exist/ + /zdoes\*exist/ EOF - check_read_tree_errors repo "a" "" + check_read_tree_errors escaped "a zbad\\dir zdoes*exist" ' test_done -- cgit v1.3