From ae39ba431ab861548eb60b4bd2e1d8b8813db76f Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Fri, 15 Oct 2021 12:13:56 -0400 Subject: grep/pcre2: fix an edge case concerning ascii patterns and UTF-8 data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we attempt to grep non-ascii log message text with an ascii pattern, we run into the following issue: $ git log --color --author='.var.*Bjar' -1 origin/master | grep ^Author grep: (standard input): binary file matches So, to fix this teach the grep code to use PCRE2_UTF, as long as the log output is encoded in UTF-8. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Hamza Mahfooz Signed-off-by: Junio C Hamano --- grep.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'grep.c') diff --git a/grep.c b/grep.c index fe847a0111..f6e113e9f0 100644 --- a/grep.c +++ b/grep.c @@ -382,8 +382,10 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt } options |= PCRE2_CASELESS; } - if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern) && - !(!opt->ignore_case && (p->fixed || p->is_fixed))) + if ((!opt->ignore_locale && !has_non_ascii(p->pattern)) || + (!opt->ignore_locale && is_utf8_locale() && + has_non_ascii(p->pattern) && !(!opt->ignore_case && + (p->fixed || p->is_fixed)))) options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF); #ifdef GIT_PCRE2_VERSION_10_36_OR_HIGHER -- cgit v1.3