diff options
| field | value | date |
|---|---|---|
| author | Junio C Hamano <gitster@pobox.com> | 2023-01-27 08:51:40 -0800 |
| committer | Junio C Hamano <gitster@pobox.com> | 2023-01-27 08:51:40 -0800 |
| commit | 557d93a146b3f01e4e90c7c34fb9e9809ef95a81 (patch) | |
| tree | 56223babb8f92a2e28038edac261aaef874bd06d | |
| parent | 3e6417681c9b9498995efb3e26e8e05e8a1088c3 (diff) | |
| parent | acabd2048ee0ee53728100408970ab45a6dab65e (diff) | |
| download | git-557d93a146b3f01e4e90c7c34fb9e9809ef95a81.tar.xz | |
Merge branch 'cb/grep-pcre-ucp'

"grep -P" learned to use Unicode Character Property to grok
character classes when processing \b and \w etc.

* cb/grep-pcre-ucp:
  grep: correctly identify utf-8 characters with \{b,w} in -P
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | grep.c | 2 |
| -rwxr-xr-x | t/perf/p7822-grep-perl-character.sh | 42 |

2 files changed, 43 insertions, 1 deletion
```diff
@@ -293,7 +293,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 		options |= PCRE2_CASELESS;
 	}
 	if (!opt->ignore_locale && is_utf8_locale() && !literal)
-		options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF);
+		options |= (PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_INVALID_UTF);

 #ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER
 	/* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
diff --git a/t/perf/p7822-grep-perl-character.sh b/t/perf/p7822-grep-perl-character.sh
new file mode 100755
index 0000000000..87009c60df
--- /dev/null
+++ b/t/perf/p7822-grep-perl-character.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+test_description="git-grep's perl regex
+
+If GIT_PERF_GREP_THREADS is set to a list of threads (e.g. '1 4 8'
+etc.) we will test the patterns under those numbers of threads.
+"
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+test_checkout_worktree
+
+if test -n "$GIT_PERF_GREP_THREADS"
+then
+	test_set_prereq PERF_GREP_ENGINES_THREADS
+fi
+
+for pattern in \
+	'\\bhow' \
+	'\\bÆvar' \
+	'\\d+ \\bÆvar' \
+	'\\bBelón\\b' \
+	'\\w{12}\\b'
+do
+	echo '$pattern' >pat
+	if ! test_have_prereq PERF_GREP_ENGINES_THREADS
+	then
+		test_perf "grep -P '$pattern'" --prereq PCRE "
+			git -P grep -f pat || :
+		"
+	else
+		for threads in $GIT_PERF_GREP_THREADS
+		do
+			test_perf "grep -P '$pattern' with $threads threads" --prereq PTHREADS,PCRE "
+				git -c grep.threads=$threads -P grep -f pat || :
+			"
+		done
+	fi
+done
+
+test_done
```
