From ce163c793d2058056ccc57563c350fd6415397ca Mon Sep 17 00:00:00 2001 From: RenĂ© Scharfe Date: Tue, 3 Mar 2009 00:00:55 +0100 Subject: diffcore-pickaxe: use memmem() Use memmem() instead of open-coding it. The system libraries usually have a much faster version than the memcmp()-loop here. Even our own fall-back in compat/, which is used on Windows, is slightly faster. The following commands were run in a Linux kernel repository and timed, the best of five results is shown: $ STRING='Ensure that the real time constraints are schedulable.' $ git log -S"$STRING" HEAD -- kernel/sched.c >/dev/null On Ubuntu 8.10 x64, before (v1.6.2-rc2): 8.09user 0.04system 0:08.14elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+30952minor)pagefaults 0swaps And with the patch: 1.50user 0.04system 0:01.54elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+30645minor)pagefaults 0swaps On Fedora 10 x64, before: 8.34user 0.05system 0:08.39elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+29268minor)pagefaults 0swaps And with the patch: 1.15user 0.05system 0:01.20elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+32253minor)pagefaults 0swaps On Windows Vista x64, before: real 0m9.204s user 0m0.000s sys 0m0.000s And with the patch: real 0m8.470s user 0m0.000s sys 0m0.000s Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- diffcore-pickaxe.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'diffcore-pickaxe.c') diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index af9fffe6e8..574b3e8337 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -10,7 +10,7 @@ static unsigned int contains(struct diff_filespec *one, regex_t *regexp) { unsigned int cnt; - unsigned long offset, sz; + unsigned long sz; const char *data; if (diff_populate_filespec(one, 0)) return 0; @@ -33,15 +33,13 @@ static unsigned int contains(struct diff_filespec *one, } } else { /* Classic exact string match */ - /* Yes, I've heard of strstr(), but the thing is *data may - * not be NUL terminated. Sue me. - */ - for (offset = 0; offset + len <= sz; offset++) { - /* we count non-overlapping occurrences of needle */ - if (!memcmp(needle, data + offset, len)) { - offset += len - 1; - cnt++; - } + while (sz) { + const char *found = memmem(data, sz, needle, len); + if (!found) + break; + sz -= found - data + len; + data = found + len; + cnt++; } } diff_free_filespec_data(one); -- cgit v1.3 From 50fd6997c60f926efe4b3591f6c0a37568d02185 Mon Sep 17 00:00:00 2001 From: RenĂ© Scharfe Date: Mon, 16 Mar 2009 19:38:42 +0100 Subject: pickaxe: count regex matches only once When --pickaxe-regex is used, forward past the end of matches instead of advancing to the byte after their start. This way matches count only once, even if the regular expression matches their tail -- like in the fixed-string fork of the code. E.g.: /.*/ used to count the number of bytes instead of the number of lines. /aa/ resulted in a count of two in "aaa" instead of one. Also document the fact that regexec() needs a NUL-terminated string as its second argument by adding an assert(). Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- diffcore-pickaxe.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'diffcore-pickaxe.c') diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 574b3e8337..d0ef839700 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -25,10 +25,12 @@ static unsigned int contains(struct diff_filespec *one, regmatch_t regmatch; int flags = 0; + assert(data[sz] == '\0'); while (*data && !regexec(regexp, data, 1, ®match, flags)) { flags |= REG_NOTBOL; - data += regmatch.rm_so; - if (*data) data++; + data += regmatch.rm_eo; + if (*data && regmatch.rm_so == regmatch.rm_eo) + data++; cnt++; } -- cgit v1.3