From 0324e8fc6b297c9e61745dc4e7d110780334157d Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Tue, 4 May 2021 09:27:34 +0000 Subject: word diff: handle zero length matches If find_word_boundaries() encounters a zero length match (which can be caused by matching a newline or using '*' instead of '+' in the regex) we stop splitting the input into words which generates an inaccurate diff. To fix this increment the start point when there is a zero length match and try a new match. This is safe as posix regular expressions always return the longest available match so a zero length match means there are no longer matches available from the current position. Commit bf82940dbf1 (color-words: enable REG_NEWLINE to help user, 2009-01-17) prevented matching newlines in negated character classes but it is still possible for the user to have an explicit newline match in the regex which could cause a zero length match. One could argue that having explicit newline matches or using '*' rather than '+' are user errors but it seems to be better to work round them than produce inaccurate diffs. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- diff.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'diff.c') diff --git a/diff.c b/diff.c index 4acccd9d7e..c8b1d72434 100644 --- a/diff.c +++ b/diff.c @@ -2053,7 +2053,7 @@ static void fn_out_diff_words_aux(void *priv, static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex, int *begin, int *end) { - if (word_regex && *begin < buffer->size) { + while (word_regex && *begin < buffer->size) { regmatch_t match[1]; if (!regexec_buf(word_regex, buffer->ptr + *begin, buffer->size - *begin, 1, match, 0)) { @@ -2061,9 +2061,13 @@ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex, '\n', match[0].rm_eo - match[0].rm_so); *end = p ? p - buffer->ptr : match[0].rm_eo + *begin; *begin += match[0].rm_so; - return *begin >= *end; + if (*begin == *end) + (*begin)++; + else + return *begin > *end; + } else { + return -1; } - return -1; } /* find the next word */ -- cgit v1.3