From d9bae1a178f0f8b198ea611e874975214ad6f990 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 1 Apr 2010 20:12:15 -0400 Subject: diff: cache textconv output Running a textconv filter can take a long time. It's particularly bad for a large file which needs to be spooled to disk, but even for small files, the fork+exec overhead can add up for something like "git log -p". This patch uses the notes-cache mechanism to keep a fast cache of textconv output. Caches are stored in refs/notes/textconv/$x, where $x is the userdiff driver defined in gitattributes. Caching is enabled only if diff.$x.cachetextconv is true. In my test repo, on a commit with 45 jpg and avi files changed and a textconv to show their exif tags: [before] $ time git show >/dev/null real 0m13.724s user 0m12.057s sys 0m1.624s [after, first run] $ git config diff.mfo.cachetextconv true $ time git show >/dev/null real 0m14.252s user 0m12.197s sys 0m1.800s [after, subsequent runs] $ time git show >/dev/null real 0m0.352s user 0m0.148s sys 0m0.200s So for a slight (3.8%) cost on the first run, we achieve an almost 40x speed up on subsequent runs. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- userdiff.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'userdiff.c') diff --git a/userdiff.c b/userdiff.c index df992490d5..67003fbb23 100644 --- a/userdiff.c +++ b/userdiff.c @@ -1,3 +1,4 @@ +#include "cache.h" #include "userdiff.h" #include "cache.h" #include "attr.h" @@ -167,6 +168,12 @@ static int parse_tristate(int *b, const char *k, const char *v) return 1; } +static int parse_bool(int *b, const char *k, const char *v) +{ + *b = git_config_bool(k, v); + return 1; +} + int userdiff_config(const char *k, const char *v) { struct userdiff_driver *drv; @@ -181,6 +188,8 @@ int userdiff_config(const char *k, const char *v) return parse_string(&drv->external, k, v); if ((drv = parse_driver(k, v, "textconv"))) return parse_string(&drv->textconv, k, v); + if ((drv = parse_driver(k, v, "cachetextconv"))) + return parse_bool(&drv->textconv_want_cache, k, v); if ((drv = parse_driver(k, v, "wordregex"))) return parse_string(&drv->word_regex, k, v); -- cgit v1.3 From 6d2f208c3dd39493f4d45ea67c55a1b7fe06626a Mon Sep 17 00:00:00 2001 From: Björn Steinbrink Date: Sun, 23 May 2010 20:05:40 +0200 Subject: diff: Support visibility modifiers in the PHP hunk header regexp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with PHP5, class methods can have a visibility modifier, which caused the methods not to be matched by the existing regexp, so extend the regexp to match those modifiers. And while we're at it, allow the "static" modifier as well. Since the "static" modifier can appear either before or after the visibility modifier, let's just allow any number of modifiers to appear in any order, as that simplifies the regexp and shouldn't cause any false positives. 
Signed-off-by: Björn Steinbrink Signed-off-by: Junio C Hamano --- userdiff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'userdiff.c') diff --git a/userdiff.c b/userdiff.c index df992490d5..38563daa3c 100644 --- a/userdiff.c +++ b/userdiff.c @@ -44,7 +44,9 @@ PATTERNS("pascal", "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" "|<>|<=|>=|:=|\\.\\." "|[^[:space:]]|[\x80-\xff]+"), -PATTERNS("php", "^[\t ]*((function|class).*)", +PATTERNS("php", + "^[\t ]*(((public|protected|private|static)[\t ]+)*function.*)$\n" + "^[\t ]*(class.*)$", /* -- */ "[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" -- cgit v1.3 From b221207db9950cb0993a30f38540ff235a0af64e Mon Sep 17 00:00:00 2001 From: Petr Onderka Date: Mon, 16 Aug 2010 17:01:02 +0000 Subject: Userdiff patterns for C# Add userdiff patterns for C#. This code is an improved version of code by Adam Petaccia from 21 June 2009 mail to the list. Signed-off-by: Petr Onderka Acked-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/gitattributes.txt | 2 ++ t/t4018-diff-funcname.sh | 2 +- userdiff.c | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'userdiff.c') diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 564586b943..2e2370ccdb 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -441,6 +441,8 @@ patterns are available: - `cpp` suitable for source code in the C and C++ languages. +- `csharp` suitable for source code in the C# language. + - `html` suitable for HTML/XHTML documents. - `java` suitable for source code in the Java language. 
diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index 5b10e976a3..61de8a2718 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -32,7 +32,7 @@ EOF sed 's/beer\\/beer,\\/' < Beer.java > Beer-correct.java -builtin_patterns="bibtex cpp html java objc pascal php python ruby tex" +builtin_patterns="bibtex cpp csharp html java objc pascal php python ruby tex" for p in $builtin_patterns do test_expect_success "builtin $p pattern compiles" ' diff --git a/userdiff.c b/userdiff.c index c49cc1b67e..e5522159b3 100644 --- a/userdiff.c +++ b/userdiff.c @@ -82,6 +82,22 @@ PATTERNS("cpp", "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("csharp", + /* Keywords */ + "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" + /* Methods and constructors */ + "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[<>@._[:alnum:]]+[ \t]*\\(.*\\))[ \t]*$\n" + /* Properties */ + "^[ \t]*(((static|public|internal|private|protected|new|virtual|sealed|override|unsafe)[ \t]+)*[][<>@.~_[:alnum:]]+[ \t]+[@._[:alnum:]]+)[ \t]*$\n" + /* Type definitions */ + "^[ \t]*(((static|public|internal|private|protected|new|unsafe|sealed|abstract|partial)[ \t]+)*(class|enum|interface|struct)[ \t]+.*)$\n" + /* Namespace */ + "^[ \t]*(namespace[ \t]+.*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" + "|[^[:space:]]|[\x80-\xff]+"), { "default", NULL, -1, { NULL, 0 } }, }; #undef PATTERNS -- cgit v1.3 From 909a5494f869371565ed9326e984adeabf42611d Mon Sep 17 00:00:00 2001 From: Brandon Casey Date: Fri, 10 Sep 2010 11:18:14 -0500 Subject: userdiff.c: add builtin fortran regex patterns This adds fortran xfuncname and wordRegex patterns to the list of builtin patterns. 
The intention is for the patterns to be appropriate for all versions of fortran including 77, 90, 95. The patterns can be enabled by adding the diff=fortran attribute to the .gitattributes file for the desired file glob. This also adds a new macro named IPATTERN which is just like the PATTERNS macro except it sets the REG_ICASE flag so that case will be ignored. The test code in t4018 and the docs were updated as appropriate. Signed-off-by: Brandon Casey Signed-off-by: Junio C Hamano --- Documentation/gitattributes.txt | 2 ++ t/t4018-diff-funcname.sh | 2 +- userdiff.c | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'userdiff.c') diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index e5a27d875e..fbf507a7ee 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -477,6 +477,8 @@ patterns are available: - `csharp` suitable for source code in the C# language. +- `fortran` suitable for source code in the Fortran language. + - `html` suitable for HTML/XHTML documents. - `java` suitable for source code in the Java language. 
diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index 620cd02798..9a57898339 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -32,7 +32,7 @@ EOF sed 's/beer\\/beer,\\/' < Beer.java > Beer-correct.java -builtin_patterns="bibtex cpp csharp html java objc pascal php python ruby tex" +builtin_patterns="bibtex cpp csharp fortran html java objc pascal php python ruby tex" for p in $builtin_patterns do test_expect_success "builtin $p pattern compiles" ' diff --git a/userdiff.c b/userdiff.c index e5522159b3..f9e05b548c 100644 --- a/userdiff.c +++ b/userdiff.c @@ -9,7 +9,23 @@ static int drivers_alloc; #define PATTERNS(name, pattern, word_regex) \ { name, NULL, -1, { pattern, REG_EXTENDED }, word_regex } +#define IPATTERN(name, pattern, word_regex) \ + { name, NULL, -1, { pattern, REG_EXTENDED | REG_ICASE }, word_regex } static struct userdiff_driver builtin_drivers[] = { +IPATTERN("fortran", + "!^([C*]|[ \t]*!)\n" + "!^[ \t]*MODULE[ \t]+PROCEDURE[ \t]\n" + "^[ \t]*((END[ \t]+)?(PROGRAM|MODULE|BLOCK[ \t]+DATA" + "|([^'\" \t]+[ \t]+)*(SUBROUTINE|FUNCTION))[ \t]+[A-Z].*)$", + /* -- */ + "[a-zA-Z][a-zA-Z0-9_]*" + "|\\.([Ee][Qq]|[Nn][Ee]|[Gg][TtEe]|[Ll][TtEe]|[Tt][Rr][Uu][Ee]|[Ff][Aa][Ll][Ss][Ee]|[Aa][Nn][Dd]|[Oo][Rr]|[Nn]?[Ee][Qq][Vv]|[Nn][Oo][Tt])\\." + /* numbers and format statements like 2E14.4, or ES12.6, 9X. + * Don't worry about format statements without leading digits since + * they would have been matched above as a variable anyway. */ + "|[-+]?[0-9.]+([AaIiDdEeFfLlTtXx][Ss]?[-+]?[0-9.]*)?(_[a-zA-Z0-9][a-zA-Z0-9_]*)?" 
+ "|//|\\*\\*|::|[/<>=]=" + "|[^[:space:]]|[\x80-\xff]+"), PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", "[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"), PATTERNS("java", @@ -101,6 +117,7 @@ PATTERNS("csharp", { "default", NULL, -1, { NULL, 0 } }, }; #undef PATTERNS +#undef IPATTERN static struct userdiff_driver driver_true = { "diff=true", -- cgit v1.3 From b34f69f9916e52e742f35c2f88286375142260de Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Sat, 18 Dec 2010 17:17:53 +0100 Subject: userdiff: fix typo in ruby and python word regexes Both had an unclosed ] that ruined the safeguard against not matching a non-space char. Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- userdiff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'userdiff.c') diff --git a/userdiff.c b/userdiff.c index f9e05b548c..2d5453697a 100644 --- a/userdiff.c +++ b/userdiff.c @@ -74,14 +74,14 @@ PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$", "[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?" "|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?" - "|[^[:space:]|[\x80-\xff]+"), + "|[^[:space:]]|[\x80-\xff]+"), /* -- */ PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", /* -- */ "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~" - "|[^[:space:]|[\x80-\xff]+"), + "|[^[:space:]]|[\x80-\xff]+"), PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", "[={}\"]|[^={}\" \t]+"), PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", -- cgit v1.3 From 71a5d4bc0e4025b3fbdeed76052b39fcef284e8c Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 26 Dec 2010 03:07:31 -0600 Subject: diff: funcname and word patterns for perl The default function name discovery already works quite well for Perl code... with the exception of here-documents (or rather their ending). 
sub foo { print <<END here-document END return 1; } The default funcname pattern treats the unindented END line as a function declaration and puts it in the @@ line of diff and "grep --show-function" output. With a little knowledge of perl syntax, we can do better. You can try it out by adding "*.perl diff=perl" to the gitattributes file. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- Documentation/gitattributes.txt | 2 ++ t/t4018-diff-funcname.sh | 2 +- userdiff.c | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'userdiff.c') diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 5a7f936429..e59b878293 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -494,6 +494,8 @@ patterns are available: - `pascal` suitable for source code in the Pascal/Delphi language. +- `perl` suitable for source code in the Perl language. + - `php` suitable for source code in the PHP language. - `python` suitable for source code in the Python language. diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index 0a61b57b5f..3646930623 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -32,7 +32,7 @@ EOF sed 's/beer\\/beer,\\/' < Beer.java > Beer-correct.java -builtin_patterns="bibtex cpp csharp fortran html java objc pascal php python ruby tex" +builtin_patterns="bibtex cpp csharp fortran html java objc pascal perl php python ruby tex" for p in $builtin_patterns do test_expect_success "builtin $p pattern compiles" ' diff --git a/userdiff.c b/userdiff.c index 2d5453697a..fc2afe33a7 100644 --- a/userdiff.c +++ b/userdiff.c @@ -61,6 +61,21 @@ PATTERNS("pascal", "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" "|<>|<=|>=|:=|\\.\\." "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("perl", + "^[ \t]*package .*;\n" + "^[ \t]*sub .* \\{", + /* -- */ + "[[:alpha:]_'][[:alnum:]_']*" + "|0[xb]?[0-9a-fA-F_]*" + /* taking care not to interpret 3..5 as (3.)(.5) */ + "|[0-9a-fA-F_]+(\\.[0-9a-fA-F_]+)?([eE][-+]?[0-9_]+)?" + "|=>|-[rwxoRWXOezsfdlpSugkbctTBMAC>]|~~|::" + "|&&=|\\|\\|=|//=|\\*\\*=" + "|&&|\\|\\||//|\\+\\+|--|\\*\\*|\\.\\.\\.?" 
+ "|[-+*/%.^&<>=!|]=" + "|=~|!~" + "|<<|<>|<=>|>>" + "|[^[:space:]]"), PATTERNS("php", "^[\t ]*(((public|protected|private|static)[\t ]+)*function.*)$\n" "^[\t ]*(class.*)$", -- cgit v1.3 From a25e47377d6a1ec1efc6972f2e5e55cf429603a1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 27 Dec 2010 09:19:38 -0800 Subject: userdiff/perl: catch BEGIN/END/... and POD as headers Signed-off-by: Junio C Hamano --- userdiff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'userdiff.c') diff --git a/userdiff.c b/userdiff.c index fc2afe33a7..c384b39e4d 100644 --- a/userdiff.c +++ b/userdiff.c @@ -63,7 +63,9 @@ PATTERNS("pascal", "|[^[:space:]]|[\x80-\xff]+"), PATTERNS("perl", "^[ \t]*package .*;\n" - "^[ \t]*sub .* \\{", + "^[ \t]*sub .* \\{\n" + "^[A-Z]+ \\{\n" /* BEGIN, END, ... */ + "^=head[0-9] ", /* POD */ /* -- */ "[[:alpha:]_'][[:alnum:]_']*" "|0[xb]?[0-9a-fA-F_]*" -- cgit v1.3 From ad5b6942d5b9127cc940d2135ce82c633013418a Mon Sep 17 00:00:00 2001 From: Alexey Shumkin Date: Tue, 11 Jan 2011 11:53:59 +0300 Subject: userdiff: match Pascal class methods Class declarations were already covered by the second pattern, but class methods have the 'class' keyword in front too. Account for it. 
Signed-off-by: Alexey Shumkin Acked-by: Thomas Rast Signed-off-by: Junio C Hamano --- userdiff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'userdiff.c') diff --git a/userdiff.c b/userdiff.c index 2d5453697a..234697821d 100644 --- a/userdiff.c +++ b/userdiff.c @@ -52,7 +52,7 @@ PATTERNS("objc", "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" "|[^[:space:]]|[\x80-\xff]+"), PATTERNS("pascal", - "^((procedure|function|constructor|destructor|interface|" + "^(((class[ \t]+)?(procedure|function)|constructor|destructor|interface|" "implementation|initialization|finalization)[ \t]*.*)$" "\n" "^(.*=[ \t]*(class|record).*)$", -- cgit v1.3 From 664d44ee7fb18bdfdd66a1be760c7ee1bbe911c6 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Tue, 11 Jan 2011 15:48:50 -0600 Subject: userdiff: simplify word-diff safeguard git's diff-words support has a detail that can be a little dangerous: any text not matched by a given language's tokenization pattern is treated as whitespace and changes in such text would go unnoticed. Therefore each of the built-in regexes allows a special token type consisting of a single non-whitespace character [^[:space:]]. To make sure UTF-8 sequences remain human readable, the builtin regexes also have a special token type for runs of bytes with the high bit set. In English, non-ASCII characters are usually isolated so this is analogous to the [^[:space:]] pattern, except it matches a single _multibyte_ character despite use of the C locale. Unfortunately it is easy to make typos or forget entirely to include these catch-all token types when adding support for new languages (see v1.7.3.5~16, userdiff: fix typo in ruby and python word regexes, 2010-12-18). Avoid this by including them automatically within the PATTERNS and IPATTERN macros. While at it, change the UTF-8 sequence token type to match exactly one non-ASCII multi-byte character, rather than an arbitrary run of them. 
Suggested-by: Thomas Rast Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- userdiff.c | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) (limited to 'userdiff.c') diff --git a/userdiff.c b/userdiff.c index c384b39e4d..3a1c392bbd 100644 --- a/userdiff.c +++ b/userdiff.c @@ -8,9 +8,11 @@ static int ndrivers; static int drivers_alloc; #define PATTERNS(name, pattern, word_regex) \ - { name, NULL, -1, { pattern, REG_EXTENDED }, word_regex } + { name, NULL, -1, { pattern, REG_EXTENDED }, \ + word_regex "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+" } #define IPATTERN(name, pattern, word_regex) \ - { name, NULL, -1, { pattern, REG_EXTENDED | REG_ICASE }, word_regex } + { name, NULL, -1, { pattern, REG_EXTENDED | REG_ICASE }, \ + word_regex "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+" } static struct userdiff_driver builtin_drivers[] = { IPATTERN("fortran", "!^([C*]|[ \t]*!)\n" @@ -24,10 +26,9 @@ IPATTERN("fortran", * Don't worry about format statements without leading digits since * they would have been matched above as a variable anyway. */ "|[-+]?[0-9.]+([AaIiDdEeFfLlTtXx][Ss]?[-+]?[0-9.]*)?(_[a-zA-Z0-9][a-zA-Z0-9_]*)?" - "|//|\\*\\*|::|[/<>=]=" - "|[^[:space:]]|[\x80-\xff]+"), + "|//|\\*\\*|::|[/<>=]="), PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", - "[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"), + "[^<>= \t]+"), PATTERNS("java", "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n" "^[ \t]*(([A-Za-z_][A-Za-z_0-9]*[ \t]+)+[A-Za-z_][A-Za-z_0-9]*[ \t]*\\([^;]*)$", @@ -35,8 +36,7 @@ PATTERNS("java", "[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" 
"|[-+*/<>%&^|=!]=" - "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|" - "|[^[:space:]]|[\x80-\xff]+"), + "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|"), PATTERNS("objc", /* Negate C statements that can look like functions */ "!^[ \t]*(do|for|if|else|return|switch|while)\n" @@ -49,8 +49,7 @@ PATTERNS("objc", /* -- */ "[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" - "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" - "|[^[:space:]]|[\x80-\xff]+"), + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"), PATTERNS("pascal", "^((procedure|function|constructor|destructor|interface|" "implementation|initialization|finalization)[ \t]*.*)$" @@ -59,8 +58,7 @@ PATTERNS("pascal", /* -- */ "[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" - "|<>|<=|>=|:=|\\.\\." - "|[^[:space:]]|[\x80-\xff]+"), + "|<>|<=|>=|:=|\\.\\."), PATTERNS("perl", "^[ \t]*package .*;\n" "^[ \t]*sub .* \\{\n" @@ -76,33 +74,29 @@ PATTERNS("perl", "|&&|\\|\\||//|\\+\\+|--|\\*\\*|\\.\\.\\.?" "|[-+*/%.^&<>=!|]=" "|=~|!~" - "|<<|<>|<=>|>>" - "|[^[:space:]]"), + "|<<|<>|<=>|>>"), PATTERNS("php", "^[\t ]*(((public|protected|private|static)[\t ]+)*function.*)$\n" "^[\t ]*(class.*)$", /* -- */ "[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" - "|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->" - "|[^[:space:]]|[\x80-\xff]+"), + "|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->"), PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$", /* -- */ "[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?" - "|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?" - "|[^[:space:]]|[\x80-\xff]+"), + "|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?"), /* -- */ PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", /* -- */ "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." 
- "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~" - "|[^[:space:]]|[\x80-\xff]+"), + "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"), PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", "[={}\"]|[^={}\" \t]+"), PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", - "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+|[^[:space:]]"), + "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+"), PATTERNS("cpp", /* Jump targets or access declarations */ "!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:.*$\n" @@ -113,8 +107,7 @@ PATTERNS("cpp", /* -- */ "[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" - "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" - "|[^[:space:]]|[\x80-\xff]+"), + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"), PATTERNS("csharp", /* Keywords */ "!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n" @@ -129,8 +122,7 @@ PATTERNS("csharp", /* -- */ "[a-zA-Z_][a-zA-Z0-9_]*" "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" - "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" - "|[^[:space:]]|[\x80-\xff]+"), + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"), { "default", NULL, -1, { NULL, 0 } }, }; #undef PATTERNS -- cgit v1.3 From f12c66b9bb851aa7350d40370e6adf78535c5930 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 21 May 2011 14:29:01 -0500 Subject: userdiff/perl: anchor "sub" and "package" patterns on the left MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The userdiff funcname mechanism has no concept of nested scopes --- instead, "git diff" and "git grep --show-function" simply label the diff header with the most recent matching line. Unfortunately that means text following a subroutine in a POD section: =head1 DESCRIPTION You might use this facility like so: sub example { foo; } Now, having said that, let's say more about the facility. Blah blah blah ... etc etc. 
gets the subroutine name instead of the POD header in its diff/grep funcname header, making it harder to get oriented when reading a diff without enough context. The fix is simple: anchor the funcname syntax to the left margin so nested subroutines and packages like this won't get picked up. (The builtin C++ funcname pattern already does the same thing.) This means the userdiff driver will misparse the idiom { my $static; sub foo { ... use $static ... } } but I think that's worth it; we can revisit this later if the userdiff mechanism learns to keep track of the beginning and end of nested scopes. Reported-by: Ævar Arnfjörð Bjarmason Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- t/t4018-diff-funcname.sh | 59 +++++++++++++++++++++++++++++++++++++++++++++--- userdiff.c | 4 ++-- 2 files changed, 58 insertions(+), 5 deletions(-) (limited to 'userdiff.c') diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index ad74c605a4..f071a8fdd1 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -29,6 +29,47 @@ public class Beer } EOF sed 's/beer\\/beer,\\/' <Beer.java >Beer-correct.java +cat >Beer.perl <<\EOF +package Beer; + +use strict; +use warnings; +use parent qw(Exporter); +our @EXPORT_OK = qw(round); + +sub round { + my ($n) = @_; + print "$n bottles of beer on the wall "; + print "$n bottles of beer\n"; + print "Take one down, pass it around, "; + $n = $n - 1; + print "$n bottles of beer on the wall.\n"; +} + +__END__ + +=head1 NAME + +Beer - subroutine to output fragment of a drinking song + +=head1 SYNOPSIS + + use Beer qw(round); + + sub song { + for (my $i = 99; $i > 0; $i--) { + round $i; + } + } + + song; + +=cut +EOF +sed -e ' + s/beer\\/beer,\\/ + s/song;/song();/ +' <Beer.perl >Beer-correct.perl test_config () { git config "$1" "$2" && @@ -36,8 +77,9 @@ test_config () { } test_expect_funcname () { - test_expect_code 1 git diff --no-index \ - Beer.java Beer-correct.java >diff && + lang=${2-java} + test_expect_code 1 git diff --no-index 
-U1 \ + "Beer.$lang" "Beer-correct.$lang" >diff && grep "^@@.*@@ $1" diff } @@ -65,13 +107,24 @@ test_expect_success 'default behaviour' ' ' test_expect_success 'set up .gitattributes declaring drivers to test' ' - echo "*.java diff=java" >.gitattributes + cat >.gitattributes <<-\EOF + *.java diff=java + *.perl diff=perl + EOF ' test_expect_success 'preset java pattern' ' test_expect_funcname "public static void main(" ' +test_expect_success 'preset perl pattern' ' + test_expect_funcname "sub round {\$" perl +' + +test_expect_success 'perl pattern is not distracted by sub within POD' ' + test_expect_funcname "=head" perl +' + test_expect_success 'custom pattern' ' test_config diff.java.funcname "!static !String diff --git a/userdiff.c b/userdiff.c index 1ff47977d5..2cca0af8e2 100644 --- a/userdiff.c +++ b/userdiff.c @@ -60,8 +60,8 @@ PATTERNS("pascal", "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" "|<>|<=|>=|:=|\\.\\."), PATTERNS("perl", - "^[ \t]*package .*;\n" - "^[ \t]*sub .* \\{\n" + "^package .*;\n" + "^sub .* \\{\n" "^[A-Z]+ \\{\n" /* BEGIN, END, ... */ "^=head[0-9] ", /* POD */ /* -- */ -- cgit v1.3 From 12f0967a8a1e3c11c678de181f77d1c7883b37cf Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 21 May 2011 14:35:51 -0500 Subject: userdiff/perl: match full line of POD headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The builtin perl userdiff driver is not greedy enough about catching POD header lines. Capture the whole line, so instead of just declaring that we are in some "@@ =head1" section, diff/grep output can explain that the enclosing section is about "@@ =head1 OPTIONS". 
Reported-by: Ævar Arnfjörð Bjarmason Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- t/t4018-diff-funcname.sh | 4 ++++ userdiff.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'userdiff.c') diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index f071a8fdd1..8a5714912d 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -125,6 +125,10 @@ test_expect_success 'perl pattern is not distracted by sub within POD' ' test_expect_funcname "=head" perl ' +test_expect_success 'perl pattern gets full line of POD header' ' + test_expect_funcname "=head1 SYNOPSIS\$" perl +' + test_expect_success 'custom pattern' ' test_config diff.java.funcname "!static !String diff --git a/userdiff.c b/userdiff.c index 2cca0af8e2..32ead9654a 100644 --- a/userdiff.c +++ b/userdiff.c @@ -63,7 +63,7 @@ PATTERNS("perl", "^package .*;\n" "^sub .* \\{\n" "^[A-Z]+ \\{\n" /* BEGIN, END, ... */ - "^=head[0-9] ", /* POD */ + "^=head[0-9] .*", /* POD */ /* -- */ "[[:alpha:]_'][[:alnum:]_']*" "|0[xb]?[0-9a-fA-F_]*" -- cgit v1.3 From ea2ca4497bdb716977a3e2526780635cb6bac513 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 21 May 2011 14:38:26 -0500 Subject: userdiff/perl: catch sub with brace on second line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accept sub foo { } as an alternative to a more common style that introduces perl functions with a brace on the first line (and likewise for BEGIN/END blocks). The new regex is a little hairy to avoid matching # forward declaration sub foo; while continuing to match "sub foo($;@) {" and sub foo { # This routine is interesting; # in fact, the lines below explain how... While at it, pay attention to Perl 5.14's "package foo {" syntax as an alternative to the traditional "package foo;". 
Requested-by: Ævar Arnfjörð Bjarmason Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- t/t4018-diff-funcname.sh | 25 +++++++++++++++++++++++-- userdiff.c | 20 +++++++++++++++++--- 2 files changed, 40 insertions(+), 5 deletions(-) (limited to 'userdiff.c') diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index 8a5714912d..b2fd1a99da 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -35,7 +35,11 @@ package Beer; use strict; use warnings; use parent qw(Exporter); -our @EXPORT_OK = qw(round); +our @EXPORT_OK = qw(round finalround); + +sub other; # forward declaration + +# hello sub round { my ($n) = @_; @@ -46,6 +50,12 @@ sub round { print "$n bottles of beer on the wall.\n"; } +sub finalround +{ + print "Go to the store, buy some more\n"; + print "99 bottles of beer on the wall.\n"); +} + __END__ =head1 NAME @@ -54,12 +64,13 @@ Beer - subroutine to output fragment of a drinking song =head1 SYNOPSIS - use Beer qw(round); + use Beer qw(round finalround); sub song { for (my $i = 99; $i > 0; $i--) { round $i; } + finalround; } song; @@ -67,7 +78,9 @@ Beer - subroutine to output fragment of a drinking song =cut EOF sed -e ' + s/hello/goodbye/ s/beer\\/beer,\\/ + s/more\\/more,\\/ s/song;/song();/ ' <Beer.perl >Beer-correct.perl @@ -121,6 +134,10 @@ test_expect_success 'preset perl pattern' ' test_expect_funcname "sub round {\$" perl ' +test_expect_success 'perl pattern accepts K&R style brace placement, too' ' + test_expect_funcname "sub finalround\$" perl +' + test_expect_success 'perl pattern is not distracted by sub within POD' ' test_expect_funcname "=head" perl ' @@ -129,6 +146,10 @@ test_expect_success 'perl pattern gets full line of POD header' ' test_expect_funcname "=head1 SYNOPSIS\$" perl ' +test_expect_success 'perl pattern is not distracted by forward declaration' ' + test_expect_funcname "package Beer;\$" perl +' + test_expect_success 'custom pattern' ' test_config diff.java.funcname "!static !String diff --git 
a/userdiff.c b/userdiff.c index 32ead9654a..42b86ac63d 100644 --- a/userdiff.c +++ b/userdiff.c @@ -60,9 +60,23 @@ PATTERNS("pascal", "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" "|<>|<=|>=|:=|\\.\\."), PATTERNS("perl", - "^package .*;\n" - "^sub .* \\{\n" - "^[A-Z]+ \\{\n" /* BEGIN, END, ... */ + "^package .*\n" + "^sub [[:alnum:]_':]+[ \t]*" + "(\\([^)]*\\)[ \t]*)?" /* prototype */ + /* + * Attributes. A regex can't count nested parentheses, + * so just slurp up whatever we see, taking care not + * to accept lines like "sub foo; # defined elsewhere". + * + * An attribute could contain a semicolon, but at that + * point it seems reasonable enough to give up. + */ + "(:[^;#]*)?" + "(\\{[ \t]*)?" /* brace can come here or on the next line */ + "(#.*)?$\n" /* comment */ + "^[A-Z]+[ \t]*" /* BEGIN, END, ... */ + "(\\{[ \t]*)?" /* brace can come here or on the next line */ + "(#.*)?$\n" "^=head[0-9] .*", /* POD */ /* -- */ "[[:alpha:]_'][[:alnum:]_']*" -- cgit v1.3 From f143d9c695cd4c3e86069c536fa0dff04fc93e93 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 22 May 2011 12:29:32 -0500 Subject: userdiff/perl: tighten BEGIN/END block pattern to reject here-doc delimiters A naive method of treating BEGIN/END blocks with a brace on the second line as diff/grep funcname context involves also matching unrelated lines that consist of all-caps letters: sub foo { print <<'EOF' text goes here ... EOF ... rest of foo ... } That's not so great, because it means that "git diff" and "git grep --show-function" would write "=EOF" or "@@ EOF" as context instead of a more useful reminder like "@@ sub foo {". To avoid this, tighten the pattern to only match the special block names that perl accepts (namely BEGIN, END, INIT, CHECK, UNITCHECK, AUTOLOAD, and DESTROY). The list is taken from perl's toke.c. 
Suggested-by: Jakub Narebski Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- t/t4018-diff-funcname.sh | 17 +++++++++++++++-- userdiff.c | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'userdiff.c') diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh index b2fd1a99da..b68c56b68c 100755 --- a/t/t4018-diff-funcname.sh +++ b/t/t4018-diff-funcname.sh @@ -29,7 +29,7 @@ public class Beer } EOF sed 's/beer\\/beer,\\/' Beer-correct.java -cat >Beer.perl <<\EOF +cat >Beer.perl <<\EOT package Beer; use strict; @@ -56,6 +56,15 @@ sub finalround print "99 bottles of beer on the wall.\n"); } +sub withheredocument { + print <<"EOF" +decoy here-doc +EOF + # some lines of context + # to pad it out + print "hello\n"; +} + __END__ =head1 NAME @@ -76,7 +85,7 @@ Beer - subroutine to output fragment of a drinking song song; =cut -EOF +EOT sed -e ' s/hello/goodbye/ s/beer\\/beer,\\/ @@ -138,6 +147,10 @@ test_expect_success 'perl pattern accepts K&R style brace placement, too' ' test_expect_funcname "sub finalround\$" perl ' +test_expect_success 'but is not distracted by end of <