From 80c49c3de2d5a3aa12b0980a65f1163c8aef0c16 Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Sat, 17 Jan 2009 17:29:48 +0100 Subject: [PATCH] color-words: make regex configurable via attributes Make the --color-words splitting regular expression configurable via the diff driver's 'wordregex' attribute. The user can then set the driver on a file in .gitattributes. If a regex is given on the command line, it overrides the driver's setting. We also provide built-in regexes for the languages that already had funcname patterns, and add an appropriate diff driver entry for C/++. (The patterns are designed to run UTF-8 sequences into a single chunk to make sure they remain readable.) Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- Documentation/diff-options.txt | 4 ++ Documentation/gitattributes.txt | 21 +++++++++ diff.c | 10 +++++ t/t4034-diff-words.sh | 36 +++++++++++++++ userdiff.c | 78 ++++++++++++++++++++++++++------- userdiff.h | 1 + 6 files changed, 135 insertions(+), 15 deletions(-) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 8689a92d8d..1edb82e8e1 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -102,6 +102,10 @@ differences. You may want to append `|[^[:space:]]` to your regular expression to make sure that it matches all non-whitespace characters. A match that contains a newline is silently truncated(!) at the newline. ++ +The regex can also be set via a diff driver, see +linkgit:gitattributes[1]; giving it explicitly overrides any diff +driver setting. --no-renames:: Turn off rename detection, even when the configuration diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt index 8af22eccac..ba3ba12730 100644 --- a/Documentation/gitattributes.txt +++ b/Documentation/gitattributes.txt @@ -317,6 +317,8 @@ patterns are available: - `bibtex` suitable for files with BibTeX coded references. +- `cpp` suitable for source code in the C and C++ languages. + - `html` suitable for HTML/XHTML documents. - `java` suitable for source code in the Java language. @@ -334,6 +336,25 @@ patterns are available: - `tex` suitable for source code for LaTeX documents. +Customizing word diff +^^^^^^^^^^^^^^^^^^^^^ + +You can customize the rules that `git diff --color-words` uses to +split words in a line, by specifying an appropriate regular expression +in the "diff.*.wordregex" configuration variable. For example, in TeX +a backslash followed by a sequence of letters forms a command, but +several such commands can be run together without intervening +whitespace. To separate them, use a regular expression such as + +------------------------ +[diff "tex"] + wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{}[:space:]]+" +------------------------ + +A built-in pattern is provided for all languages listed in the +previous section. + + Performing text diffs of binary files ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/diff.c b/diff.c index 00c661f82e..9fcde963db 100644 --- a/diff.c +++ b/diff.c @@ -1380,6 +1380,12 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe return one->driver->funcname.pattern ? &one->driver->funcname : NULL; } +static const char *userdiff_word_regex(struct diff_filespec *one) +{ + diff_filespec_load_driver(one); + return one->driver->word_regex; +} + void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b) { if (!options->a_prefix) @@ -1540,6 +1546,10 @@ static void builtin_diff(const char *name_a, ecbdata.diff_words = xcalloc(1, sizeof(struct diff_words_data)); ecbdata.diff_words->file = o->file; + if (!o->word_regex) + o->word_regex = userdiff_word_regex(one); + if (!o->word_regex) + o->word_regex = userdiff_word_regex(two); if (o->word_regex) { ecbdata.diff_words->word_regex = (regex_t *) xmalloc(sizeof(regex_t)); diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh index 4873486301..744221bef9 100755 --- a/t/t4034-diff-words.sh +++ b/t/t4034-diff-words.sh @@ -84,6 +84,41 @@ test_expect_success 'word diff with a regular expression' ' ' +test_expect_success 'set a diff driver' ' + git config diff.testdriver.wordregex "[^[:space:]]" && + cat < .gitattributes +pre diff=testdriver +post diff=testdriver +EOF +' + +test_expect_success 'option overrides default' ' + + word_diff --color-words="[a-z]+" + +' + +cat > expect <<\EOF +diff --git a/pre b/post +index 330b04f..5ed8eff 100644 +--- a/pre ++++ b/post +@@ -1,3 +1,7 @@ +h(4),hh[44] + +a = b + c + +aa = a + +aeff = aeff * ( aaa ) +EOF + +test_expect_success 'use default supplied by driver' ' + + word_diff --color-words + +' + echo 'aaa (aaa)' > pre echo 'aaa (aaa) aaa' > post @@ -100,6 +135,7 @@ test_expect_success 'test parsing words for newline' ' word_diff --color-words="a+" + ' echo '(:' > pre diff --git a/userdiff.c b/userdiff.c index 3681062ebf..2b55509485 100644 --- a/userdiff.c +++ b/userdiff.c @@ -6,14 +6,20 @@ static struct userdiff_driver *drivers; static int ndrivers; static int drivers_alloc; -#define FUNCNAME(name, pattern) \ - { name, NULL, -1, { pattern, REG_EXTENDED } } +#define PATTERNS(name, pattern, wordregex) \ + { name, NULL, -1, { pattern, REG_EXTENDED }, wordregex } static struct userdiff_driver builtin_drivers[] = { -FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"), -FUNCNAME("java", +PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", + "[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("java", "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n" - "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"), -FUNCNAME("objc", + "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$", + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=" + "|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|" + "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("objc", /* Negate C statements that can look like functions */ "!^[ \t]*(do|for|if|else|return|switch|while)\n" /* Objective-C methods */ @@ -21,20 +27,60 @@ FUNCNAME("objc", /* C functions */ "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n" /* Objective-C class/protocol definitions */ - "^(@(implementation|interface|protocol)[ \t].*)$"), -FUNCNAME("pascal", + "^(@(implementation|interface|protocol)[ \t].*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" + "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("pascal", "^((procedure|function|constructor|destructor|interface|" "implementation|initialization|finalization)[ \t]*.*)$" "\n" - "^(.*=[ \t]*(class|record).*)$"), -FUNCNAME("php", "^[\t ]*((function|class).*)"), -FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"), -FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"), -FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"), -FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"), + "^(.*=[ \t]*(class|record).*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" + "|<>|<=|>=|:=|\\.\\." + "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("php", "^[\t ]*((function|class).*)", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+" + "|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->" + "|[^[:space:]]|[\x80-\xff]+"), +PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?" + "|[^[:space:]|[\x80-\xff]+"), + /* -- */ +PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$", + /* -- */ + "(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?." + "|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~" + "|[^[:space:]|[\x80-\xff]+"), +PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$", + "[={}\"]|[^={}\" \t]+"), +PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$", + "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+|[^[:space:]]"), +PATTERNS("cpp", + /* Jump targets or access declarations */ + "!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:.*$\n" + /* C/++ functions/methods at top level */ + "^([A-Za-z_][A-Za-z_0-9]*([ \t]+[A-Za-z_][A-Za-z_0-9]*([ \t]*::[ \t]*[^[:space:]]+)?){1,}[ \t]*\\([^;]*)$\n" + /* compound type at top level */ + "^((struct|class|enum)[^;]*)$", + /* -- */ + "[a-zA-Z_][a-zA-Z0-9_]*" + "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?" + "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->" + "|[^[:space:]]|[\x80-\xff]+"), { "default", NULL, -1, { NULL, 0 } }, }; -#undef FUNCNAME +#undef PATTERNS static struct userdiff_driver driver_true = { "diff=true", @@ -134,6 +180,8 @@ int userdiff_config(const char *k, const char *v) return parse_string(&drv->external, k, v); if ((drv = parse_driver(k, v, "textconv"))) return parse_string(&drv->textconv, k, v); + if ((drv = parse_driver(k, v, "wordregex"))) + return parse_string(&drv->word_regex, k, v); return 0; } diff --git a/userdiff.h b/userdiff.h index ba2945770b..c3151594f5 100644 --- a/userdiff.h +++ b/userdiff.h @@ -11,6 +11,7 @@ struct userdiff_driver { const char *external; int binary; struct userdiff_funcname funcname; + const char *word_regex; const char *textconv; };