diff --git a/grep.c b/grep.c
index 451275d298..627ae3e3e8 100644
--- a/grep.c
+++ b/grep.c
@@ -5,6 +5,7 @@
 #include "diff.h"
 #include "diffcore.h"
 #include "commit.h"
+#include "quote.h"
 
 static int grep_source_load(struct grep_source *gs);
 static int grep_source_is_binary(struct grep_source *gs);
@@ -397,6 +398,35 @@ static int is_fixed(const char *s, size_t len)
 	return 1;
 }
 
+/*
+ * Compile p->pattern into p->regexp as a basic regexp whose special
+ * characters are quoted to match literally, adding REG_ICASE when
+ * opt->ignore_case is set.  With opt->debug the quoted pattern is
+ * printed to stderr; a regcomp() error goes to compile_regexp_failed().
+ */
+static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
+{
+	struct strbuf sb = STRBUF_INIT;
+	int err;
+	int regflags;
+
+	basic_regex_quote_buf(&sb, p->pattern);
+	/* the quoting above is for basic regexps, so never use ERE syntax */
+	regflags = opt->regflags & ~REG_EXTENDED;
+	if (opt->ignore_case)
+		regflags |= REG_ICASE;
+	err = regcomp(&p->regexp, sb.buf, regflags);
+	if (opt->debug)
+		fprintf(stderr, "fixed %s\n", sb.buf);
+	strbuf_release(&sb);
+	if (err) {
+		char errbuf[1024];
+		regerror(err, &p->regexp, errbuf, sizeof(errbuf));
+		regfree(&p->regexp);
+		compile_regexp_failed(p, errbuf);
+	}
+}
+
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
 	int icase, ascii_only;
@@ -407,8 +437,20 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 	icase = opt->regflags & REG_ICASE || p->ignore_case;
 	ascii_only = !has_non_ascii(p->pattern);
 
+	/*
+	 * Even when -F (fixed) asks us to do a non-regexp search, we
+	 * may not be able to correctly case-fold when -i
+	 * (ignore-case) is asked (in which case, we'll synthesize a
+	 * regexp to match the pattern that matches regexp special
+	 * characters literally, while ignoring case differences). On
+	 * the other hand, even without -F, if the pattern does not
+	 * have any regexp special characters and there is no need for
+	 * case-folding search, we can internally turn it into a
+	 * simple string match using kws. p->fixed tells us if we
+	 * want to use kws.
+	 */
 	if (opt->fixed)
-		p->fixed = 1;
+		p->fixed = !icase || ascii_only;
 	else if ((!icase || ascii_only) &&
 		 is_fixed(p->pattern, p->patternlen))
 		p->fixed = 1;
@@ -423,6 +465,14 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 		kwsincr(p->kws, p->pattern, p->patternlen);
 		kwsprep(p->kws);
 		return;
+	} else if (opt->fixed) {
+		/*
+		 * We come here when the pattern has non-ascii
+		 * characters we cannot case-fold, and we were asked
+		 * to ignore case.
+		 */
+		compile_fixed_regexp(p, opt);
+		return;
 	}
 
 	if (opt->pcre) {
diff --git a/quote.c b/quote.c
index fe884d2452..c67adb718c 100644
--- a/quote.c
+++ b/quote.c
@@ -440,3 +440,50 @@ void tcl_quote_buf(struct strbuf *sb, const char *src)
 	}
 	strbuf_addch(sb, '"');
 }
+
+/*
+ * Append to sb a copy of src with a backslash put in front of a leading
+ * '^', a trailing '$', and every '[', '.', '\\' and '*' (except a '*'
+ * that is the very first character), so the result matches src literally.
+ */
+void basic_regex_quote_buf(struct strbuf *sb, const char *src)
+{
+	char c;
+
+	if (*src == '^') {
+		/*
+		 * Only a leading '^' is an anchor and needs quoting.  A '*'
+		 * right after the quoted "\^" is left to the loop below:
+		 * there it would act as a repetition of the literal '^', so
+		 * it must be quoted like any other '*'.
+		 */
+		strbuf_addch(sb, '\\');
+		strbuf_addch(sb, *src++);
+	} else if (*src == '*') {
+		/* a '*' that starts the pattern is not special, no quoting */
+		strbuf_addch(sb, *src++);
+	}
+
+	while ((c = *src++)) {
+		switch (c) {
+		case '[':
+		case '.':
+		case '\\':
+		case '*':
+			strbuf_addch(sb, '\\');
+			strbuf_addch(sb, c);
+			break;
+
+		case '$':
+			/* only the end '$' is special and needs quoting */
+			if (*src == '\0')
+				strbuf_addch(sb, '\\');
+			strbuf_addch(sb, c);
+			break;
+
+		default:
+			strbuf_addch(sb, c);
+			break;
+		}
+	}
+}
diff --git a/quote.h b/quote.h
index 99e04d34bf..362d315bec 100644
--- a/quote.h
+++ b/quote.h
@@ -67,5 +67,6 @@ extern char *quote_path_relative(const char *in, const char *prefix,
 extern void perl_quote_buf(struct strbuf *sb, const char *src);
 extern void python_quote_buf(struct strbuf *sb, const char *src);
 extern void tcl_quote_buf(struct strbuf *sb, const char *src);
+extern void basic_regex_quote_buf(struct strbuf *sb, const char *src);
 
 #endif
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index b78a774dab..1929809d4a 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -20,4 +20,30 @@ test_expect_success REGEX_LOCALE 'grep literal string, no -F' '
 	git grep -i "TILRAUN: HALLÓ HEIMUR!"
 '
 
+test_expect_success REGEX_LOCALE 'grep literal string, with -F' '
+	git grep --debug -i -F "TILRAUN: Halló Heimur!" 2>&1 >/dev/null |
+		grep fixed >debug1 &&
+	test_write_lines "fixed TILRAUN: Halló Heimur!" >expect1 &&
+	test_cmp expect1 debug1 &&
+
+	git grep --debug -i -F "TILRAUN: HALLÓ HEIMUR!" 2>&1 >/dev/null |
+		grep fixed >debug2 &&
+	test_write_lines "fixed TILRAUN: HALLÓ HEIMUR!" >expect2 &&
+	test_cmp expect2 debug2
+'
+
+test_expect_success REGEX_LOCALE 'grep string with regex, with -F' '
+	test_write_lines "^*TILR^AUN:.* \\Halló \$He[]imur!\$" >file &&
+
+	git grep --debug -i -F "^*TILR^AUN:.* \\Halló \$He[]imur!\$" 2>&1 >/dev/null |
+		grep fixed >debug1 &&
+	test_write_lines "fixed \\^\\*TILR^AUN:\\.\\* \\\\Halló \$He\\[]imur!\\\$" >expect1 &&
+	test_cmp expect1 debug1 &&
+
+	git grep --debug -i -F "^*TILR^AUN:.* \\HALLÓ \$HE[]IMUR!\$" 2>&1 >/dev/null |
+		grep fixed >debug2 &&
+	test_write_lines "fixed \\^\\*TILR^AUN:\\.\\* \\\\HALLÓ \$HE\\[]IMUR!\\\$" >expect2 &&
+	test_cmp expect2 debug2
+'
+
 test_done