From cc8e26ee8d56dbdb442796a43aa7f30d3684b036 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 20 Sep 2021 23:46:56 -0400 Subject: [PATCH 1/5] grep: stop modifying buffer in strip_timestamp When grepping for headers in commit objects, we receive individual lines (e.g., "author Name 1234 -0000"), and then strip off the timestamp to do our match. We do so by writing a NUL byte over the whitespace separator, and then remembering to restore it later. We had to do it this way when this was added back in a4d7d2c6db (log --author/--committer: really match only with name part, 2008-09-04), because we fed the result directly to regexec(), which expects a NUL-terminated string. But since b7d36ffca0 (regex: use regexec_buf(), 2016-09-21), we have a function which can match part of a buffer. So instead of modifying the string, we can instead just move the "eol" pointer, and the rest of the code will do the right thing. This will let further patches mark more buffers as "const". Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- grep.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/grep.c b/grep.c index 79598f245f..5b1f2da4d3 100644 --- a/grep.c +++ b/grep.c @@ -922,20 +922,16 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, return hit; } -static int strip_timestamp(char *bol, char **eol_p) +static void strip_timestamp(char *bol, char **eol_p) { char *eol = *eol_p; - int ch; while (bol < --eol) { if (*eol != '>') continue; *eol_p = ++eol; - ch = *eol; - *eol = '\0'; - return ch; + break; } - return 0; } static struct { @@ -952,7 +948,6 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, regmatch_t *pmatch, int eflags) { int hit = 0; - int saved_ch = 0; const char *start = bol; if ((p->token != GREP_PATTERN) && @@ -971,7 +966,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, switch (p->field) { case GREP_HEADER_AUTHOR: case GREP_HEADER_COMMITTER: - saved_ch = strip_timestamp(bol, &eol); + strip_timestamp(bol, &eol); break; default: break; @@ -1021,8 +1016,6 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, goto again; } } - if (p->token == GREP_PATTERN_HEAD && saved_ch) - *eol = saved_ch; if (hit) { pmatch[0].rm_so += bol - start; pmatch[0].rm_eo += bol - start; From 995e525b1729ada354e443f16e1c0fad59df25a8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 20 Sep 2021 23:48:09 -0400 Subject: [PATCH 2/5] grep: stop modifying buffer in show_line() When showing lines via grep (or looking for funcnames), we call show_line() on a multi-line buffer. It finds the end of line and marks it with a NUL. However, we don't need to do so, as the resulting line is only used along with its "eol" marker: - we pass both to next_match(), which takes care to look at only the bytes we specified - we pass the line to output_color() without its matching eol marker. However, we do use the "match" struct we got from next_match() to tell it how many bytes to look at (which can never exceed the string we passed it). So we can stop setting and restoring this NUL marker. That makes the code simpler, and will allow us to take a const buffer in a future patch. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- grep.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/grep.c b/grep.c index 5b1f2da4d3..70af01d1c1 100644 --- a/grep.c +++ b/grep.c @@ -1239,7 +1239,6 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol, if (opt->color || opt->only_matching) { regmatch_t match; enum grep_context ctx = GREP_CONTEXT_BODY; - int ch = *eol; int eflags = 0; if (opt->color) { @@ -1254,7 +1253,6 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol, else if (sign == '=') line_color = opt->colors[GREP_COLOR_FUNCTION]; } - *eol = '\0'; while (next_match(opt, bol, eol, ctx, &match, eflags)) { if (match.rm_so == match.rm_eo) break; @@ -1272,7 +1270,6 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol, rest -= match.rm_eo; eflags = REG_NOTBOL; } - *eol = ch; } if (!opt->only_matching) { output_color(opt, bol, rest, line_color); From f84e79ff4bffbcd9de85adc270f5164a6b024d34 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 20 Sep 2021 23:48:44 -0400 Subject: [PATCH 3/5] grep: stop modifying buffer in grep_source_1() We find the end of each matching line of a buffer, and then temporarily write a NUL to turn it into a regular C string. But we don't need to do so, because the only thing we do in the interim is pass the line and its length (via an "eol" pointer) to match_line(). And that function should only look at the bytes we passed it, whether it has a terminating NUL or not. We can drop this temporary write in order to simplify the code and make it easier to use const buffers in more of grep.c. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- grep.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/grep.c b/grep.c index 70af01d1c1..32c4641443 100644 --- a/grep.c +++ b/grep.c @@ -1616,7 +1616,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle bol = gs->buf; left = gs->size; while (left) { - char *eol, ch; + char *eol; int hit; ssize_t cno; ssize_t col = -1, icol = -1; @@ -1637,14 +1637,11 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle && look_ahead(opt, &left, &lno, &bol)) break; eol = end_of_line(bol, &left); - ch = *eol; - *eol = 0; if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol)) ctx = GREP_CONTEXT_BODY; hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits); - *eol = ch; if (collect_hits) goto next_line; From 1a845fbc48f2613c0daab717ee934e066e65723d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 20 Sep 2021 23:49:49 -0400 Subject: [PATCH 4/5] grep: mark "haystack" buffers as const When we're grepping in a buffer, we don't need to modify it. So we can take "const char *" buffers, rather than "char *". This can avoid some awkward casts in our callers, and make our expectations more clear (we will not take ownership of the memory, nor will we ever write to it). These spots don't all necessarily have to be converted at the same time, but some of them are dependent on others, because we pass pointers-to-pointers in a few cases. And none of this should change any behavior, since we're just adding "const" qualifiers (and likewise, the compiler will let us know if we missed any spots). So it's relatively low-risk to just do this all at once. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- grep.c | 61 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/grep.c b/grep.c index 32c4641443..9603ac119d 100644 --- a/grep.c +++ b/grep.c @@ -867,7 +867,7 @@ void free_grep_patterns(struct grep_opt *opt) free_pattern_expr(opt->pattern_expression); } -static char *end_of_line(char *cp, unsigned long *left) +static const char *end_of_line(const char *cp, unsigned long *left) { unsigned long l = *left; while (l && *cp != '\n') { @@ -908,7 +908,8 @@ static void show_name(struct grep_opt *opt, const char *name) opt->output(opt, opt->null_following_name ? "\0" : "\n", 1); } -static int patmatch(struct grep_pat *p, char *line, char *eol, +static int patmatch(struct grep_pat *p, + const char *line, const char *eol, regmatch_t *match, int eflags) { int hit; @@ -922,9 +923,9 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, return hit; } -static void strip_timestamp(char *bol, char **eol_p) +static void strip_timestamp(const char *bol, const char **eol_p) { - char *eol = *eol_p; + const char *eol = *eol_p; while (bol < --eol) { if (*eol != '>') @@ -943,7 +944,8 @@ static struct { { "reflog ", 7 }, }; -static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, +static int match_one_pattern(struct grep_pat *p, + const char *bol, const char *eol, enum grep_context ctx, regmatch_t *pmatch, int eflags) { @@ -1023,8 +1025,9 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, return hit; } -static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, char *bol, - char *eol, enum grep_context ctx, ssize_t *col, +static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, + const char *bol, const char *eol, + enum grep_context ctx, ssize_t *col, ssize_t *icol, int collect_hits) { int h = 0; @@ -1091,7 +1094,8 @@ static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, char *bol, return h; } -static int match_expr(struct grep_opt *opt, char *bol, char *eol, +static int match_expr(struct grep_opt *opt, + const char *bol, const char *eol, enum grep_context ctx, ssize_t *col, ssize_t *icol, int collect_hits) { @@ -1099,7 +1103,8 @@ static int match_expr(struct grep_opt *opt, char *bol, char *eol, return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits); } -static int match_line(struct grep_opt *opt, char *bol, char *eol, +static int match_line(struct grep_opt *opt, + const char *bol, const char *eol, ssize_t *col, ssize_t *icol, enum grep_context ctx, int collect_hits) { @@ -1131,7 +1136,8 @@ static int match_line(struct grep_opt *opt, char *bol, char *eol, return hit; } -static int match_next_pattern(struct grep_pat *p, char *bol, char *eol, +static int match_next_pattern(struct grep_pat *p, + const char *bol, const char *eol, enum grep_context ctx, regmatch_t *pmatch, int eflags) { @@ -1152,7 +1158,8 @@ static int match_next_pattern(struct grep_pat *p, char *bol, char *eol, return 1; } -static int next_match(struct grep_opt *opt, char *bol, char *eol, +static int next_match(struct grep_opt *opt, + const char *bol, const char *eol, enum grep_context ctx, regmatch_t *pmatch, int eflags) { struct grep_pat *p; @@ -1208,7 +1215,8 @@ static void show_line_header(struct grep_opt *opt, const char *name, } } -static void show_line(struct grep_opt *opt, char *bol, char *eol, +static void show_line(struct grep_opt *opt, + const char *bol, const char *eol, const char *name, unsigned lno, ssize_t cno, char sign) { int rest = eol - bol; @@ -1297,7 +1305,8 @@ static inline void grep_attr_unlock(void) pthread_mutex_unlock(&grep_attr_mutex); } -static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol) +static int match_funcname(struct grep_opt *opt, struct grep_source *gs, + const char *bol, const char *eol) { xdemitconf_t *xecfg = opt->priv; if (xecfg && !xecfg->find_func) { @@ -1324,10 +1333,10 @@ static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bo } static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs, - char *bol, unsigned lno) + const char *bol, unsigned lno) { while (bol > gs->buf) { - char *eol = --bol; + const char *eol = --bol; while (bol > gs->buf && bol[-1] != '\n') bol--; @@ -1346,7 +1355,7 @@ static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs, static int is_empty_line(const char *bol, const char *eol); static void show_pre_context(struct grep_opt *opt, struct grep_source *gs, - char *bol, char *end, unsigned lno) + const char *bol, const char *end, unsigned lno) { unsigned cur = lno, from = 1, funcname_lno = 0, orig_from; int funcname_needed = !!opt->funcname, comment_needed = 0; @@ -1366,8 +1375,8 @@ static void show_pre_context(struct grep_opt *opt, struct grep_source *gs, /* Rewind. */ while (bol > gs->buf && cur > from) { - char *next_bol = bol; - char *eol = --bol; + const char *next_bol = bol; + const char *eol = --bol; while (bol > gs->buf && bol[-1] != '\n') bol--; @@ -1398,7 +1407,7 @@ static void show_pre_context(struct grep_opt *opt, struct grep_source *gs, /* Back forward. */ while (cur < lno) { - char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-'; + const char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-'; while (*eol != '\n') eol++; @@ -1426,12 +1435,12 @@ static int should_lookahead(struct grep_opt *opt) static int look_ahead(struct grep_opt *opt, unsigned long *left_p, unsigned *lno_p, - char **bol_p) + const char **bol_p) { unsigned lno = *lno_p; - char *bol = *bol_p; + const char *bol = *bol_p; struct grep_pat *p; - char *sp, *last_bol; + const char *sp, *last_bol; regoff_t earliest = -1; for (p = opt->pattern_list; p; p = p->next) { @@ -1533,8 +1542,8 @@ static int is_empty_line(const char *bol, const char *eol) static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits) { - char *bol; - char *peek_bol = NULL; + const char *bol; + const char *peek_bol = NULL; unsigned long left; unsigned lno = 1; unsigned last_hit = 0; @@ -1616,7 +1625,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle bol = gs->buf; left = gs->size; while (left) { - char *eol; + const char *eol; int hit; ssize_t cno; ssize_t col = -1, icol = -1; @@ -1700,7 +1709,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle } if (show_function && (!peek_bol || peek_bol < bol)) { unsigned long peek_left = left; - char *peek_eol = eol; + const char *peek_eol = eol; /* * Trailing empty lines are not interesting. From 1e66871608d1f6f4cd66e899ee33755bbf6deafa Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 20 Sep 2021 23:51:28 -0400 Subject: [PATCH 5/5] grep: store grep_source buffer as const Our grep_buffer() function takes a non-const buffer, which is confusing: we don't take ownership of nor write to the buffer. This mostly comes from the fact that the underlying grep_source struct in which we store the buffer uses non-const pointer. The memory pointed to by the struct is sometimes owned by us (for FILE or OID sources), and sometimes not (for BUF sources). Let's store it as const, which lets us err on the side of caution (i.e., the compiler will warn us if any of our code writes to or tries to free it). As a result, we must annotate the one place where we do free it by casting away the constness. But that's a small price to pay for the extra safety and clarity elsewhere (and indeed, it already had a comment explaining why GREP_SOURCE_BUF _didn't_ free it). And then we can mark grep_buffer() as taking a const buffer. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- grep.c | 9 ++++++--- grep.h | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/grep.c b/grep.c index 9603ac119d..14fe8a0fd2 100644 --- a/grep.c +++ b/grep.c @@ -1821,7 +1821,8 @@ int grep_source(struct grep_opt *opt, struct grep_source *gs) return grep_source_1(opt, gs, 0); } -static void grep_source_init_buf(struct grep_source *gs, char *buf, +static void grep_source_init_buf(struct grep_source *gs, + const char *buf, unsigned long size) { gs->type = GREP_SOURCE_BUF; @@ -1833,7 +1834,7 @@ static void grep_source_init_buf(struct grep_source *gs, char *buf, gs->identifier = NULL; } -int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) +int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size) { struct grep_source gs; int r; @@ -1885,7 +1886,9 @@ void grep_source_clear_data(struct grep_source *gs) switch (gs->type) { case GREP_SOURCE_FILE: case GREP_SOURCE_OID: - FREE_AND_NULL(gs->buf); + /* these types own the buffer */ + free((char *)gs->buf); + gs->buf = NULL; gs->size = 0; break; case GREP_SOURCE_BUF: diff --git a/grep.h b/grep.h index 128007db65..3cb8a83ae8 100644 --- a/grep.h +++ b/grep.h @@ -189,7 +189,7 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *orig void append_header_grep_pattern(struct grep_opt *, enum grep_header_field, const char *); void compile_grep_patterns(struct grep_opt *opt); void free_grep_patterns(struct grep_opt *opt); -int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size); +int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size); struct grep_source { char *name; @@ -202,7 +202,7 @@ struct grep_source { void *identifier; struct repository *repo; /* if GREP_SOURCE_OID */ - char *buf; + const char *buf; unsigned long size; char *path; /* for attribute lookups */