From 39bdd84eaf646aa73a3709b0eb8be3f47378708f Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Sat, 20 Jul 2024 16:01:59 +0000 Subject: [PATCH 1/2] add-patch: handle splitting hunks with diff.suppressBlankEmpty When "add -p" parses diffs, it looks for context lines starting with a single space. But when diff.suppressBlankEmpty is in effect, an empty context line will omit the space, giving us a true empty line. This confuses the parser, which is unable to split based on such a line. It's tempting to say that we should just make sure that we generate a diff without that option. However, although we do not parse hunks that the user has manually edited with parse_diff() we do allow the user to split such hunks. As POSIX calls the decision of whether to print the space here "implementation-defined" we need to handle edited hunks where empty context lines omit the space. So let's handle both cases: a context line either starts with a space or consists of a totally empty line by normalizing the first character to a space when we parse them. Normalizing the first character rather than changing the code to check for a space or newline will hopefully future proof against introducing similar bugs if the code is changed. Reported-by: Ilya Tumaykin Helped-by: Jeff King Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- add-patch.c | 19 +++++++++++++------ t/t3701-add-interactive.sh | 19 +++++++++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/add-patch.c b/add-patch.c index 79eda168eb..1c28a7fa1f 100644 --- a/add-patch.c +++ b/add-patch.c @@ -400,6 +400,12 @@ static void complete_file(char marker, struct hunk *hunk) hunk->splittable_into++; } +/* Empty context lines may omit the leading ' ' */ +static int normalize_marker(const char *p) +{ + return p[0] == '\n' || (p[0] == '\r' && p[1] == '\n') ? 
' ' : p[0]; +} + static int parse_diff(struct add_p_state *s, const struct pathspec *ps) { struct strvec args = STRVEC_INIT; @@ -485,6 +491,7 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps) while (p != pend) { char *eol = memchr(p, '\n', pend - p); const char *deleted = NULL, *mode_change = NULL; + char ch = normalize_marker(p); if (!eol) eol = pend; @@ -532,7 +539,7 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps) * Start counting into how many hunks this one can be * split */ - marker = *p; + marker = ch; } else if (hunk == &file_diff->head && starts_with(p, "new file")) { file_diff->added = 1; @@ -586,10 +593,10 @@ static int parse_diff(struct add_p_state *s, const struct pathspec *ps) (int)(eol - (plain->buf + file_diff->head.start)), plain->buf + file_diff->head.start); - if ((marker == '-' || marker == '+') && *p == ' ') + if ((marker == '-' || marker == '+') && ch == ' ') hunk->splittable_into++; - if (marker && *p != '\\') - marker = *p; + if (marker && ch != '\\') + marker = ch; p = eol == pend ? 
pend : eol + 1; hunk->end = p - plain->buf; @@ -813,7 +820,7 @@ static int merge_hunks(struct add_p_state *s, struct file_diff *file_diff, (int)(hunk->end - hunk->start), plain + hunk->start); - if (plain[overlap_end] != ' ') + if (normalize_marker(&plain[overlap_end]) != ' ') return error(_("expected context line " "#%d in\n%.*s"), (int)(j + 1), @@ -953,7 +960,7 @@ static int split_hunk(struct add_p_state *s, struct file_diff *file_diff, context_line_count = 0; while (splittable_into > 1) { - ch = s->plain.buf[current]; + ch = normalize_marker(&s->plain.buf[current]); if (!ch) BUG("buffer overrun while splitting hunks"); diff --git a/t/t3701-add-interactive.sh b/t/t3701-add-interactive.sh index 0b5339ac6c..4c3e2bc82f 100755 --- a/t/t3701-add-interactive.sh +++ b/t/t3701-add-interactive.sh @@ -1130,4 +1130,23 @@ test_expect_success 'reset -p with unmerged files' ' test_must_be_empty staged ' +test_expect_success 'hunk splitting works with diff.suppressBlankEmpty' ' + test_config diff.suppressBlankEmpty true && + write_script fake-editor.sh <<-\EOF && + tr F G <"$1" >"$1.tmp" && + mv "$1.tmp" "$1" + EOF + + test_write_lines a b "" c d "" e f "" >file && + git add file && + test_write_lines A b "" c D "" e F "" >file && + ( + test_set_editor "$(pwd)/fake-editor.sh" && + test_write_lines s n y e q | git add -p file + ) && + git cat-file blob :file >actual && + test_write_lines a b "" c D "" e G "" >expect && + test_cmp expect actual +' + test_done From 60cf761ed14298d618597e87e50f25bb61171e84 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Sat, 20 Jul 2024 16:02:00 +0000 Subject: [PATCH 2/2] add-patch: use normalize_marker() when recounting edited hunk After the user has edited a hunk, the number of lines in the pre- and post-image is recounted so that the hunk header can be updated before passing the hunk to "git apply". The recounting code correctly handles empty context lines where the leading ' ' is omitted by treating '\n' and '\r' as context lines.
Update this code to use normalize_marker() so that the handling of empty context lines is consistent with the rest of the hunk parsing code. There is a small change in behavior as normalize_marker() only treats "\r\n" as an empty context line rather than any line starting with '\r'. This should not matter in practice as Macs have used Unix line endings since Mac OS X was released in 2001 and if it transpires that someone is still using an earlier version of Mac OS where lines end with '\r' then we will need to change the handling of '\r' in normalize_marker() anyway. Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- add-patch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/add-patch.c b/add-patch.c index 1c28a7fa1f..537cf400e6 100644 --- a/add-patch.c +++ b/add-patch.c @@ -1178,14 +1178,14 @@ static ssize_t recount_edited_hunk(struct add_p_state *s, struct hunk *hunk, header->old_count = header->new_count = 0; for (i = hunk->start; i < hunk->end; ) { - switch (s->plain.buf[i]) { + switch(normalize_marker(&s->plain.buf[i])) { case '-': header->old_count++; break; case '+': header->new_count++; break; - case ' ': case '\r': case '\n': + case ' ': header->old_count++; header->new_count++; break;