From b520abf1c8f3dcbd218c9ae26830b7e2c94866c7 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 13 Nov 2017 12:15:57 -0800 Subject: [PATCH 1/5] sequencer: warn when internal merge may be suboptimal due to renameLimit When many files were renamed, the recursive merge strategy stopped detecting renames and left many paths with delete/modify conflicts, without any warning about what was going on or providing any hints about how to tell Git to spend more cycles to detect renames. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- sequencer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sequencer.c b/sequencer.c index 6d027b06c8..1aa769c82d 100644 --- a/sequencer.c +++ b/sequencer.c @@ -462,6 +462,7 @@ static int do_recursive_merge(struct commit *base, struct commit *next, if (is_rebase_i(opts) && clean <= 0) fputs(o.obuf.buf, stdout); strbuf_release(&o.obuf); + diff_warn_rename_limit("merge.renamelimit", o.needed_rename_limit, 0); if (clean < 0) return clean; From d6861d0258df95987696eab6c9bbc138a07190b9 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 13 Nov 2017 12:15:58 -0800 Subject: [PATCH 2/5] progress: fix progress meters when dealing with lots of work The possibility of setting merge.renameLimit beyond 2^16 raises the possibility that the values passed to progress can exceed 2^32. Use uint64_t, because it "ought to be enough for anybody". 
:-) Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- diffcore-rename.c | 4 ++-- progress.c | 29 +++++++++++++++-------------- progress.h | 8 ++++---- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/diffcore-rename.c b/diffcore-rename.c index 12dc2a056f..34ec05e091 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -534,7 +534,7 @@ void diffcore_rename(struct diff_options *options) if (options->show_rename_progress) { progress = start_delayed_progress( _("Performing inexact rename detection"), - rename_dst_nr * rename_src_nr); + (uint64_t)rename_dst_nr * (uint64_t)rename_src_nr); } mx = xcalloc(st_mult(NUM_CANDIDATE_PER_DST, num_create), sizeof(*mx)); @@ -571,7 +571,7 @@ void diffcore_rename(struct diff_options *options) diff_free_filespec_blob(two); } dst_cnt++; - display_progress(progress, (i+1)*rename_src_nr); + display_progress(progress, (uint64_t)(i+1)*(uint64_t)rename_src_nr); } stop_progress(&progress); diff --git a/progress.c b/progress.c index 289678d43d..edb97b1791 100644 --- a/progress.c +++ b/progress.c @@ -30,8 +30,8 @@ struct throughput { struct progress { const char *title; - int last_value; - unsigned total; + uint64_t last_value; + uint64_t total; unsigned last_percent; unsigned delay; unsigned delayed_percent_threshold; @@ -79,7 +79,7 @@ static int is_foreground_fd(int fd) return tpgrp < 0 || tpgrp == getpgid(0); } -static int display(struct progress *progress, unsigned n, const char *done) +static int display(struct progress *progress, uint64_t n, const char *done) { const char *eol, *tp; @@ -106,9 +106,10 @@ static int display(struct progress *progress, unsigned n, const char *done) if (percent != progress->last_percent || progress_update) { progress->last_percent = percent; if (is_foreground_fd(fileno(stderr)) || done) { - fprintf(stderr, "%s: %3u%% (%u/%u)%s%s", - progress->title, percent, n, - progress->total, tp, eol); + fprintf(stderr, "%s: %3u%% (%"PRIuMAX"/%"PRIuMAX")%s%s", + progress->title, percent, + 
(uintmax_t)n, (uintmax_t)progress->total, + tp, eol); fflush(stderr); } progress_update = 0; @@ -116,8 +117,8 @@ static int display(struct progress *progress, unsigned n, const char *done) } } else if (progress_update) { if (is_foreground_fd(fileno(stderr)) || done) { - fprintf(stderr, "%s: %u%s%s", - progress->title, n, tp, eol); + fprintf(stderr, "%s: %"PRIuMAX"%s%s", + progress->title, (uintmax_t)n, tp, eol); fflush(stderr); } progress_update = 0; @@ -127,7 +128,7 @@ static int display(struct progress *progress, unsigned n, const char *done) return 0; } -static void throughput_string(struct strbuf *buf, off_t total, +static void throughput_string(struct strbuf *buf, uint64_t total, unsigned int rate) { strbuf_reset(buf); @@ -138,7 +139,7 @@ static void throughput_string(struct strbuf *buf, off_t total, strbuf_addstr(buf, "/s"); } -void display_throughput(struct progress *progress, off_t total) +void display_throughput(struct progress *progress, uint64_t total) { struct throughput *tp; uint64_t now_ns; @@ -200,12 +201,12 @@ void display_throughput(struct progress *progress, off_t total) display(progress, progress->last_value, NULL); } -int display_progress(struct progress *progress, unsigned n) +int display_progress(struct progress *progress, uint64_t n) { return progress ? 
display(progress, n, NULL) : 0; } -static struct progress *start_progress_delay(const char *title, unsigned total, +static struct progress *start_progress_delay(const char *title, uint64_t total, unsigned percent_threshold, unsigned delay) { struct progress *progress = malloc(sizeof(*progress)); @@ -227,12 +228,12 @@ static struct progress *start_progress_delay(const char *title, unsigned total, return progress; } -struct progress *start_delayed_progress(const char *title, unsigned total) +struct progress *start_delayed_progress(const char *title, uint64_t total) { return start_progress_delay(title, total, 0, 2); } -struct progress *start_progress(const char *title, unsigned total) +struct progress *start_progress(const char *title, uint64_t total) { return start_progress_delay(title, total, 0, 0); } diff --git a/progress.h b/progress.h index 6392b63371..70a4d4a0d6 100644 --- a/progress.h +++ b/progress.h @@ -3,10 +3,10 @@ struct progress; -void display_throughput(struct progress *progress, off_t total); -int display_progress(struct progress *progress, unsigned n); -struct progress *start_progress(const char *title, unsigned total); -struct progress *start_delayed_progress(const char *title, unsigned total); +void display_throughput(struct progress *progress, uint64_t total); +int display_progress(struct progress *progress, uint64_t n); +struct progress *start_progress(const char *title, uint64_t total); +struct progress *start_delayed_progress(const char *title, uint64_t total); void stop_progress(struct progress **progress); void stop_progress_msg(struct progress **progress, const char *msg); From 9f7e4bfa3b6da16e3690312ff208cc27706b2c1b Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 13 Nov 2017 12:15:59 -0800 Subject: [PATCH 3/5] diff: remove silent clamp of renameLimit In commit 0024a5492 (Fix the rename detection limit checking; 2007-09-14), the renameLimit was clamped to 32767. 
This appears to have been to simply avoid integer overflow in the following computation: num_create * num_src <= rename_limit * rename_limit although it also could be viewed as a hardcoded bound on the amount of CPU time we're willing to allow users to tell git to spend on handling renames. An upper bound may make sense, but unfortunately this upper bound was neither communicated to the users, nor documented anywhere. Although large limits can make things slow, we have users who would be ecstatic to have a small five file change be correctly cherry picked even if they have to manually specify a large limit and wait ten minutes for the renames to be detected. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- diff.c | 2 +- diffcore-rename.c | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/diff.c b/diff.c index 0763e89263..40054070bd 100644 --- a/diff.c +++ b/diff.c @@ -5454,7 +5454,7 @@ void diff_warn_rename_limit(const char *varname, int needed, int degraded_cc) warning(_(rename_limit_warning)); else return; - if (0 < needed && needed < 32767) + if (0 < needed) warning(_(rename_limit_advice), varname, needed); } diff --git a/diffcore-rename.c b/diffcore-rename.c index 34ec05e091..9ca0eaec70 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -391,14 +391,10 @@ static int too_many_rename_candidates(int num_create, * growing larger than a "rename_limit" square matrix, ie: * * num_create * num_src > rename_limit * rename_limit - * - * but handles the potential overflow case specially (and we - * assume at least 32-bit integers) */ - if (rename_limit <= 0 || rename_limit > 32767) - rename_limit = 32767; if ((num_create <= rename_limit || num_src <= rename_limit) && - (num_create * num_src <= rename_limit * rename_limit)) + ((uint64_t)num_create * (uint64_t)num_src + <= (uint64_t)rename_limit * (uint64_t)rename_limit)) return 0; options->needed_rename_limit = @@ -415,7 +411,8 @@ static int too_many_rename_candidates(int 
num_create, num_src++; } if ((num_create <= rename_limit || num_src <= rename_limit) && - (num_create * num_src <= rename_limit * rename_limit)) + ((uint64_t)num_create * (uint64_t)num_src + <= (uint64_t)rename_limit * (uint64_t)rename_limit)) return 2; return 1; } From 9268cf4a2ef61c334204b1237b2174a77d16f98d Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 13 Nov 2017 12:16:00 -0800 Subject: [PATCH 4/5] sequencer: show rename progress during cherry picks When trying to cherry-pick a change that has lots of renames, it is somewhat unsettling to wait a really long time without any feedback. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- sequencer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sequencer.c b/sequencer.c index 1aa769c82d..c9e30776c2 100644 --- a/sequencer.c +++ b/sequencer.c @@ -448,6 +448,7 @@ static int do_recursive_merge(struct commit *base, struct commit *next, o.branch2 = next ? next_label : "(empty tree)"; if (is_rebase_i(opts)) o.buffer_output = 2; + o.show_rename_progress = 1; head_tree = parse_tree_indirect(head); next_tree = next ? next->tree : empty_tree(); From 89973554b52cb533b01acfdcb16d8215344bf004 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Wed, 29 Nov 2017 12:11:54 -0800 Subject: [PATCH 5/5] diffcore-rename: make diff-tree -l0 mean -l<large> In the documentation of diff-tree, it is stated that the -l option "prevents rename/copy detection from running if the number of rename/copy targets exceeds the specified number". The documentation does not mention any special handling for the number 0, but the implementation before commit 9f7e4bfa3b ("diff: remove silent clamp of renameLimit", 2017-11-13) treated 0 as a special value indicating that the rename limit is to be a very large number instead. The commit 9f7e4bfa3b changed that behavior, treating 0 as 0. Revert this behavior to what it was previously. This allows existing scripts and tools that use "-l0" to continue working. 
The alternative (to have "-l0" suppress rename detection) is probably much less useful, since users can just refrain from specifying -M and/or -C to have the same effect. Signed-off-by: Jonathan Tan Reviewed-by: Jonathan Nieder Reviewed-by: Elijah Newren Signed-off-by: Junio C Hamano --- diffcore-rename.c | 2 ++ t/t4001-diff-rename.sh | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/diffcore-rename.c b/diffcore-rename.c index 9ca0eaec70..245e999fe5 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -392,6 +392,8 @@ static int too_many_rename_candidates(int num_create, * * num_create * num_src > rename_limit * rename_limit */ + if (rename_limit <= 0) + rename_limit = 32767; if ((num_create <= rename_limit || num_src <= rename_limit) && ((uint64_t)num_create * (uint64_t)num_src <= (uint64_t)rename_limit * (uint64_t)rename_limit)) diff --git a/t/t4001-diff-rename.sh b/t/t4001-diff-rename.sh index 0d1fa45d25..eadf4f6244 100755 --- a/t/t4001-diff-rename.sh +++ b/t/t4001-diff-rename.sh @@ -230,4 +230,19 @@ test_expect_success 'rename pretty print common prefix and suffix overlap' ' test_i18ngrep " d/f/{ => f}/e " output ' +test_expect_success 'diff-tree -l0 defaults to a big rename limit, not zero' ' + test_write_lines line1 line2 line3 >myfile && + git add myfile && + git commit -m x && + + test_write_lines line1 line2 line4 >myotherfile && + git rm myfile && + git add myotherfile && + git commit -m x && + + git diff-tree -M -l0 HEAD HEAD^ >actual && + # Verify that a rename from myotherfile to myfile was detected + grep "myotherfile.*myfile" actual +' + test_done