From 663c5ad0356f5501e2db7a39a7abf76e8db26b7d Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 17 Nov 2021 11:20:23 +0000 Subject: [PATCH 1/6] diff histogram: intern strings Histogram is the only diff algorithm not to call xdl_classify_record(). xdl_classify_record() ensures that the hash values of two strings that are not equal differ which means that it is not necessary to use xdl_recmatch() when comparing lines, all that is necessary is to compare the hash values. This gives a 7% reduction in the runtime of "git log --patch" when using the histogram diff algorithm. Test HEAD^ HEAD ----------------------------------------------------------------------------- 4000.1: log -3000 (baseline) 0.18(0.14+0.04) 0.19(0.17+0.02) +5.6% 4000.2: log --raw -3000 (tree-only) 0.99(0.77+0.21) 0.98(0.78+0.20) -1.0% 4000.3: log -p -3000 (Myers) 4.84(4.31+0.51) 4.81(4.15+0.64) -0.6% 4000.4: log -p -3000 --histogram 6.34(5.86+0.46) 5.87(5.19+0.66) -7.4% 4000.5: log -p -3000 --patience 5.39(4.60+0.76) 5.35(4.60+0.73) -0.7% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xhistogram.c | 5 ++--- xdiff/xprepare.c | 24 ++++++++---------------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index e694bfd9e3..6c1c88a69a 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -91,9 +91,8 @@ struct region { static int cmp_recs(xpparam_t const *xpp, xrecord_t *r1, xrecord_t *r2) { - return r1->ha == r2->ha && - xdl_recmatch(r1->ptr, r1->size, r2->ptr, r2->size, - xpp->flags); + return r1->ha == r2->ha; + } #define CMP_ENV(xpp, env, s1, l1, s2, l2) \ diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index abeb8fb84e..7fae0727a0 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -181,15 +181,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) goto abort; - if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF) - hbits = hsize = 0; - else { - hbits = xdl_hashbits((unsigned int) narec); - hsize = 1 << hbits; - if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) - goto abort; - memset(rhash, 0, hsize * sizeof(xrecord_t *)); - } + hbits = xdl_hashbits((unsigned int) narec); + hsize = 1 << hbits; + if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) + goto abort; + memset(rhash, 0, hsize * sizeof(xrecord_t *)); nrec = 0; if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { @@ -208,9 +204,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ crec->size = (long) (cur - prev); crec->ha = hav; recs[nrec++] = crec; - - if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) && - xdl_classify_record(pass, cf, rhash, hbits, crec) < 0) + if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0) goto abort; } } @@ -279,8 +273,7 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, enl1 = xdl_guess_lines(mf1, sample) + 1; enl2 = xdl_guess_lines(mf2, sample) + 1; - if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF && - xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) + if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) return -1; if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) { @@ -305,8 +298,7 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, return -1; } - if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) - xdl_free_classifier(&cf); + xdl_free_classifier(&cf); return 0; } From b82dd3f7f20c4d9cc68543871c3634cb32ea0789 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 17 Nov 2021 11:20:24 +0000 Subject: [PATCH 2/6] xdiff: avoid unnecessary memory allocations rindex and ha are only used by xdl_cleanup_records() which is not called by the histogram or patience algorithms. The perf test results show a small reduction in run time but that is probably within the noise. Test HEAD^ HEAD ----------------------------------------------------------------------------- 4000.1: log -3000 (baseline) 0.19(0.17+0.02) 0.19(0.12+0.07) +0.0% 4000.2: log --raw -3000 (tree-only) 0.98(0.78+0.20) 0.98(0.81+0.16) +0.0% 4000.3: log -p -3000 (Myers) 4.81(4.15+0.64) 4.81(4.23+0.56) +0.0% 4000.4: log -p -3000 --histogram 5.87(5.19+0.66) 5.83(5.11+0.70) -0.7% 4000.5: log -p -3000 --patience 5.35(4.60+0.73) 5.31(4.61+0.69) -0.7% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 7fae0727a0..4527a4a07c 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -213,10 +213,13 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ goto abort; memset(rchg, 0, (nrec + 2) * sizeof(char)); - if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) - goto abort; - if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) - goto abort; + if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) && + (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) { + if (!(rindex = xdl_malloc((nrec + 1) * sizeof(*rindex)))) + goto abort; + if (!(ha = xdl_malloc((nrec + 1) * sizeof(*ha)))) + goto abort; + } xdf->nrec = nrec; xdf->recs = recs; From 6b13bc32322dc64e52ef29c9f527746ad30d0ad8 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Wed, 17 Nov 2021 11:20:25 +0000 Subject: [PATCH 3/6] xdiff: simplify comparison Now that the histogram algorithm calls xdl_classify_record() it is no longer necessary to use xdl_recmatch() to compare lines, it is sufficient just to compare the hash values. This has a negligible effect on performance. Test HEAD~1 HEAD ----------------------------------------------------------------------------- 4000.1: log -3000 (baseline) 0.19(0.12+0.07) 0.18(0.14+0.04) -5.3% 4000.2: log --raw -3000 (tree-only) 0.98(0.81+0.16) 0.98(0.79+0.18) +0.0% 4000.3: log -p -3000 (Myers) 4.81(4.23+0.56) 4.80(4.26+0.53) -0.2% 4000.4: log -p -3000 --histogram 5.83(5.11+0.70) 5.82(5.15+0.65) -0.2% 4000.5: log -p -3000 --patience 5.31(4.61+0.69) 5.30(4.54+0.75) -0.2% Signed-off-by: Phillip Wood Signed-off-by: Junio C Hamano --- xdiff/xdiffi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index a4542c05b6..6a3b9280be 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -392,10 +392,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags) { - return (rec1->ha == rec2->ha && - xdl_recmatch(rec1->ptr, rec1->size, - rec2->ptr, rec2->size, - flags)); + return (rec1->ha == rec2->ha); } /* From cf0b26d90c93ce0a600ed094107b5f1a611750e9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 3 Dec 2021 00:11:15 -0500 Subject: [PATCH 4/6] xdiff: drop CMP_ENV macro from xhistogram This macro has been unused since 43ca7530df (xdiff/xhistogram: rely on xdl_trim_ends(), 2011-08-01). The function that called it went away there, and its use in the CMP() macro was inlined. It probably should have been deleted then, but nobody noticed. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- xdiff/xhistogram.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index 6c1c88a69a..399cc236d7 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -95,9 +95,6 @@ static int cmp_recs(xpparam_t const *xpp, } -#define CMP_ENV(xpp, env, s1, l1, s2, l2) \ - (cmp_recs(xpp, REC(env, s1, l1), REC(env, s2, l2))) - #define CMP(i, s1, l1, s2, l2) \ (cmp_recs(i->xpp, REC(i->env, s1, l1), REC(i->env, s2, l2))) From 25449450c0d07dfd490906c5114f67452aa18de8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 3 Dec 2021 00:11:40 -0500 Subject: [PATCH 5/6] xdiff: drop xpparam_t parameter from histogram cmp_recs() Since 663c5ad035 (diff histogram: intern strings, 2021-11-17), our cmp_recs() does not call xdl_recmatch(), and thus no longer needs an xpparam_t struct from which to get the flags. We can drop the unused parameter from the function, as well as the macro which wraps it. There's no functional change here; it's just simplifying things (and making -Wunused-parameter happier). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- xdiff/xhistogram.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index 399cc236d7..80794748b0 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -88,15 +88,14 @@ struct region { #define REC(env, s, l) \ (env->xdf##s.recs[l - 1]) -static int cmp_recs(xpparam_t const *xpp, - xrecord_t *r1, xrecord_t *r2) +static int cmp_recs(xrecord_t *r1, xrecord_t *r2) { return r1->ha == r2->ha; } #define CMP(i, s1, l1, s2, l2) \ - (cmp_recs(i->xpp, REC(i->env, s1, l1), REC(i->env, s2, l2))) + (cmp_recs(REC(i->env, s1, l1), REC(i->env, s2, l2))) #define TABLE_HASH(index, side, line) \ XDL_HASHLONG((REC(index->env, side, line))->ha, index->table_bits) From 1e45db12141789c084e96a46adcf70858f05a1bc Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 3 Dec 2021 00:12:01 -0500 Subject: [PATCH 6/6] xdiff: drop unused flags parameter from recs_match Since 6b13bc3232 (xdiff: simplify comparison, 2021-11-17), we do not call xdl_recmatch() from xdiffi.c's recs_match(), so we no longer need the "flags" parameter. That in turn lets us drop the flags parameters from the group-slide functions which call it. There's no functional change here; it's just making these functions a little simpler. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- xdiff/xdiffi.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 6a3b9280be..69689fab24 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -390,7 +390,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, } -static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags) +static int recs_match(xrecord_t *rec1, xrecord_t *rec2) { return (rec1->ha == rec2->ha); } @@ -756,10 +756,10 @@ static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g) * following group, expand this group to include it. Return 0 on success or -1 * if g cannot be slid down. */ -static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, long flags) +static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g) { if (g->end < xdf->nrec && - recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) { + recs_match(xdf->recs[g->start], xdf->recs[g->end])) { xdf->rchg[g->start++] = 0; xdf->rchg[g->end++] = 1; @@ -777,10 +777,10 @@ static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, long flags) * into a previous group, expand this group to include it. Return 0 on success * or -1 if g cannot be slid up. */ -static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, long flags) +static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g) { if (g->start > 0 && - recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) { + recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1])) { xdf->rchg[--g->start] = 1; xdf->rchg[--g->end] = 0; @@ -830,7 +830,7 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { end_matching_other = -1; /* Shift the group backward as much as possible: */ - while (!group_slide_up(xdf, &g, flags)) + while (!group_slide_up(xdf, &g)) if (group_previous(xdfo, &go)) BUG("group sync broken sliding up"); @@ -845,7 +845,7 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { /* Now shift the group forward as far as possible: */ while (1) { - if (group_slide_down(xdf, &g, flags)) + if (group_slide_down(xdf, &g)) break; if (group_next(xdfo, &go)) BUG("group sync broken sliding down"); @@ -872,7 +872,7 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { * other file that it can align with. */ while (go.end == go.start) { - if (group_slide_up(xdf, &g, flags)) + if (group_slide_up(xdf, &g)) BUG("match disappeared"); if (group_previous(xdfo, &go)) BUG("group sync broken sliding to match"); @@ -915,7 +915,7 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { } while (g.end > best_shift) { - if (group_slide_up(xdf, &g, flags)) + if (group_slide_up(xdf, &g)) BUG("best shift unreached"); if (group_previous(xdfo, &go)) BUG("group sync broken sliding to blank line");