1
0
mirror of https://github.com/git/git.git synced 2024-10-22 11:18:17 +02:00
git/xdiff/xdiffi.c
Michael Haggerty a8fd78cc53 xdl_change_compact(): fix compaction heuristic to adjust ixo
The code branch used for the compaction heuristic forgot to keep ixo in
sync while the group was shifted. This is certainly wrong, as it causes
the two counters to get out of sync.

I *think* that this bug could also have caused the function to read past
the end of the rchgo array, though I haven't done the work to prove it
for sure. Here is my reasoning:

If ixo is not decremented correctly during one iteration of the outer
while loop, then it will loose sync with the ix counter. In particular,
ixo will be too large.

Suppose that the next iterations of the outer while loop (i.e.,
processing the next block of add/delete lines) don't have any sliders.
Then the ixo counter would be incremented by the number of non-changed
lines in xdf, which is the same as the number of non-changed lines in
xdfo that *should have* followed the group that experienced the
malfunction. But since ixo was too large at the end of that iteration,
it will be incremented past the end of the xdfo->rchg array, and will
try to read that memory illegally.

Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 13:51:47 -07:00

646 lines
16 KiB
C

/*
* LibXDiff by Davide Libenzi ( File Differential Library )
* Copyright (C) 2003 Davide Libenzi
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Davide Libenzi <davidel@xmailserver.org>
*
*/
#include "xinclude.h"
#define XDL_MAX_COST_MIN 256
#define XDL_HEUR_MIN_COST 256
#define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1)
#define XDL_SNAKE_CNT 20
#define XDL_K_HEUR 4
typedef struct s_xdpsplit {
long i1, i2;
int min_lo, min_hi;
} xdpsplit_t;
static long xdl_split(unsigned long const *ha1, long off1, long lim1,
unsigned long const *ha2, long off2, long lim2,
long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
xdalgoenv_t *xenv);
static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2);
/*
* See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
* Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
* the forward diagonal starting from (off1, off2) and the backward diagonal
* starting from (lim1, lim2). If the K values on the same diagonal crosses
* returns the furthest point of reach. We might end up having to expensive
* cases using this algorithm is full, so a little bit of heuristic is needed
* to cut the search and to return a suboptimal point.
*/
static long xdl_split(unsigned long const *ha1, long off1, long lim1,
unsigned long const *ha2, long off2, long lim2,
long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
xdalgoenv_t *xenv) {
long dmin = off1 - lim2, dmax = lim1 - off2;
long fmid = off1 - off2, bmid = lim1 - lim2;
long odd = (fmid - bmid) & 1;
long fmin = fmid, fmax = fmid;
long bmin = bmid, bmax = bmid;
long ec, d, i1, i2, prev1, best, dd, v, k;
/*
* Set initial diagonal values for both forward and backward path.
*/
kvdf[fmid] = off1;
kvdb[bmid] = lim1;
for (ec = 1;; ec++) {
int got_snake = 0;
/*
* We need to extent the diagonal "domain" by one. If the next
* values exits the box boundaries we need to change it in the
* opposite direction because (max - min) must be a power of two.
* Also we initialize the external K value to -1 so that we can
* avoid extra conditions check inside the core loop.
*/
if (fmin > dmin)
kvdf[--fmin - 1] = -1;
else
++fmin;
if (fmax < dmax)
kvdf[++fmax + 1] = -1;
else
--fmax;
for (d = fmax; d >= fmin; d -= 2) {
if (kvdf[d - 1] >= kvdf[d + 1])
i1 = kvdf[d - 1] + 1;
else
i1 = kvdf[d + 1];
prev1 = i1;
i2 = i1 - d;
for (; i1 < lim1 && i2 < lim2 && ha1[i1] == ha2[i2]; i1++, i2++);
if (i1 - prev1 > xenv->snake_cnt)
got_snake = 1;
kvdf[d] = i1;
if (odd && bmin <= d && d <= bmax && kvdb[d] <= i1) {
spl->i1 = i1;
spl->i2 = i2;
spl->min_lo = spl->min_hi = 1;
return ec;
}
}
/*
* We need to extent the diagonal "domain" by one. If the next
* values exits the box boundaries we need to change it in the
* opposite direction because (max - min) must be a power of two.
* Also we initialize the external K value to -1 so that we can
* avoid extra conditions check inside the core loop.
*/
if (bmin > dmin)
kvdb[--bmin - 1] = XDL_LINE_MAX;
else
++bmin;
if (bmax < dmax)
kvdb[++bmax + 1] = XDL_LINE_MAX;
else
--bmax;
for (d = bmax; d >= bmin; d -= 2) {
if (kvdb[d - 1] < kvdb[d + 1])
i1 = kvdb[d - 1];
else
i1 = kvdb[d + 1] - 1;
prev1 = i1;
i2 = i1 - d;
for (; i1 > off1 && i2 > off2 && ha1[i1 - 1] == ha2[i2 - 1]; i1--, i2--);
if (prev1 - i1 > xenv->snake_cnt)
got_snake = 1;
kvdb[d] = i1;
if (!odd && fmin <= d && d <= fmax && i1 <= kvdf[d]) {
spl->i1 = i1;
spl->i2 = i2;
spl->min_lo = spl->min_hi = 1;
return ec;
}
}
if (need_min)
continue;
/*
* If the edit cost is above the heuristic trigger and if
* we got a good snake, we sample current diagonals to see
* if some of the, have reached an "interesting" path. Our
* measure is a function of the distance from the diagonal
* corner (i1 + i2) penalized with the distance from the
* mid diagonal itself. If this value is above the current
* edit cost times a magic factor (XDL_K_HEUR) we consider
* it interesting.
*/
if (got_snake && ec > xenv->heur_min) {
for (best = 0, d = fmax; d >= fmin; d -= 2) {
dd = d > fmid ? d - fmid: fmid - d;
i1 = kvdf[d];
i2 = i1 - d;
v = (i1 - off1) + (i2 - off2) - dd;
if (v > XDL_K_HEUR * ec && v > best &&
off1 + xenv->snake_cnt <= i1 && i1 < lim1 &&
off2 + xenv->snake_cnt <= i2 && i2 < lim2) {
for (k = 1; ha1[i1 - k] == ha2[i2 - k]; k++)
if (k == xenv->snake_cnt) {
best = v;
spl->i1 = i1;
spl->i2 = i2;
break;
}
}
}
if (best > 0) {
spl->min_lo = 1;
spl->min_hi = 0;
return ec;
}
for (best = 0, d = bmax; d >= bmin; d -= 2) {
dd = d > bmid ? d - bmid: bmid - d;
i1 = kvdb[d];
i2 = i1 - d;
v = (lim1 - i1) + (lim2 - i2) - dd;
if (v > XDL_K_HEUR * ec && v > best &&
off1 < i1 && i1 <= lim1 - xenv->snake_cnt &&
off2 < i2 && i2 <= lim2 - xenv->snake_cnt) {
for (k = 0; ha1[i1 + k] == ha2[i2 + k]; k++)
if (k == xenv->snake_cnt - 1) {
best = v;
spl->i1 = i1;
spl->i2 = i2;
break;
}
}
}
if (best > 0) {
spl->min_lo = 0;
spl->min_hi = 1;
return ec;
}
}
/*
* Enough is enough. We spent too much time here and now we collect
* the furthest reaching path using the (i1 + i2) measure.
*/
if (ec >= xenv->mxcost) {
long fbest, fbest1, bbest, bbest1;
fbest = fbest1 = -1;
for (d = fmax; d >= fmin; d -= 2) {
i1 = XDL_MIN(kvdf[d], lim1);
i2 = i1 - d;
if (lim2 < i2)
i1 = lim2 + d, i2 = lim2;
if (fbest < i1 + i2) {
fbest = i1 + i2;
fbest1 = i1;
}
}
bbest = bbest1 = XDL_LINE_MAX;
for (d = bmax; d >= bmin; d -= 2) {
i1 = XDL_MAX(off1, kvdb[d]);
i2 = i1 - d;
if (i2 < off2)
i1 = off2 + d, i2 = off2;
if (i1 + i2 < bbest) {
bbest = i1 + i2;
bbest1 = i1;
}
}
if ((lim1 + lim2) - bbest < fbest - (off1 + off2)) {
spl->i1 = fbest1;
spl->i2 = fbest - fbest1;
spl->min_lo = 1;
spl->min_hi = 0;
} else {
spl->i1 = bbest1;
spl->i2 = bbest - bbest1;
spl->min_lo = 0;
spl->min_hi = 1;
}
return ec;
}
}
}
/*
* Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling
* the box splitting function. Note that the real job (marking changed lines)
* is done in the two boundary reaching checks.
*/
int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
diffdata_t *dd2, long off2, long lim2,
long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) {
unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha;
/*
* Shrink the box by walking through each diagonal snake (SW and NE).
*/
for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++);
for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--);
/*
* If one dimension is empty, then all records on the other one must
* be obviously changed.
*/
if (off1 == lim1) {
char *rchg2 = dd2->rchg;
long *rindex2 = dd2->rindex;
for (; off2 < lim2; off2++)
rchg2[rindex2[off2]] = 1;
} else if (off2 == lim2) {
char *rchg1 = dd1->rchg;
long *rindex1 = dd1->rindex;
for (; off1 < lim1; off1++)
rchg1[rindex1[off1]] = 1;
} else {
xdpsplit_t spl;
spl.i1 = spl.i2 = 0;
/*
* Divide ...
*/
if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb,
need_min, &spl, xenv) < 0) {
return -1;
}
/*
* ... et Impera.
*/
if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2,
kvdf, kvdb, spl.min_lo, xenv) < 0 ||
xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2,
kvdf, kvdb, spl.min_hi, xenv) < 0) {
return -1;
}
}
return 0;
}
int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdfenv_t *xe) {
long ndiags;
long *kvd, *kvdf, *kvdb;
xdalgoenv_t xenv;
diffdata_t dd1, dd2;
if (XDF_DIFF_ALG(xpp->flags) == XDF_PATIENCE_DIFF)
return xdl_do_patience_diff(mf1, mf2, xpp, xe);
if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
return xdl_do_histogram_diff(mf1, mf2, xpp, xe);
if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
return -1;
}
/*
* Allocate and setup K vectors to be used by the differential algorithm.
* One is to store the forward path and one to store the backward path.
*/
ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
if (!(kvd = (long *) xdl_malloc((2 * ndiags + 2) * sizeof(long)))) {
xdl_free_env(xe);
return -1;
}
kvdf = kvd;
kvdb = kvdf + ndiags;
kvdf += xe->xdf2.nreff + 1;
kvdb += xe->xdf2.nreff + 1;
xenv.mxcost = xdl_bogosqrt(ndiags);
if (xenv.mxcost < XDL_MAX_COST_MIN)
xenv.mxcost = XDL_MAX_COST_MIN;
xenv.snake_cnt = XDL_SNAKE_CNT;
xenv.heur_min = XDL_HEUR_MIN_COST;
dd1.nrec = xe->xdf1.nreff;
dd1.ha = xe->xdf1.ha;
dd1.rchg = xe->xdf1.rchg;
dd1.rindex = xe->xdf1.rindex;
dd2.nrec = xe->xdf2.nreff;
dd2.ha = xe->xdf2.ha;
dd2.rchg = xe->xdf2.rchg;
dd2.rindex = xe->xdf2.rindex;
if (xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec,
kvdf, kvdb, (xpp->flags & XDF_NEED_MINIMAL) != 0, &xenv) < 0) {
xdl_free(kvd);
xdl_free_env(xe);
return -1;
}
xdl_free(kvd);
return 0;
}
static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
xdchange_t *xch;
if (!(xch = (xdchange_t *) xdl_malloc(sizeof(xdchange_t))))
return NULL;
xch->next = xscr;
xch->i1 = i1;
xch->i2 = i2;
xch->chg1 = chg1;
xch->chg2 = chg2;
xch->ignore = 0;
return xch;
}
static int is_blank_line(xrecord_t **recs, long ix, long flags)
{
return xdl_blankline(recs[ix]->ptr, recs[ix]->size, flags);
}
static int recs_match(xrecord_t **recs, long ixs, long ix, long flags)
{
return (recs[ixs]->ha == recs[ix]->ha &&
xdl_recmatch(recs[ixs]->ptr, recs[ixs]->size,
recs[ix]->ptr, recs[ix]->size,
flags));
}
int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
long ix, ixo, ixs, ixref, grpsiz, nrec = xdf->nrec;
char *rchg = xdf->rchg, *rchgo = xdfo->rchg;
unsigned int blank_lines;
xrecord_t **recs = xdf->recs;
/*
* This is the same of what GNU diff does. Move back and forward
* change groups for a consistent and pretty diff output. This also
* helps in finding joinable change groups and reduce the diff size.
*/
for (ix = ixo = 0;;) {
/*
* Find the first changed line in the to-be-compacted file.
* We need to keep track of both indexes, so if we find a
* changed lines group on the other file, while scanning the
* to-be-compacted file, we need to skip it properly. Note
* that loops that are testing for changed lines on rchg* do
* not need index bounding since the array is prepared with
* a zero at position -1 and N.
*/
for (; ix < nrec && !rchg[ix]; ix++)
while (rchgo[ixo++]);
if (ix == nrec)
break;
/*
* Record the start of a changed-group in the to-be-compacted file
* and find the end of it, on both to-be-compacted and other file
* indexes (ix and ixo).
*/
ixs = ix;
for (ix++; rchg[ix]; ix++);
for (; rchgo[ixo]; ixo++);
do {
grpsiz = ix - ixs;
blank_lines = 0;
/*
* If the line before the current change group, is equal to
* the last line of the current change group, shift backward
* the group.
*/
while (ixs > 0 && recs_match(recs, ixs - 1, ix - 1, flags)) {
rchg[--ixs] = 1;
rchg[--ix] = 0;
/*
* This change might have joined two change groups,
* so we try to take this scenario in account by moving
* the start index accordingly (and so the other-file
* end-of-group index).
*/
for (; rchg[ixs - 1]; ixs--);
while (rchgo[--ixo]);
}
/*
* Record the end-of-group position in case we are matched
* with a group of changes in the other file (that is, the
* change record before the end-of-group index in the other
* file is set).
*/
ixref = rchgo[ixo - 1] ? ix: nrec;
/*
* If the first line of the current change group, is equal to
* the line next of the current change group, shift forward
* the group.
*/
while (ix < nrec && recs_match(recs, ixs, ix, flags)) {
blank_lines += is_blank_line(recs, ix, flags);
rchg[ixs++] = 0;
rchg[ix++] = 1;
/*
* This change might have joined two change groups,
* so we try to take this scenario in account by moving
* the start index accordingly (and so the other-file
* end-of-group index). Keep tracking the reference
* index in case we are shifting together with a
* corresponding group of changes in the other file.
*/
for (; rchg[ix]; ix++);
while (rchgo[++ixo])
ixref = ix;
}
} while (grpsiz != ix - ixs);
/*
* Try to move back the possibly merged group of changes, to match
* the recorded position in the other file.
*/
while (ixref < ix) {
rchg[--ixs] = 1;
rchg[--ix] = 0;
while (rchgo[--ixo]);
}
/*
* If a group can be moved back and forth, see if there is a
* blank line in the moving space. If there is a blank line,
* make sure the last blank line is the end of the group.
*
* As we already shifted the group forward as far as possible
* in the earlier loop, we need to shift it back only if at all.
*/
if ((flags & XDF_COMPACTION_HEURISTIC) && blank_lines) {
while (ixs > 0 &&
!is_blank_line(recs, ix - 1, flags) &&
recs_match(recs, ixs - 1, ix - 1, flags)) {
rchg[--ixs] = 1;
rchg[--ix] = 0;
while (rchgo[--ixo]);
}
}
}
return 0;
}
int xdl_build_script(xdfenv_t *xe, xdchange_t **xscr) {
xdchange_t *cscr = NULL, *xch;
char *rchg1 = xe->xdf1.rchg, *rchg2 = xe->xdf2.rchg;
long i1, i2, l1, l2;
/*
* Trivial. Collects "groups" of changes and creates an edit script.
*/
for (i1 = xe->xdf1.nrec, i2 = xe->xdf2.nrec; i1 >= 0 || i2 >= 0; i1--, i2--)
if (rchg1[i1 - 1] || rchg2[i2 - 1]) {
for (l1 = i1; rchg1[i1 - 1]; i1--);
for (l2 = i2; rchg2[i2 - 1]; i2--);
if (!(xch = xdl_add_change(cscr, i1, i2, l1 - i1, l2 - i2))) {
xdl_free_script(cscr);
return -1;
}
cscr = xch;
}
*xscr = cscr;
return 0;
}
void xdl_free_script(xdchange_t *xscr) {
xdchange_t *xch;
while ((xch = xscr) != NULL) {
xscr = xscr->next;
xdl_free(xch);
}
}
static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
xdemitconf_t const *xecfg)
{
xdchange_t *xch, *xche;
for (xch = xscr; xch; xch = xche->next) {
xche = xdl_get_hunk(&xch, xecfg);
if (!xch)
break;
if (xecfg->hunk_func(xch->i1, xche->i1 + xche->chg1 - xch->i1,
xch->i2, xche->i2 + xche->chg2 - xch->i2,
ecb->priv) < 0)
return -1;
}
return 0;
}
static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
{
xdchange_t *xch;
for (xch = xscr; xch; xch = xch->next) {
int ignore = 1;
xrecord_t **rec;
long i;
rec = &xe->xdf1.recs[xch->i1];
for (i = 0; i < xch->chg1 && ignore; i++)
ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
rec = &xe->xdf2.recs[xch->i2];
for (i = 0; i < xch->chg2 && ignore; i++)
ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
xch->ignore = ignore;
}
}
int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
xdchange_t *xscr;
xdfenv_t xe;
emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;
if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {
return -1;
}
if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||
xdl_build_script(&xe, &xscr) < 0) {
xdl_free_env(&xe);
return -1;
}
if (xscr) {
if (xpp->flags & XDF_IGNORE_BLANK_LINES)
xdl_mark_ignorable(xscr, &xe, xpp->flags);
if (ef(&xe, xscr, ecb, xecfg) < 0) {
xdl_free_script(xscr);
xdl_free_env(&xe);
return -1;
}
xdl_free_script(xscr);
}
xdl_free_env(&xe);
return 0;
}