From 452f4fa51e1fd1d0e3155fb4f4d8913298ac63a6 Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Thu, 7 Jul 2011 12:23:55 +0800 Subject: [PATCH 01/12] xdiff/xprepare: use memset() Use memset() instead of a for loop to initialize. This could give a performance advantage. Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 1689085235..783631a34c 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -64,8 +64,6 @@ static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2); static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { - long i; - cf->flags = flags; cf->hbits = xdl_hashbits((unsigned int) size); @@ -80,8 +78,7 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { xdl_cha_free(&cf->ncha); return -1; } - for (i = 0; i < cf->hsize; i++) - cf->rchash[i] = NULL; + memset(cf->rchash, 0, cf->hsize * sizeof(xdlclass_t *)); cf->count = 0; @@ -136,7 +133,7 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) { unsigned int hbits; - long i, nrec, hsize, bsize; + long nrec, hsize, bsize; unsigned long hav; char const *blk, *cur, *top, *prev; xrecord_t *crec; @@ -164,8 +161,7 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, xdl_cha_free(&xdf->rcha); return -1; } - for (i = 0; i < hsize; i++) - rhash[i] = NULL; + memset(rhash, 0, hsize * sizeof(xrecord_t *)); nrec = 0; if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { From 159607a8f1ad16c0a04e0f17d5f8a35299696bc6 Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Thu, 7 Jul 2011 12:23:56 +0800 Subject: [PATCH 02/12] xdiff/xprepare: refactor abort cleanups Group free()'s that are called when a malloc() fails in xdl_prepare_ctx(), making for more readable 
code. Also add a free() on ha, in case future git hackers add allocs after the ha malloc. Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 91 +++++++++++++++++------------------------------- 1 file changed, 32 insertions(+), 59 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 783631a34c..0f571db2d7 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -143,24 +143,21 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, char *rchg; long *rindex; - if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) { + ha = NULL; + rindex = NULL; + rchg = NULL; + rhash = NULL; + recs = NULL; - return -1; - } - if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) { - - xdl_cha_free(&xdf->rcha); - return -1; - } + if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) + goto abort; + if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) + goto abort; hbits = xdl_hashbits((unsigned int) narec); hsize = 1 << hbits; - if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) { - - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) + goto abort; memset(rhash, 0, hsize * sizeof(xrecord_t *)); nrec = 0; @@ -175,63 +172,30 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, hav = xdl_hash_record(&cur, top, xpp->flags); if (nrec >= narec) { narec *= 2; - if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) { - - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(rrecs = (xrecord_t **) xdl_realloc(recs, narec * sizeof(xrecord_t *)))) + goto abort; recs = rrecs; } - if (!(crec = xdl_cha_alloc(&xdf->rcha))) { - - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(crec = xdl_cha_alloc(&xdf->rcha))) + goto abort; crec->ptr = prev; crec->size 
= (long) (cur - prev); crec->ha = hav; recs[nrec++] = crec; - if (xdl_classify_record(cf, rhash, hbits, crec) < 0) { - - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (xdl_classify_record(cf, rhash, hbits, crec) < 0) + goto abort; } } - if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) { - - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(rchg = (char *) xdl_malloc((nrec + 2) * sizeof(char)))) + goto abort; memset(rchg, 0, (nrec + 2) * sizeof(char)); - if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) { - - xdl_free(rchg); - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } - if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) { - - xdl_free(rindex); - xdl_free(rchg); - xdl_free(rhash); - xdl_free(recs); - xdl_cha_free(&xdf->rcha); - return -1; - } + if (!(rindex = (long *) xdl_malloc((nrec + 1) * sizeof(long)))) + goto abort; + if (!(ha = (unsigned long *) xdl_malloc((nrec + 1) * sizeof(unsigned long)))) + goto abort; xdf->nrec = nrec; xdf->recs = recs; @@ -245,6 +209,15 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, xdf->dend = nrec - 1; return 0; + +abort: + xdl_free(ha); + xdl_free(rindex); + xdl_free(rchg); + xdl_free(rhash); + xdl_free(recs); + xdl_cha_free(&xdf->rcha); + return -1; } From 1d26b252f1128f7b31885811d7f481b6b7612bd7 Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Thu, 7 Jul 2011 12:23:57 +0800 Subject: [PATCH 03/12] xdiff/xpatience: factor out fall-back-diff function This is in preparation for the histogram diff algorithm, which will also re-use much of the code to call the default Myers diff algorithm. 
Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xpatience.c | 27 ++------------------------- xdiff/xutils.c | 31 +++++++++++++++++++++++++++++++ xdiff/xutils.h | 2 ++ 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c index e42c16a807..fdd7d0263f 100644 --- a/xdiff/xpatience.c +++ b/xdiff/xpatience.c @@ -287,34 +287,11 @@ static int walk_common_sequence(struct hashmap *map, struct entry *first, static int fall_back_to_classic_diff(struct hashmap *map, int line1, int count1, int line2, int count2) { - /* - * This probably does not work outside Git, since - * we have a very simple mmfile structure. - * - * Note: ideally, we would reuse the prepared environment, but - * the libxdiff interface does not (yet) allow for diffing only - * ranges of lines instead of the whole files. - */ - mmfile_t subfile1, subfile2; xpparam_t xpp; - xdfenv_t env; - - subfile1.ptr = (char *)map->env->xdf1.recs[line1 - 1]->ptr; - subfile1.size = map->env->xdf1.recs[line1 + count1 - 2]->ptr + - map->env->xdf1.recs[line1 + count1 - 2]->size - subfile1.ptr; - subfile2.ptr = (char *)map->env->xdf2.recs[line2 - 1]->ptr; - subfile2.size = map->env->xdf2.recs[line2 + count2 - 2]->ptr + - map->env->xdf2.recs[line2 + count2 - 2]->size - subfile2.ptr; xpp.flags = map->xpp->flags & ~XDF_PATIENCE_DIFF; - if (xdl_do_diff(&subfile1, &subfile2, &xpp, &env) < 0) - return -1; - memcpy(map->env->xdf1.rchg + line1 - 1, env.xdf1.rchg, count1); - memcpy(map->env->xdf2.rchg + line2 - 1, env.xdf2.rchg, count2); - - xdl_free_env(&env); - - return 0; + return xdl_fall_back_diff(map->env, &xpp, + line1, count1, line2, count2); } /* diff --git a/xdiff/xutils.c b/xdiff/xutils.c index ab6503460f..ded7c327dc 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -402,3 +402,34 @@ int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, return 0; } + +int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp, + int line1, int count1, int line2, 
int count2) +{ + /* + * This probably does not work outside Git, since + * we have a very simple mmfile structure. + * + * Note: ideally, we would reuse the prepared environment, but + * the libxdiff interface does not (yet) allow for diffing only + * ranges of lines instead of the whole files. + */ + mmfile_t subfile1, subfile2; + xdfenv_t env; + + subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1]->ptr; + subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2]->ptr + + diff_env->xdf1.recs[line1 + count1 - 2]->size - subfile1.ptr; + subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1]->ptr; + subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2]->ptr + + diff_env->xdf2.recs[line2 + count2 - 2]->size - subfile2.ptr; + if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0) + return -1; + + memcpy(diff_env->xdf1.rchg + line1 - 1, env.xdf1.rchg, count1); + memcpy(diff_env->xdf2.rchg + line2 - 1, env.xdf2.rchg, count2); + + xdl_free_env(&env); + + return 0; +} diff --git a/xdiff/xutils.h b/xdiff/xutils.h index d5de8292e0..674a657b08 100644 --- a/xdiff/xutils.h +++ b/xdiff/xutils.h @@ -41,6 +41,8 @@ int xdl_num_out(char *out, long val); long xdl_atol(char const *str, char const **next); int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, const char *func, long funclen, xdemitcb_t *ecb); +int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp, + int line1, int count1, int line2, int count2); From 46c8f2988dc9a6012babd5833a6245ceff214483 Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Thu, 7 Jul 2011 12:23:58 +0800 Subject: [PATCH 04/12] t4033-diff-patience: factor out tests Group the test cases into two functions, test_diff_(frobnitz|unique). This is in preparation for the histogram diff algorithm, which would also re-use these test cases. 
Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- t/lib-diff-alternative.sh | 165 ++++++++++++++++++++++++++++++++++++++ t/t4033-diff-patience.sh | 162 +------------------------------------ 2 files changed, 168 insertions(+), 159 deletions(-) create mode 100644 t/lib-diff-alternative.sh diff --git a/t/lib-diff-alternative.sh b/t/lib-diff-alternative.sh new file mode 100644 index 0000000000..75ffd9174f --- /dev/null +++ b/t/lib-diff-alternative.sh @@ -0,0 +1,165 @@ +#!/bin/sh + +test_diff_frobnitz() { + cat >file1 <<\EOF +#include + +// Frobs foo heartily +int frobnitz(int foo) +{ + int i; + for(i = 0; i < 10; i++) + { + printf("Your answer is: "); + printf("%d\n", foo); + } +} + +int fact(int n) +{ + if(n > 1) + { + return fact(n-1) * n; + } + return 1; +} + +int main(int argc, char **argv) +{ + frobnitz(fact(10)); +} +EOF + + cat >file2 <<\EOF +#include + +int fib(int n) +{ + if(n > 2) + { + return fib(n-1) + fib(n-2); + } + return 1; +} + +// Frobs foo heartily +int frobnitz(int foo) +{ + int i; + for(i = 0; i < 10; i++) + { + printf("%d\n", foo); + } +} + +int main(int argc, char **argv) +{ + frobnitz(fib(10)); +} +EOF + + cat >expect <<\EOF +diff --git a/file1 b/file2 +index 6faa5a3..e3af329 100644 +--- a/file1 ++++ b/file2 +@@ -1,26 +1,25 @@ + #include + ++int fib(int n) ++{ ++ if(n > 2) ++ { ++ return fib(n-1) + fib(n-2); ++ } ++ return 1; ++} ++ + // Frobs foo heartily + int frobnitz(int foo) + { + int i; + for(i = 0; i < 10; i++) + { +- printf("Your answer is: "); + printf("%d\n", foo); + } + } + +-int fact(int n) +-{ +- if(n > 1) +- { +- return fact(n-1) * n; +- } +- return 1; +-} +- + int main(int argc, char **argv) + { +- frobnitz(fact(10)); ++ frobnitz(fib(10)); + } +EOF + + STRATEGY=$1 + + test_expect_success "$STRATEGY diff" ' + test_must_fail git diff --no-index "--$STRATEGY" file1 file2 > output && + test_cmp expect output + ' + + test_expect_success "$STRATEGY diff output is valid" ' + mv file2 expect && + git apply < output && + 
test_cmp expect file2 + ' +} + +test_diff_unique() { + cat >uniq1 <<\EOF +1 +2 +3 +4 +5 +6 +EOF + + cat >uniq2 <<\EOF +a +b +c +d +e +f +EOF + + cat >expect <<\EOF +diff --git a/uniq1 b/uniq2 +index b414108..0fdf397 100644 +--- a/uniq1 ++++ b/uniq2 +@@ -1,6 +1,6 @@ +-1 +-2 +-3 +-4 +-5 +-6 ++a ++b ++c ++d ++e ++f +EOF + + STRATEGY=$1 + + test_expect_success 'completely different files' ' + test_must_fail git diff --no-index "--$STRATEGY" uniq1 uniq2 > output && + test_cmp expect output + ' +} + diff --git a/t/t4033-diff-patience.sh b/t/t4033-diff-patience.sh index 1eb14989df..3c9932edf3 100755 --- a/t/t4033-diff-patience.sh +++ b/t/t4033-diff-patience.sh @@ -3,166 +3,10 @@ test_description='patience diff algorithm' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-diff-alternative.sh -cat >file1 <<\EOF -#include +test_diff_frobnitz "patience" -// Frobs foo heartily -int frobnitz(int foo) -{ - int i; - for(i = 0; i < 10; i++) - { - printf("Your answer is: "); - printf("%d\n", foo); - } -} - -int fact(int n) -{ - if(n > 1) - { - return fact(n-1) * n; - } - return 1; -} - -int main(int argc, char **argv) -{ - frobnitz(fact(10)); -} -EOF - -cat >file2 <<\EOF -#include - -int fib(int n) -{ - if(n > 2) - { - return fib(n-1) + fib(n-2); - } - return 1; -} - -// Frobs foo heartily -int frobnitz(int foo) -{ - int i; - for(i = 0; i < 10; i++) - { - printf("%d\n", foo); - } -} - -int main(int argc, char **argv) -{ - frobnitz(fib(10)); -} -EOF - -cat >expect <<\EOF -diff --git a/file1 b/file2 -index 6faa5a3..e3af329 100644 ---- a/file1 -+++ b/file2 -@@ -1,26 +1,25 @@ - #include - -+int fib(int n) -+{ -+ if(n > 2) -+ { -+ return fib(n-1) + fib(n-2); -+ } -+ return 1; -+} -+ - // Frobs foo heartily - int frobnitz(int foo) - { - int i; - for(i = 0; i < 10; i++) - { -- printf("Your answer is: "); - printf("%d\n", foo); - } - } - --int fact(int n) --{ -- if(n > 1) -- { -- return fact(n-1) * n; -- } -- return 1; --} -- - int main(int argc, char **argv) - { -- frobnitz(fact(10)); -+ 
frobnitz(fib(10)); - } -EOF - -test_expect_success 'patience diff' ' - - test_must_fail git diff --no-index --patience file1 file2 > output && - test_cmp expect output - -' - -test_expect_success 'patience diff output is valid' ' - - mv file2 expect && - git apply < output && - test_cmp expect file2 - -' - -cat >uniq1 <<\EOF -1 -2 -3 -4 -5 -6 -EOF - -cat >uniq2 <<\EOF -a -b -c -d -e -f -EOF - -cat >expect <<\EOF -diff --git a/uniq1 b/uniq2 -index b414108..0fdf397 100644 ---- a/uniq1 -+++ b/uniq2 -@@ -1,6 +1,6 @@ --1 --2 --3 --4 --5 --6 -+a -+b -+c -+d -+e -+f -EOF - -test_expect_success 'completely different files' ' - - test_must_fail git diff --no-index --patience uniq1 uniq2 > output && - test_cmp expect output - -' +test_diff_unique "patience" test_done From 8c912eea94a2138e8bc608f7c390eb0b313effb0 Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Tue, 12 Jul 2011 14:10:25 +0800 Subject: [PATCH 05/12] teach --histogram to diff Port JGit's HistogramDiff algorithm over to C. Rough numbers (TODO) show that it is faster than its --patience cousin, as well as the default Myers algorithm. The implementation has been reworked to use structs and pointers, instead of bitmasks, thus doing away with JGit's 2^28 line limit. We also use xdiff's default hash table implementation (xdl_hashbits() with XDL_HASHLONG()) for convenience. 
Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- Makefile | 2 +- diff.c | 2 + merge-recursive.c | 2 + t/t4049-diff-histogram.sh | 12 ++ xdiff/xdiff.h | 1 + xdiff/xdiffi.c | 3 + xdiff/xdiffi.h | 2 + xdiff/xhistogram.c | 384 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 407 insertions(+), 1 deletion(-) create mode 100755 t/t4049-diff-histogram.sh create mode 100644 xdiff/xhistogram.c diff --git a/Makefile b/Makefile index e40ac0c7f5..f50d3c7770 100644 --- a/Makefile +++ b/Makefile @@ -1838,7 +1838,7 @@ ifndef NO_CURL GIT_OBJS += http.o http-walker.o remote-curl.o endif XDIFF_OBJS = xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ - xdiff/xmerge.o xdiff/xpatience.o + xdiff/xmerge.o xdiff/xpatience.o xdiff/xhistogram.o VCSSVN_OBJS = vcs-svn/string_pool.o vcs-svn/line_buffer.o \ vcs-svn/repo_tree.o vcs-svn/fast_export.o vcs-svn/svndump.o VCSSVN_TEST_OBJS = test-obj-pool.o test-string-pool.o \ diff --git a/diff.c b/diff.c index 61bedaed57..1b940ee72e 100644 --- a/diff.c +++ b/diff.c @@ -3369,6 +3369,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL); else if (!strcmp(arg, "--patience")) DIFF_XDL_SET(options, PATIENCE_DIFF); + else if (!strcmp(arg, "--histogram")) + DIFF_XDL_SET(options, HISTOGRAM_DIFF); /* flags options */ else if (!strcmp(arg, "--binary")) { diff --git a/merge-recursive.c b/merge-recursive.c index db9ba19ddf..0cc1e6fc14 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -1759,6 +1759,8 @@ int parse_merge_opt(struct merge_options *o, const char *s) o->subtree_shift = s + strlen("subtree="); else if (!strcmp(s, "patience")) o->xdl_opts |= XDF_PATIENCE_DIFF; + else if (!strcmp(s, "histogram")) + o->xdl_opts |= XDF_HISTOGRAM_DIFF; else if (!strcmp(s, "ignore-space-change")) o->xdl_opts |= XDF_IGNORE_WHITESPACE_CHANGE; else if (!strcmp(s, "ignore-all-space")) diff --git a/t/t4049-diff-histogram.sh b/t/t4049-diff-histogram.sh new file mode 
100755 index 0000000000..fd3e86a74f --- /dev/null +++ b/t/t4049-diff-histogram.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +test_description='histogram diff algorithm' + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-diff-alternative.sh + +test_diff_frobnitz "histogram" + +test_diff_unique "histogram" + +test_done diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index 711048ea36..c26170ce5e 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -33,6 +33,7 @@ extern "C" { #define XDF_IGNORE_WHITESPACE_CHANGE (1 << 3) #define XDF_IGNORE_WHITESPACE_AT_EOL (1 << 4) #define XDF_PATIENCE_DIFF (1 << 5) +#define XDF_HISTOGRAM_DIFF (1 << 6) #define XDF_WHITESPACE_FLAGS (XDF_IGNORE_WHITESPACE | XDF_IGNORE_WHITESPACE_CHANGE | XDF_IGNORE_WHITESPACE_AT_EOL) #define XDL_PATCH_NORMAL '-' diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index da67c04357..75a3922750 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -331,6 +331,9 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, if (xpp->flags & XDF_PATIENCE_DIFF) return xdl_do_patience_diff(mf1, mf2, xpp, xe); + if (xpp->flags & XDF_HISTOGRAM_DIFF) + return xdl_do_histogram_diff(mf1, mf2, xpp, xe); + if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) { return -1; diff --git a/xdiff/xdiffi.h b/xdiff/xdiffi.h index ad033a8e6a..7a92ea9c4d 100644 --- a/xdiff/xdiffi.h +++ b/xdiff/xdiffi.h @@ -57,5 +57,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, xdemitconf_t const *xecfg); int xdl_do_patience_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdfenv_t *env); +int xdl_do_histogram_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, + xdfenv_t *env); #endif /* #if !defined(XDIFFI_H) */ diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c new file mode 100644 index 0000000000..391333a1a4 --- /dev/null +++ b/xdiff/xhistogram.c @@ -0,0 +1,384 @@ +/* + * Copyright (C) 2010, Google Inc. + * and other copyright owners as documented in JGit's IP log. 
+ * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "xinclude.h" +#include "xtypes.h" +#include "xdiff.h" + +#define MAX_PTR UINT_MAX +#define MAX_CNT UINT_MAX + +#define LINE_END(n) (line##n + count##n - 1) +#define LINE_END_PTR(n) (*line##n + *count##n - 1) + +struct histindex { + struct record { + unsigned int ptr, cnt; + struct record *next; + } **records, /* an ocurrence */ + **line_map; /* map of line to record chain */ + chastore_t rcha; + unsigned int *next_ptrs; + unsigned int table_bits, + records_size, + line_map_size; + + unsigned int max_chain_length, + key_shift, + ptr_shift; + + unsigned int cnt, + has_common; + + xdfenv_t *env; + xpparam_t const *xpp; +}; + +struct region { + unsigned int begin1, end1; + unsigned int begin2, end2; +}; + +#define LINE_MAP(i, a) (i->line_map[(a) - i->ptr_shift]) + +#define NEXT_PTR(index, ptr) \ + (index->next_ptrs[(ptr) - index->ptr_shift]) + +#define CNT(index, ptr) \ + ((LINE_MAP(index, ptr))->cnt) + +#define REC(env, s, l) \ + (env->xdf##s.recs[l - 1]) + +static int cmp_recs(xpparam_t const *xpp, + xrecord_t *r1, xrecord_t *r2) +{ + return r1->ha == r2->ha && + xdl_recmatch(r1->ptr, r1->size, r2->ptr, r2->size, + xpp->flags); +} + +#define CMP_ENV(xpp, env, s1, l1, s2, l2) \ + (cmp_recs(xpp, REC(env, s1, l1), REC(env, s2, l2))) + +#define CMP(i, s1, l1, s2, l2) \ + (CMP_ENV(i->xpp, i->env, s1, l1, s2, l2)) + +#define TABLE_HASH(index, side, line) \ + XDL_HASHLONG((REC(index->env, side, line))->ha, index->table_bits) + +static int scanA(struct histindex *index, int line1, int count1) +{ + unsigned int ptr, tbl_idx; + unsigned int chain_len; + struct record **rec_chain, *rec; + + for (ptr = LINE_END(1); line1 <= ptr; ptr--) { + tbl_idx = TABLE_HASH(index, 1, ptr); + rec_chain = index->records + tbl_idx; + rec = *rec_chain; + + chain_len = 0; + while (rec) { + if (CMP(index, 1, rec->ptr, 1, ptr)) { + /* + * ptr is identical to another element. Insert + * it onto the front of the existing element + * chain. 
+ */ + NEXT_PTR(index, ptr) = rec->ptr; + rec->ptr = ptr; + /* cap rec->cnt at MAX_CNT */ + rec->cnt = XDL_MIN(MAX_CNT, rec->cnt + 1); + LINE_MAP(index, ptr) = rec; + goto continue_scan; + } + + rec = rec->next; + chain_len++; + } + + if (chain_len == index->max_chain_length) + return -1; + + /* + * This is the first time we have ever seen this particular + * element in the sequence. Construct a new chain for it. + */ + if (!(rec = xdl_cha_alloc(&index->rcha))) + return -1; + rec->ptr = ptr; + rec->cnt = 1; + rec->next = *rec_chain; + *rec_chain = rec; + LINE_MAP(index, ptr) = rec; + +continue_scan: + ; /* no op */ + } + + return 0; +} + +static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr, + int line1, int count1, int line2, int count2) +{ + unsigned int b_next = b_ptr + 1; + struct record *rec = index->records[TABLE_HASH(index, 2, b_ptr)]; + unsigned int as, ae, bs, be, np, rc; + int should_break; + + for (; rec; rec = rec->next) { + if (rec->cnt > index->cnt) { + if (!index->has_common) + index->has_common = CMP(index, 1, rec->ptr, 2, b_ptr); + continue; + } + + as = rec->ptr; + if (!CMP(index, 1, as, 2, b_ptr)) + continue; + + index->has_common = 1; + for (;;) { + should_break = 0; + np = NEXT_PTR(index, as); + bs = b_ptr; + ae = as; + be = bs; + rc = rec->cnt; + + while (line1 < as && line2 < bs + && CMP(index, 1, as - 1, 2, bs - 1)) { + as--; + bs--; + if (1 < rc) + rc = XDL_MIN(rc, CNT(index, as)); + } + while (ae < LINE_END(1) && be < LINE_END(2) + && CMP(index, 1, ae + 1, 2, be + 1)) { + ae++; + be++; + if (1 < rc) + rc = XDL_MIN(rc, CNT(index, ae)); + } + + if (b_next <= be) + b_next = be + 1; + if (lcs->end1 - lcs->begin1 < ae - as || rc < index->cnt) { + lcs->begin1 = as; + lcs->begin2 = bs; + lcs->end1 = ae; + lcs->end2 = be; + index->cnt = rc; + } + + if (np == 0) + break; + + while (np <= ae) { + np = NEXT_PTR(index, np); + if (np == 0) { + should_break = 1; + break; + } + } + + if (should_break) + break; + + as = np; + } + } + 
return b_next; +} + +static int find_lcs(struct histindex *index, struct region *lcs, + int line1, int count1, int line2, int count2) { + int b_ptr; + + if (scanA(index, line1, count1)) + return -1; + + index->cnt = index->max_chain_length + 1; + + for (b_ptr = line2; b_ptr <= LINE_END(2); ) + b_ptr = try_lcs(index, lcs, b_ptr, line1, count1, line2, count2); + + return index->has_common && index->max_chain_length < index->cnt; +} + +static void reduce_common_start_end(xpparam_t const *xpp, xdfenv_t *env, + int *line1, int *count1, int *line2, int *count2) +{ + if (*count1 <= 1 || *count2 <= 1) + return; + while (*count1 > 1 && *count2 > 1 && CMP_ENV(xpp, env, 1, *line1, 2, *line2)) { + (*line1)++; + (*count1)--; + (*line2)++; + (*count2)--; + } + while (*count1 > 1 && *count2 > 1 && CMP_ENV(xpp, env, 1, LINE_END_PTR(1), 2, LINE_END_PTR(2))) { + (*count1)--; + (*count2)--; + } +} + +static int fall_back_to_classic_diff(struct histindex *index, + int line1, int count1, int line2, int count2) +{ + xpparam_t xpp; + xpp.flags = index->xpp->flags & ~XDF_HISTOGRAM_DIFF; + + return xdl_fall_back_diff(index->env, &xpp, + line1, count1, line2, count2); +} + +static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, + int line1, int count1, int line2, int count2) +{ + struct histindex index; + struct region lcs; + int sz; + int result = -1; + + if (count1 <= 0 && count2 <= 0) + return 0; + + if (LINE_END(1) >= MAX_PTR) + return -1; + + if (!count1) { + while(count2--) + env->xdf2.rchg[line2++ - 1] = 1; + return 0; + } else if (!count2) { + while(count1--) + env->xdf1.rchg[line1++ - 1] = 1; + return 0; + } + + memset(&index, 0, sizeof(index)); + + index.env = env; + index.xpp = xpp; + + index.records = NULL; + index.line_map = NULL; + /* in case of early xdl_cha_free() */ + index.rcha.head = NULL; + + index.table_bits = xdl_hashbits(count1); + sz = index.records_size = 1 << index.table_bits; + sz *= sizeof(struct record *); + if (!(index.records = (struct record **) 
xdl_malloc(sz))) + goto cleanup; + memset(index.records, 0, sz); + + sz = index.line_map_size = count1; + sz *= sizeof(struct record *); + if (!(index.line_map = (struct record **) xdl_malloc(sz))) + goto cleanup; + memset(index.line_map, 0, sz); + + sz = index.line_map_size; + sz *= sizeof(unsigned int); + if (!(index.next_ptrs = (unsigned int *) xdl_malloc(sz))) + goto cleanup; + memset(index.next_ptrs, 0, sz); + + /* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */ + if (xdl_cha_init(&index.rcha, sizeof(struct record), count1 / 4 + 1) < 0) + goto cleanup; + + index.ptr_shift = line1; + index.max_chain_length = 64; + + memset(&lcs, 0, sizeof(lcs)); + if (find_lcs(&index, &lcs, line1, count1, line2, count2)) + result = fall_back_to_classic_diff(&index, line1, count1, line2, count2); + else { + result = 0; + if (lcs.begin1 == 0 && lcs.begin2 == 0) { + int ptr; + for (ptr = 0; ptr < count1; ptr++) + env->xdf1.rchg[line1 + ptr - 1] = 1; + for (ptr = 0; ptr < count2; ptr++) + env->xdf2.rchg[line2 + ptr - 1] = 1; + } else { + result = histogram_diff(xpp, env, + line1, lcs.begin1 - line1, + line2, lcs.begin2 - line2); + result = histogram_diff(xpp, env, + lcs.end1 + 1, LINE_END(1) - lcs.end1, + lcs.end2 + 1, LINE_END(2) - lcs.end2); + result *= -1; + } + } + +cleanup: + xdl_free(index.records); + xdl_free(index.line_map); + xdl_free(index.next_ptrs); + xdl_cha_free(&index.rcha); + + return result; +} + +int xdl_do_histogram_diff(mmfile_t *file1, mmfile_t *file2, + xpparam_t const *xpp, xdfenv_t *env) +{ + int line1, line2, count1, count2; + + if (xdl_prepare_env(file1, file2, xpp, env) < 0) + return -1; + + line1 = line2 = 1; + count1 = env->xdf1.nrec; + count2 = env->xdf2.nrec; + + reduce_common_start_end(xpp, env, &line1, &count1, &line2, &count2); + + return histogram_diff(xpp, env, line1, count1, line2, count2); +} From 9f37c275938e1fbca7165872dad039874add09cd Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Tue, 12 Jul 2011 14:10:26 +0800 Subject: [PATCH 
06/12] xdiff/xprepare: skip classification xdiff performs "classification" of records (xdl_classify_record()), replacing hashes (xrecord_t.ha) with a unique identifier of the record/line and building a hash table (xrecord_t.rhash) of records. This is then used to "cleanup" records (xdl_cleanup_records()). We don't need any of that in histogram diff, so we omit calls to these functions. We also skip allocating memory to the hash table, rhash, as it is no longer used. This gives us a small boost in performance. Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 0f571db2d7..7556538df5 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -154,11 +154,15 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) goto abort; - hbits = xdl_hashbits((unsigned int) narec); - hsize = 1 << hbits; - if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) - goto abort; - memset(rhash, 0, hsize * sizeof(xrecord_t *)); + if (xpp->flags & XDF_HISTOGRAM_DIFF) + hbits = hsize = 0; + else { + hbits = xdl_hashbits((unsigned int) narec); + hsize = 1 << hbits; + if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) + goto abort; + memset(rhash, 0, hsize * sizeof(xrecord_t *)); + } nrec = 0; if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { @@ -183,7 +187,8 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, crec->ha = hav; recs[nrec++] = crec; - if (xdl_classify_record(cf, rhash, hbits, crec) < 0) + if (!(xpp->flags & XDF_HISTOGRAM_DIFF) && + xdl_classify_record(cf, rhash, hbits, crec) < 0) goto abort; } } @@ -240,7 +245,8 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, enl1 = xdl_guess_lines(mf1) + 1; enl2 = xdl_guess_lines(mf2) + 1; - if 
(xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) { + if (!(xpp->flags & XDF_HISTOGRAM_DIFF) && + xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) { return -1; } @@ -257,9 +263,11 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, return -1; } - xdl_free_classifier(&cf); + if (!(xpp->flags & XDF_HISTOGRAM_DIFF)) + xdl_free_classifier(&cf); if (!(xpp->flags & XDF_PATIENCE_DIFF) && + !(xpp->flags & XDF_HISTOGRAM_DIFF) && xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) { xdl_free_ctx(&xe->xdf2); From 86abba801575892a8a2b161aa29518c1ed05e1f1 Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Tue, 12 Jul 2011 14:10:27 +0800 Subject: [PATCH 07/12] xdiff/xprepare: use a smaller sample size for histogram diff For histogram diff, we can afford a smaller sample size and thus a poorer estimate of the number of lines, as the hash table (rhash) won't be filled up/grown. This is safe as the final count of lines (xdf.nrecs) will be updated correctly anyway by xdl_prepare_ctx(). This gives us a small boost in performance. 
Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 17 ++++++++++++++--- xdiff/xutils.c | 8 ++------ xdiff/xutils.h | 2 +- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 7556538df5..dfbb0de987 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -26,6 +26,8 @@ #define XDL_KPDIS_RUN 4 #define XDL_MAX_EQLIMIT 1024 #define XDL_SIMSCAN_WINDOW 100 +#define XDL_GUESS_NLINES1 256 +#define XDL_GUESS_NLINES2 20 typedef struct s_xdlclass { @@ -239,11 +241,20 @@ static void xdl_free_ctx(xdfile_t *xdf) { int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdfenv_t *xe) { - long enl1, enl2; + long enl1, enl2, sample; xdlclassifier_t cf; - enl1 = xdl_guess_lines(mf1) + 1; - enl2 = xdl_guess_lines(mf2) + 1; + /* + * For histogram diff, we can afford a smaller sample size and + * thus a poorer estimate of the number of lines, as the hash + * table (rhash) won't be filled up/grown. The number of lines + * (nrecs) will be updated correctly anyway by + * xdl_prepare_ctx(). + */ + sample = xpp->flags & XDF_HISTOGRAM_DIFF ? 
XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1; + + enl1 = xdl_guess_lines(mf1, sample) + 1; + enl2 = xdl_guess_lines(mf2, sample) + 1; if (!(xpp->flags & XDF_HISTOGRAM_DIFF) && xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0) { diff --git a/xdiff/xutils.c b/xdiff/xutils.c index ded7c327dc..a45e89bbed 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -24,10 +24,6 @@ -#define XDL_GUESS_NLINES 256 - - - long xdl_bogosqrt(long n) { long i; @@ -159,12 +155,12 @@ void *xdl_cha_next(chastore_t *cha) { } -long xdl_guess_lines(mmfile_t *mf) { +long xdl_guess_lines(mmfile_t *mf, long sample) { long nl = 0, size, tsize = 0; char const *data, *cur, *top; if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) { - for (top = data + size; nl < XDL_GUESS_NLINES;) { + for (top = data + size; nl < sample;) { if (cur >= top) { tsize += (long) (cur - data); if (!(cur = data = xdl_mmfile_next(mf, &size))) diff --git a/xdiff/xutils.h b/xdiff/xutils.h index 674a657b08..714719a89c 100644 --- a/xdiff/xutils.h +++ b/xdiff/xutils.h @@ -33,7 +33,7 @@ void xdl_cha_free(chastore_t *cha); void *xdl_cha_alloc(chastore_t *cha); void *xdl_cha_first(chastore_t *cha); void *xdl_cha_next(chastore_t *cha); -long xdl_guess_lines(mmfile_t *mf); +long xdl_guess_lines(mmfile_t *mf, long sample); int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags); unsigned long xdl_hash_record(char const **data, char const *top, long flags); unsigned int xdl_hashbits(unsigned int size); From 286e2b1a23d523b04c29f92eb5cee31a0a92dc12 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Wed, 3 Aug 2011 09:25:31 +0200 Subject: [PATCH 08/12] Make test number unique Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- t/{t4049-diff-histogram.sh => t4050-diff-histogram.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename t/{t4049-diff-histogram.sh => t4050-diff-histogram.sh} (100%) diff --git a/t/t4049-diff-histogram.sh b/t/t4050-diff-histogram.sh similarity index 100% rename from 
t/t4049-diff-histogram.sh rename to t/t4050-diff-histogram.sh From 739864b1ffb379120df9cfa4111c4ec20b823cfd Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Mon, 1 Aug 2011 12:20:07 +0800 Subject: [PATCH 09/12] xdiff: do away with xdl_mmfile_next() Given our simple mmfile structure, xdl_mmfile_next() calls are redundant. Do away with calls to them. Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xdiff.h | 1 - xdiff/xprepare.c | 7 +------ xdiff/xutils.c | 14 +------------- 3 files changed, 2 insertions(+), 20 deletions(-) diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index c26170ce5e..4beb10c678 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -106,7 +106,6 @@ typedef struct s_bdiffparam { #define xdl_realloc(ptr,x) realloc(ptr,x) void *xdl_mmfile_first(mmfile_t *mmf, long *size); -void *xdl_mmfile_next(mmfile_t *mmf, long *size); long xdl_mmfile_size(mmfile_t *mmf); int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index dfbb0de987..620fc9a657 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -168,12 +168,7 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, nrec = 0; if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { - for (top = blk + bsize;;) { - if (cur >= top) { - if (!(cur = blk = xdl_mmfile_next(mf, &bsize))) - break; - top = blk + bsize; - } + for (top = blk + bsize; cur < top; ) { prev = cur; hav = xdl_hash_record(&cur, top, xpp->flags); if (nrec >= narec) { diff --git a/xdiff/xutils.c b/xdiff/xutils.c index a45e89bbed..0de084e53f 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -67,12 +67,6 @@ void *xdl_mmfile_first(mmfile_t *mmf, long *size) } -void *xdl_mmfile_next(mmfile_t *mmf, long *size) -{ - return NULL; -} - - long xdl_mmfile_size(mmfile_t *mmf) { return mmf->size; @@ -160,13 +154,7 @@ long xdl_guess_lines(mmfile_t *mf, long sample) { char const *data, *cur, *top; if ((cur = data = xdl_mmfile_first(mf, &size)) != NULL) { - 
for (top = data + size; nl < sample;) { - if (cur >= top) { - tsize += (long) (cur - data); - if (!(cur = data = xdl_mmfile_next(mf, &size))) - break; - top = data + size; - } + for (top = data + size; nl < sample && cur < top; ) { nl++; if (!(cur = memchr(cur, '\n', top - cur))) cur = top; From 19f7a9c57785161cdfaa6036eb0ef90853333724 Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Mon, 1 Aug 2011 12:20:08 +0800 Subject: [PATCH 10/12] xdiff/xhistogram: rework handling of recursed results Previously we were over-complicating matters by trying to combine the recursed results. Now, terminate immediately if a recursive call failed and return its result. Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xhistogram.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index 391333a1a4..4cfafa1b3a 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -339,21 +339,24 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, if (find_lcs(&index, &lcs, line1, count1, line2, count2)) result = fall_back_to_classic_diff(&index, line1, count1, line2, count2); else { - result = 0; if (lcs.begin1 == 0 && lcs.begin2 == 0) { int ptr; for (ptr = 0; ptr < count1; ptr++) env->xdf1.rchg[line1 + ptr - 1] = 1; for (ptr = 0; ptr < count2; ptr++) env->xdf2.rchg[line2 + ptr - 1] = 1; + result = 0; } else { result = histogram_diff(xpp, env, - line1, lcs.begin1 - line1, - line2, lcs.begin2 - line2); + line1, lcs.begin1 - line1, + line2, lcs.begin2 - line2); + if (result) + goto cleanup; result = histogram_diff(xpp, env, - lcs.end1 + 1, LINE_END(1) - lcs.end1, - lcs.end2 + 1, LINE_END(2) - lcs.end2); - result *= -1; + lcs.end1 + 1, LINE_END(1) - lcs.end1, + lcs.end2 + 1, LINE_END(2) - lcs.end2); + if (result) + goto cleanup; } } From 43ca7530df839321cc4f62b54a0098f2a1cfb4ad Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Mon, 1 Aug 2011 12:20:09 +0800 Subject: [PATCH 11/12] 
xdiff/xhistogram: rely on xdl_trim_ends() Do away with reduce_common_start_end() and use xdf->dstart and xdf->dend set by xdl_trim_ends() that similarly tells us where the first unmatched line from the start and end occurs. Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xhistogram.c | 31 ++++--------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index 4cfafa1b3a..130ceeed88 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -102,7 +102,7 @@ static int cmp_recs(xpparam_t const *xpp, (cmp_recs(xpp, REC(env, s1, l1), REC(env, s2, l2))) #define CMP(i, s1, l1, s2, l2) \ - (CMP_ENV(i->xpp, i->env, s1, l1, s2, l2)) + (cmp_recs(i->xpp, REC(i->env, s1, l1), REC(i->env, s2, l2))) #define TABLE_HASH(index, side, line) \ XDL_HASHLONG((REC(index->env, side, line))->ha, index->table_bits) @@ -248,23 +248,6 @@ static int find_lcs(struct histindex *index, struct region *lcs, return index->has_common && index->max_chain_length < index->cnt; } -static void reduce_common_start_end(xpparam_t const *xpp, xdfenv_t *env, - int *line1, int *count1, int *line2, int *count2) -{ - if (*count1 <= 1 || *count2 <= 1) - return; - while (*count1 > 1 && *count2 > 1 && CMP_ENV(xpp, env, 1, *line1, 2, *line2)) { - (*line1)++; - (*count1)--; - (*line2)++; - (*count2)--; - } - while (*count1 > 1 && *count2 > 1 && CMP_ENV(xpp, env, 1, LINE_END_PTR(1), 2, LINE_END_PTR(2))) { - (*count1)--; - (*count2)--; - } -} - static int fall_back_to_classic_diff(struct histindex *index, int line1, int count1, int line2, int count2) { @@ -372,16 +355,10 @@ cleanup: int xdl_do_histogram_diff(mmfile_t *file1, mmfile_t *file2, xpparam_t const *xpp, xdfenv_t *env) { - int line1, line2, count1, count2; - if (xdl_prepare_env(file1, file2, xpp, env) < 0) return -1; - line1 = line2 = 1; - count1 = env->xdf1.nrec; - count2 = env->xdf2.nrec; - - reduce_common_start_end(xpp, env, &line1, &count1, &line2, &count2); - - 
return histogram_diff(xpp, env, line1, count1, line2, count2); + return histogram_diff(xpp, env, + env->xdf1.dstart + 1, env->xdf1.dend - env->xdf1.dstart + 1, + env->xdf2.dstart + 1, env->xdf2.dend - env->xdf2.dstart + 1); } From 6486a84cb8e69ddf03b2139db3d968400e05f5b3 Mon Sep 17 00:00:00 2001 From: Tay Ray Chuan Date: Mon, 1 Aug 2011 12:20:10 +0800 Subject: [PATCH 12/12] xdiff/xhistogram: drop need for additional variable Having an additional variable (ptr) instead of changing line(1|2) and count(1|2) was for debugging purposes. Signed-off-by: Tay Ray Chuan Signed-off-by: Junio C Hamano --- xdiff/xhistogram.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index 130ceeed88..18f6f997c3 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -323,11 +323,10 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, result = fall_back_to_classic_diff(&index, line1, count1, line2, count2); else { if (lcs.begin1 == 0 && lcs.begin2 == 0) { - int ptr; - for (ptr = 0; ptr < count1; ptr++) - env->xdf1.rchg[line1 + ptr - 1] = 1; - for (ptr = 0; ptr < count2; ptr++) - env->xdf2.rchg[line2 + ptr - 1] = 1; + while (count1--) + env->xdf1.rchg[line1++ - 1] = 1; + while (count2--) + env->xdf2.rchg[line2++ - 1] = 1; result = 0; } else { result = histogram_diff(xpp, env,