From 02e665ce491296245f474dafdc02d47a6c8afa86 Mon Sep 17 00:00:00 2001 From: David Kastrup Date: Sat, 8 Sep 2007 23:25:55 +0200 Subject: [PATCH] diff-delta.c: Rationalize culling of hash buckets The previous hash bucket culling resulted in a somewhat unpredictable number of hash bucket entries in the order of magnitude of HASH_LIMIT. Replace this with a Bresenham-like algorithm leaving us with exactly HASH_LIMIT entries by uniform culling. Signed-off-by: David Kastrup Acked-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- diff-delta.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/diff-delta.c b/diff-delta.c index 7f8a60de18..9e440a9299 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -207,19 +207,40 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) * the reference buffer. */ for (i = 0; i < hsize; i++) { - if (hash_count[i] < HASH_LIMIT) + int acc; + + if (hash_count[i] <= HASH_LIMIT) continue; + + entries -= hash_count[i] - HASH_LIMIT; + /* We leave exactly HASH_LIMIT entries in the bucket */ + entry = hash[i]; + acc = 0; do { - struct unpacked_index_entry *keep = entry; - int skip = hash_count[i] / HASH_LIMIT; - do { - --entries; - entry = entry->next; - } while(--skip && entry); - ++entries; - keep->next = entry; - } while(entry); + acc += hash_count[i] - HASH_LIMIT; + if (acc > 0) { + struct unpacked_index_entry *keep = entry; + do { + entry = entry->next; + acc -= HASH_LIMIT; + } while (acc > 0); + keep->next = entry->next; + } + entry = entry->next; + } while (entry); + + /* Assume that this loop is gone through exactly + * HASH_LIMIT times and is entered and left with + * acc==0. So the first statement in the loop + * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT + * to the accumulator, and the inner loop consequently + * is run (hash_count[i]-HASH_LIMIT) times, removing + * one element from the list each time. Since acc + * balances out to 0 at the final run, the inner loop + * body can't be left with entry==NULL. So we indeed + * encounter entry==NULL in the outer loop only. + */ } free(hash_count);