From af626ac0e02570e3afac8b4238199157181d43c2 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 14 Dec 2023 17:24:44 -0500 Subject: [PATCH] pack-bitmap: enable reuse from all bitmapped packs Now that both the pack-bitmap and pack-objects code are prepared to handle marking and using objects from multiple bitmapped packs for verbatim reuse, allow marking objects from all bitmapped packs as eligible for reuse. Within the `reuse_partial_packfile_from_bitmap()` function, we no longer only mark the pack whose first object is at bit position zero for reuse, and instead mark any pack contained in the MIDX as a reuse candidate. Provide a handful of test cases in a new script (t5332) exercising interesting behavior for multi-pack reuse to ensure that we performed all of the previous steps correctly. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/pack.txt | 16 ++- builtin/pack-objects.c | 6 +- pack-bitmap.c | 34 ++++-- pack-bitmap.h | 3 +- t/t5332-multi-pack-reuse.sh | 203 ++++++++++++++++++++++++++++++++++ 5 files changed, 245 insertions(+), 17 deletions(-) create mode 100755 t/t5332-multi-pack-reuse.sh diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt index fe100d0fb7..9c630863e6 100644 --- a/Documentation/config/pack.txt +++ b/Documentation/config/pack.txt @@ -28,11 +28,17 @@ all existing objects. You can force recompression by passing the -F option to linkgit:git-repack[1]. pack.allowPackReuse:: - When true or "single", and when reachability bitmaps are enabled, - pack-objects will try to send parts of the bitmapped packfile - verbatim. This can reduce memory and CPU usage to serve fetches, - but might result in sending a slightly larger pack. Defaults to - true. + When true or "single", and when reachability bitmaps are + enabled, pack-objects will try to send parts of the bitmapped + packfile verbatim. When "multi", and when a multi-pack + reachability bitmap is available, pack-objects will try to send + parts of all packs in the MIDX. ++ + If only a single pack bitmap is available, and + `pack.allowPackReuse` is set to "multi", reuse parts of just the + bitmapped packfile. This can reduce memory and CPU usage to + serve fetches, but might result in sending a slightly larger + pack. Defaults to true. pack.island:: An extended regular expression configuring a set of delta diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 684698f679..5d3c42035b 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -232,6 +232,7 @@ static int use_bitmap_index = -1; static enum { NO_PACK_REUSE = 0, SINGLE_PACK_REUSE, + MULTI_PACK_REUSE, } allow_pack_reuse = SINGLE_PACK_REUSE; static enum { WRITE_BITMAP_FALSE = 0, @@ -3251,6 +3252,8 @@ static int git_pack_config(const char *k, const char *v, if (res < 0) { if (!strcasecmp(v, "single")) allow_pack_reuse = SINGLE_PACK_REUSE; + else if (!strcasecmp(v, "multi")) + allow_pack_reuse = MULTI_PACK_REUSE; else die(_("invalid pack.allowPackReuse value: '%s'"), v); } else if (res) { @@ -4029,7 +4032,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs) reuse_partial_packfile_from_bitmap(bitmap_git, &reuse_packfiles, &reuse_packfiles_nr, - &reuse_packfile_bitmap); + &reuse_packfile_bitmap, + allow_pack_reuse == MULTI_PACK_REUSE); if (reuse_packfiles) { reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap); diff --git a/pack-bitmap.c b/pack-bitmap.c index 242a5908f7..229a11fb00 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -2040,7 +2040,8 @@ static int bitmapped_pack_cmp(const void *va, const void *vb) void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, struct bitmapped_pack **packs_out, size_t *packs_nr_out, - struct bitmap **reuse_out) + struct bitmap **reuse_out, + int multi_pack_reuse) { struct repository *r = the_repository; struct bitmapped_pack *packs = NULL; @@ -2064,15 +2065,30 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, free(packs); return; } + if (!pack.bitmap_nr) - continue; /* no objects from this pack */ - if (pack.bitmap_pos) - continue; /* not preferred pack */ + continue; + + if (!multi_pack_reuse && pack.bitmap_pos) { + /* + * If we're only reusing a single pack, skip + * over any packs which are not positioned at + * the beginning of the MIDX bitmap. + * + * This is consistent with the existing + * single-pack reuse behavior, which only reuses + * parts of the MIDX's preferred pack. + */ + continue; + } ALLOC_GROW(packs, packs_nr + 1, packs_alloc); memcpy(&packs[packs_nr++], &pack, sizeof(pack)); objects_nr += pack.p->num_objects; + + if (!multi_pack_reuse) + break; } QSORT(packs, packs_nr, bitmapped_pack_cmp); @@ -2080,10 +2096,10 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, ALLOC_GROW(packs, packs_nr + 1, packs_alloc); packs[packs_nr].p = bitmap_git->pack; - packs[packs_nr].bitmap_pos = 0; packs[packs_nr].bitmap_nr = bitmap_git->pack->num_objects; + packs[packs_nr].bitmap_pos = 0; - objects_nr = packs[packs_nr++].p->num_objects; + objects_nr = packs[packs_nr++].bitmap_nr; } word_alloc = objects_nr / BITS_IN_EWORD; @@ -2091,10 +2107,8 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, word_alloc++; reuse = bitmap_word_alloc(word_alloc); - if (packs_nr != 1) - BUG("pack reuse not yet implemented for multiple packs"); - - reuse_partial_packfile_from_bitmap_1(bitmap_git, packs, reuse); + for (i = 0; i < packs_nr; i++) + reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse); if (bitmap_is_empty(reuse)) { free(packs); diff --git a/pack-bitmap.h b/pack-bitmap.h index 179b343912..c7dea13217 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -80,7 +80,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, struct bitmapped_pack **packs_out, size_t *packs_nr_out, - struct bitmap **reuse_out); + struct bitmap **reuse_out, + int multi_pack_reuse); int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping, kh_oid_map_t *reused_bitmaps, int show_progress); void free_bitmap_index(struct bitmap_index *); diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh new file mode 100755 index 0000000000..2ba788b042 --- /dev/null +++ b/t/t5332-multi-pack-reuse.sh @@ -0,0 +1,203 @@ +#!/bin/sh + +test_description='pack-objects multi-pack reuse' + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-bitmap.sh + +objdir=.git/objects +packdir=$objdir/pack + +test_pack_reused () { + test_trace2_data pack-objects pack-reused "$1" +} + +test_packs_reused () { + test_trace2_data pack-objects packs-reused "$1" +} + + +# pack_position objects && + grep "$1" objects | cut -d" " -f1 +} + +test_expect_success 'preferred pack is reused for single-pack reuse' ' + test_config pack.allowPackReuse single && + + for i in A B + do + test_commit "$i" && + git repack -d || return 1 + done && + + git multi-pack-index write --bitmap && + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs --all >/dev/null && + + test_pack_reused 3 in <<-EOF && + $(git rev-parse C) + ^$(git rev-parse A) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs /dev/null && + + test_pack_reused 6 trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs --all >/dev/null && + + test_pack_reused 9 in <<-EOF && + $(git rev-parse E) + ^$(git rev-parse D) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs /dev/null && + + test_pack_reused 3 in <<-EOF && + $(git rev-parse E) + ^$(git rev-parse D) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs /dev/null && + + test_pack_reused 3 f && + git add f && + test_tick && + git commit -m "delta" && + delta="$(git rev-parse HEAD)" && + + test_seq 64 >f && + test_tick && + git commit -a -m "base" && + base="$(git rev-parse HEAD)" && + + test_commit other && + + git repack -d && + + have_delta "$(git rev-parse $delta:f)" "$(git rev-parse $base:f)" && + + git multi-pack-index write --bitmap && + + cat >in <<-EOF && + $(git rev-parse other) + ^$base + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs /dev/null && + + # We can only reuse the 3 objects corresponding to "other" from + # the latest pack. + # + # This is because even though we want "delta", we do not want + # "base", meaning that we have to inflate the delta/base-pair + # corresponding to the blob in commit "delta", which bypasses + # the pack-reuse mechanism. + # + # The remaining objects from the other pack are similarly not + # reused because their objects are on the uninteresting side of + # the query. + test_pack_reused 3 in <<-EOF && + $(git rev-parse $base) + ^$(git rev-parse $delta) + EOF + + P="$(git pack-objects --revs $packdir/pack trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --all >/dev/null && + + packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" && + objects_nr="$(git rev-list --count --all --objects)" && + + test_pack_reused $(($objects_nr - 1))