From a05f02b1d9a1253e11a327c95cd47cbd24317ba6 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 14 Sep 2021 18:06:02 -0400 Subject: [PATCH 1/7] t/helper/test-bitmap.c: add 'dump-hashes' mode The pack-bitmap writer code is about to learn how to propagate values from an existing hash-cache. To prepare, teach the test-bitmap helper to dump the values from a bitmap's hash-cache extension in order to test those changes. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 27 +++++++++++++++++++++++++++ pack-bitmap.h | 1 + t/helper/test-bitmap.c | 10 +++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index 8504110a4d..04de387318 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1742,6 +1742,33 @@ int test_bitmap_commits(struct repository *r) return 0; } +int test_bitmap_hashes(struct repository *r) +{ + struct bitmap_index *bitmap_git = prepare_bitmap_git(r); + struct object_id oid; + uint32_t i, index_pos; + + if (!bitmap_git->hashes) + goto cleanup; + + for (i = 0; i < bitmap_num_objects(bitmap_git); i++) { + if (bitmap_is_midx(bitmap_git)) + index_pos = pack_pos_to_midx(bitmap_git->midx, i); + else + index_pos = pack_pos_to_index(bitmap_git->pack, i); + + nth_bitmap_object_oid(bitmap_git, &oid, index_pos); + + printf("%s %"PRIu32"\n", + oid_to_hex(&oid), get_be32(bitmap_git->hashes + index_pos)); + } + +cleanup: + free_bitmap_index(bitmap_git); + + return 0; +} + int rebuild_bitmap(const uint32_t *reposition, struct ewah_bitmap *source, struct bitmap *dest) diff --git a/pack-bitmap.h b/pack-bitmap.h index 469090bad2..ed46d27077 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -52,6 +52,7 @@ void traverse_bitmap_commit_list(struct bitmap_index *, show_reachable_fn show_reachable); void test_bitmap_walk(struct rev_info *revs); int test_bitmap_commits(struct repository *r); +int test_bitmap_hashes(struct repository *r); struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, struct list_objects_filter_options *filter, int filter_provided_objects); diff --git a/t/helper/test-bitmap.c b/t/helper/test-bitmap.c index 134a1e9d76..ff35f5999b 100644 --- a/t/helper/test-bitmap.c +++ b/t/helper/test-bitmap.c @@ -7,6 +7,11 @@ static int bitmap_list_commits(void) return test_bitmap_commits(the_repository); } +static int bitmap_dump_hashes(void) +{ + return test_bitmap_hashes(the_repository); +} + int cmd__bitmap(int argc, const char **argv) { setup_git_directory(); @@ -16,9 +21,12 @@ int cmd__bitmap(int argc, const char **argv) if (!strcmp(argv[1], "list-commits")) return bitmap_list_commits(); + if (!strcmp(argv[1], "dump-hashes")) + return bitmap_dump_hashes(); usage: - usage("\ttest-tool bitmap list-commits"); + usage("\ttest-tool bitmap list-commits\n" + "\ttest-tool bitmap dump-hashes"); return -1; } From 8de300e1f7ebe7099ce6ce60f6c6fb494e6703b2 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 14 Sep 2021 18:06:04 -0400 Subject: [PATCH 2/7] pack-bitmap.c: propagate namehash values from existing bitmaps When an old bitmap exists while writing a new one, we load it and build a "reposition" table which maps bit positions of objects from the old bitmap to their respective positions in the new bitmap. This can help when we encounter a commit which was selected in both the old and new bitmap, since we only need to permute its bit (not recompute it from scratch). We do not, however, repurpose existing namehash values in the case of the hash-cache extension. There has been thus far no good reason to do so, since all of the namehash values for objects in the new bitmap would be populated during the traversal that was just performed by pack-objects when generating single-pack reachability bitmaps. But this isn't the case for multi-pack bitmaps, which are written via `git multi-pack-index write --bitmap` and do not perform any traversal. In this case all namehash values are set to zero, but we don't even bother to check the `pack.writeBitmapHashcache` option anyway, so it fails to matter. There are two approaches we could take to fill in non-zero hash-cache values: - have either the multi-pack-index builtin run its own traversal to attempt to fill in some values, or let a hypothetical caller (like `pack-objects` when `repack` eventually drives the `multi-pack-index` builtin) fill in the values they found during their traversal - or copy any existing namehash values that were stored in an existing bitmap to their corresponding positions in the new bitmap In a system where a repository is generally repacked with `git repack --geometric=` and occasionally repacked with `git repack -a`, the hash-cache coverage will tend towards all objects. Since populating the hash-cache is additive (i.e., doing so only helps our delta search), any intermediate lack of full coverage is just fine. So let's start by just propagating any values from the existing hash-cache if we see one. The next patch will respect the `pack.writeBitmapHashcache` option while writing MIDX bitmaps, and then test this new behavior. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index 04de387318..33a3732992 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1818,18 +1818,20 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, for (i = 0; i < num_objects; ++i) { struct object_id oid; struct object_entry *oe; + uint32_t index_pos; if (bitmap_is_midx(bitmap_git)) - nth_midxed_object_oid(&oid, - bitmap_git->midx, - pack_pos_to_midx(bitmap_git->midx, i)); + index_pos = pack_pos_to_midx(bitmap_git->midx, i); else - nth_packed_object_id(&oid, bitmap_git->pack, - pack_pos_to_index(bitmap_git->pack, i)); + index_pos = pack_pos_to_index(bitmap_git->pack, i); + nth_bitmap_object_oid(bitmap_git, &oid, index_pos); oe = packlist_find(mapping, &oid); - if (oe) + if (oe) { reposition[i] = oe_in_pack_pos(mapping, oe) + 1; + if (bitmap_git->hashes && !oe->hash) + oe->hash = get_be32(bitmap_git->hashes + index_pos); + } } return reposition; From caca3c9f07f90645e32c284c88c379109abf59be Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 14 Sep 2021 18:06:06 -0400 Subject: [PATCH 3/7] midx.c: respect 'pack.writeBitmapHashcache' when writing bitmaps In the previous commit, the bitmap writing code learned to propagate values from an existing hash-cache extension into the bitmap that it is writing. Now that that functionality exists, let's expose it by teaching the 'git multi-pack-index' builtin to respect the `pack.writeBitmapHashCache` option so that the hash-cache may be written at all. Two minor points worth noting here: - The 'git multi-pack-index write' sub-command didn't previously read any configuration (instead this is handled in the base command). A separate handler is added here to respect this write-specific config option. - I briefly considered adding a 'bitmap_flags' field to the static options struct, but decided against it since it would require plumbing through a new parameter to the write_midx_file() function. Instead, a new MIDX-specific flag is added, which is translated to the corresponding bitmap one. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- Documentation/config/pack.txt | 4 ++++ builtin/multi-pack-index.c | 21 +++++++++++++++++++++ midx.c | 6 +++++- midx.h | 1 + 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt index 763f7af7c4..ad7f73a1ea 100644 --- a/Documentation/config/pack.txt +++ b/Documentation/config/pack.txt @@ -159,6 +159,10 @@ pack.writeBitmapHashCache:: between an older, bitmapped pack and objects that have been pushed since the last gc). The downside is that it consumes 4 bytes per object of disk space. Defaults to true. ++ +When writing a multi-pack reachability bitmap, no new namehashes are +computed; instead, any namehashes stored in an existing bitmap are +permuted into their appropriate location when writing a new bitmap. pack.writeReverseIndex:: When true, git will write a corresponding .rev file (see: diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 73c0113b48..578ffea6e8 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -61,6 +61,23 @@ static struct option *add_common_options(struct option *prev) return parse_options_concat(common_opts, prev); } +static int git_multi_pack_index_write_config(const char *var, const char *value, + void *cb) +{ + if (!strcmp(var, "pack.writebitmaphashcache")) { + if (git_config_bool(var, value)) + opts.flags |= MIDX_WRITE_BITMAP_HASH_CACHE; + else + opts.flags &= ~MIDX_WRITE_BITMAP_HASH_CACHE; + } + + /* + * We should never make a fall-back call to 'git_default_config', since + * this was already called in 'cmd_multi_pack_index()'. + */ + return 0; +} + static int cmd_multi_pack_index_write(int argc, const char **argv) { struct option *options; @@ -73,6 +90,10 @@ static int cmd_multi_pack_index_write(int argc, const char **argv) OPT_END(), }; + opts.flags |= MIDX_WRITE_BITMAP_HASH_CACHE; + + git_config(git_multi_pack_index_write_config, NULL); + options = add_common_options(builtin_multi_pack_index_write_options); trace2_cmd_mode(argv[0]); diff --git a/midx.c b/midx.c index 864034a6ad..38c8600458 100644 --- a/midx.c +++ b/midx.c @@ -1008,9 +1008,13 @@ static int write_midx_bitmap(char *midx_name, unsigned char *midx_hash, struct pack_idx_entry **index; struct commit **commits = NULL; uint32_t i, commits_nr; + uint16_t options = 0; char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name, hash_to_hex(midx_hash)); int ret; + if (flags & MIDX_WRITE_BITMAP_HASH_CACHE) + options |= BITMAP_OPT_HASH_CACHE; + prepare_midx_packing_data(&pdata, ctx); commits = find_commits_for_midx_bitmap(&commits_nr, ctx); @@ -1049,7 +1053,7 @@ static int write_midx_bitmap(char *midx_name, unsigned char *midx_hash, goto cleanup; bitmap_writer_set_checksum(midx_hash); - bitmap_writer_finish(index, pdata.nr_objects, bitmap_name, 0); + bitmap_writer_finish(index, pdata.nr_objects, bitmap_name, options); cleanup: free(index); diff --git a/midx.h b/midx.h index aa3da557bb..541d9ac728 100644 --- a/midx.h +++ b/midx.h @@ -44,6 +44,7 @@ struct multi_pack_index { #define MIDX_PROGRESS (1 << 0) #define MIDX_WRITE_REV_INDEX (1 << 1) #define MIDX_WRITE_BITMAP (1 << 2) +#define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3) const unsigned char *get_midx_checksum(struct multi_pack_index *m); char *get_midx_filename(const char *object_dir); From 2082224f17548d25ac8c582b4d12fa63963c988d Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 14 Sep 2021 18:06:09 -0400 Subject: [PATCH 4/7] p5326: create missing 'perf-tag' tag Some of the tests in test_full_bitmap rely on having a tag named perf-tag in place. We could create it in test_full_bitmap(), but we want to have it in place before the repack starts. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/perf/p5326-multi-pack-bitmaps.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/t/perf/p5326-multi-pack-bitmaps.sh b/t/perf/p5326-multi-pack-bitmaps.sh index 5845109ac7..51b5636259 100755 --- a/t/perf/p5326-multi-pack-bitmaps.sh +++ b/t/perf/p5326-multi-pack-bitmaps.sh @@ -10,6 +10,12 @@ test_expect_success 'enable multi-pack index' ' git config core.multiPackIndex true ' +# we need to create the tag up front such that it is covered by the repack and +# thus by generated bitmaps. +test_expect_success 'create tags' ' + git tag --message="tag pointing to HEAD" perf-tag HEAD +' + test_perf 'setup multi-pack index' ' git repack -ad && git multi-pack-index write --bitmap From 97b89c8150edef2ffd7db2eb072bb898cfea05ca Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Tue, 14 Sep 2021 18:06:11 -0400 Subject: [PATCH 5/7] p5326: don't set core.multiPackIndex unnecessarily When this performance test was originally written, `core.multiPackIndex` was not the default and thus had to be enabled. But now that we have 18e449f86b (midx: enable core.multiPackIndex by default, 2020-09-25), we no longer need this. Drop the unnecessary setup (even though it's not hurting anything, it is unnecessary at best and confusing at worst). Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/perf/p5326-multi-pack-bitmaps.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/t/perf/p5326-multi-pack-bitmaps.sh b/t/perf/p5326-multi-pack-bitmaps.sh index 51b5636259..a9c5499537 100755 --- a/t/perf/p5326-multi-pack-bitmaps.sh +++ b/t/perf/p5326-multi-pack-bitmaps.sh @@ -6,10 +6,6 @@ test_description='Tests performance using midx bitmaps' test_perf_large_repo -test_expect_success 'enable multi-pack index' ' - git config core.multiPackIndex true -' - # we need to create the tag up front such that it is covered by the repack and # thus by generated bitmaps. test_expect_success 'create tags' ' From bf4a60874af992655c139c612c06758353495e3b Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 17 Sep 2021 17:21:27 -0400 Subject: [PATCH 6/7] p5326: generate pack bitmaps before writing the MIDX bitmap To help test the performance of permuting the contents of the hash-cache when generating a MIDX bitmap, we need a bitmap which has its hash-cache populated. And since multi-pack bitmaps don't add *new* values to the hash-cache, we have to rely on a single-pack bitmap to generate those values for us. Therefore, generate a pack bitmap before the MIDX one in order to ensure that the MIDX bitmap has entries in its hash-cache. Since we don't want to time generating the pack bitmap, move that to a non-perf test run before we try to generate the MIDX bitmap. Likewise, get rid of the pack bitmap afterwords, to make certain that we are not accidentally using it in the performance tests run later on. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/perf/p5326-multi-pack-bitmaps.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/t/perf/p5326-multi-pack-bitmaps.sh b/t/perf/p5326-multi-pack-bitmaps.sh index a9c5499537..f2fa228f16 100755 --- a/t/perf/p5326-multi-pack-bitmaps.sh +++ b/t/perf/p5326-multi-pack-bitmaps.sh @@ -12,11 +12,18 @@ test_expect_success 'create tags' ' git tag --message="tag pointing to HEAD" perf-tag HEAD ' +test_expect_success 'start with bitmapped pack' ' + git repack -adb +' + test_perf 'setup multi-pack index' ' - git repack -ad && git multi-pack-index write --bitmap ' +test_expect_success 'drop pack bitmap' ' + rm -f .git/objects/pack/pack-*.bitmap +' + test_full_bitmap test_expect_success 'create partial bitmap state' ' From 54156af0d66b64cfa290ffce302af05d256d9b9c Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Fri, 17 Sep 2021 17:21:29 -0400 Subject: [PATCH 7/7] t5326: test propagating hashcache values Now that we both can propagate values from the hashcache, and respect the configuration to enable the hashcache at all, test that both of these function correctly by hardening their behavior with a test. Like the hash-cache in classic single-pack bitmaps, this helps more proportionally the more up-to-date your bitmap coverage is. When our bitmap coverage is out-of-date with the ref tips, we spend more time proportionally traversing, and all of that traversal gets the name-hash filled in. But for the up-to-date bitmaps, this helps quite a bit. These numbers are on git.git, with `pack.threads=1` to help see the difference reflected in the overall runtime. Test origin/tb/multi-pack-bitmaps HEAD ------------------------------------------------------------------------------------- 5326.4: simulated clone 1.87(1.80+0.07) 1.46(1.42+0.03) -21.9% 5326.5: simulated fetch 2.66(2.61+0.04) 1.47(1.43+0.04) -44.7% 5326.6: pack to file (bitmap) 2.74(2.62+0.12) 1.89(1.82+0.07) -31.0% Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t5326-multi-pack-bitmaps.sh | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/t/t5326-multi-pack-bitmaps.sh b/t/t5326-multi-pack-bitmaps.sh index 4ad7c2c969..ec4aa89f63 100755 --- a/t/t5326-multi-pack-bitmaps.sh +++ b/t/t5326-multi-pack-bitmaps.sh @@ -283,4 +283,34 @@ test_expect_success 'pack.preferBitmapTips' ' ) ' +test_expect_success 'hash-cache values are propagated from pack bitmaps' ' + rm -fr repo && + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit base && + test_commit base2 && + git repack -adb && + + test-tool bitmap dump-hashes >pack.raw && + test_file_not_empty pack.raw && + sort pack.raw >pack.hashes && + + test_commit new && + git repack && + git multi-pack-index write --bitmap && + + test-tool bitmap dump-hashes >midx.raw && + sort midx.raw >midx.hashes && + + # ensure that every namehash in the pack bitmap can be found in + # the midx bitmap (i.e., that there are no oid-namehash pairs + # unique to the pack bitmap). + comm -23 pack.hashes midx.hashes >dropped.hashes && + test_must_be_empty dropped.hashes + ) +' + test_done