diff --git a/Documentation/config.txt b/Documentation/config.txt
index 4b0c3682cc..499a3c4360 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1872,6 +1872,17 @@ pack.writebitmaps::
 	space and extra time spent on the initial repack.  Defaults to
 	false.
 
+pack.writeBitmapHashCache::
+	When true, git will include a "hash cache" section in the bitmap
+	index (if one is written). This cache can be used to feed git's
+	delta heuristics, potentially leading to better deltas between
+	bitmapped and non-bitmapped objects (e.g., when serving a fetch
+	between an older, bitmapped pack and objects that have been
+	pushed since the last gc). The downside is that it consumes 4
+	bytes per object of disk space, and that JGit's bitmap
+	implementation does not understand it, causing it to complain if
+	Git and JGit are used on the same repository. Defaults to false.
+
 pager.<cmd>::
 	If the value is boolean, turns on or off pagination of the
 	output of a particular Git subcommand when writing to a tty.
diff --git a/Documentation/technical/bitmap-format.txt b/Documentation/technical/bitmap-format.txt
index 7a86bd77d5..f8c18a0f7a 100644
--- a/Documentation/technical/bitmap-format.txt
+++ b/Documentation/technical/bitmap-format.txt
@@ -21,6 +21,12 @@ GIT bitmap v1 format
 			requirement for the bitmap index format, also present in JGit,
 			that greatly reduces the complexity of the implementation.
 
+			- BITMAP_OPT_HASH_CACHE (0x4)
+			If present, the end of the bitmap file contains
+			`N` 32-bit name-hash values, one per object in the
+			pack. The format and meaning of the name-hash is
+			described below.
+
 		4-byte entry count (network byte order)
 
 			The total count of entries (bitmapped commits) in this bitmap index.
@@ -129,3 +135,30 @@ The bitstream represented by the above chunk is then:
 The next word after `L_M` (if any) must again be a RLW, for the next
 chunk.  For efficient appending to the bitstream, the EWAH stores a
 pointer to the last RLW in the stream.
+
+
+== Appendix B: Optional Bitmap Sections
+
+These sections may or may not be present in the `.bitmap` file; their
+presence is indicated by the header flags section described above.
+
+Name-hash cache
+---------------
+
+If the BITMAP_OPT_HASH_CACHE flag is set, the end of the bitmap contains
+a cache of 32-bit values, one per object in the pack. The value at
+position `i` is the hash of the pathname at which the `i`th object
+(counting in index order) in the pack can be found.  This can be fed
+into the delta heuristics to compare objects with similar pathnames.
+
+The hash algorithm used is:
+
+    hash = 0;
+    while ((c = *name++))
+	    if (!isspace(c))
+		    hash = (hash >> 2) + (c << 24);
+
+Note that this hashing scheme is tied to the BITMAP_OPT_HASH_CACHE flag.
+If implementations want to choose a different hashing scheme, they are
+free to do so, but MUST allocate a new header flag (because comparing
+hashes made under two different schemes would be pointless).
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index fd6ae01ba4..fd741970e6 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -64,6 +64,7 @@ static off_t reuse_packfile_offset;
 
 static int use_bitmap_index = 1;
 static int write_bitmap_index;
+static uint16_t write_bitmap_options;
 
 static unsigned long delta_cache_size = 0;
 static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
@@ -851,7 +852,8 @@ static void write_pack_file(void)
 				bitmap_writer_reuse_bitmaps(&to_pack);
 				bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
 				bitmap_writer_build(&to_pack);
-				bitmap_writer_finish(written_list, nr_written, tmpname);
+				bitmap_writer_finish(written_list, nr_written,
+						     tmpname, write_bitmap_options);
 				write_bitmap_index = 0;
 			}
 
@@ -2201,6 +2203,13 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 		write_bitmap_index = git_config_bool(k, v);
 		return 0;
 	}
+	if (!strcmp(k, "pack.writebitmaphashcache")) {
+		if (git_config_bool(k, v))
+			write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
+		else
+			write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
+		return 0;
+	}
 	if (!strcmp(k, "pack.usebitmaps")) {
 		use_bitmap_index = git_config_bool(k, v);
 		return 0;
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 954a74d6cf..1218befaf2 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -490,6 +490,23 @@ static void write_selected_commits_v1(struct sha1file *f,
 	}
 }
 
+/*
+ * Append the name-hash cache: one 32-bit hash per entry of `index`,
+ * written in network byte order and in the order given by `index`.
+ */
+static void write_hash_cache(struct sha1file *f,
+			     struct pack_idx_entry **index,
+			     uint32_t index_nr)
+{
+	uint32_t i;
+
+	for (i = 0; i < index_nr; ++i) {
+		struct object_entry *entry = (struct object_entry *)index[i];
+		uint32_t hash_value = htonl(entry->hash);
+		sha1write(f, &hash_value, sizeof(hash_value));
+	}
+}
+
 void bitmap_writer_set_checksum(unsigned char *sha1)
 {
 	hashcpy(writer.pack_checksum, sha1);
@@ -497,7 +514,8 @@ void bitmap_writer_set_checksum(unsigned char *sha1)
 
 void bitmap_writer_finish(struct pack_idx_entry **index,
 			  uint32_t index_nr,
-			  const char *filename)
+			  const char *filename,
+			  uint16_t options)
 {
 	static char tmp_file[PATH_MAX];
 	static uint16_t default_version = 1;
@@ -514,7 +532,7 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
 
 	memcpy(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE));
 	header.version = htons(default_version);
-	header.options = htons(flags);
+	header.options = htons(flags | options);
 	header.entry_count = htonl(writer.selected_nr);
 	memcpy(header.checksum, writer.pack_checksum, 20);
 
@@ -525,6 +543,9 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
 	dump_bitmap(f, writer.tags);
 	write_selected_commits_v1(f, index, index_nr);
 
+	if (options & BITMAP_OPT_HASH_CACHE)
+		write_hash_cache(f, index, index_nr);
+
 	sha1close(f, NULL, CSUM_FSYNC);
 
 	if (adjust_shared_perm(tmp_file))
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 82090a6741..ae0b57b950 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -66,6 +66,9 @@ static struct bitmap_index {
 	/* Number of bitmapped commits */
 	uint32_t entry_count;
 
+	/* Name-hash cache (or NULL if not present). */
+	uint32_t *hashes;
+
 	/*
 	 * Extended index.
 	 *
@@ -152,6 +155,25 @@ static int load_bitmap_header(struct bitmap_index *index)
 		if ((flags & BITMAP_OPT_FULL_DAG) == 0)
 			return error("Unsupported options for bitmap index file "
 				"(Git requires BITMAP_OPT_FULL_DAG)");
+
+		if (flags & BITMAP_OPT_HASH_CACHE) {
+			/*
+			 * The cache occupies the bytes just before the last
+			 * 20 bytes of the file, one 32-bit value per packed
+			 * object.  Refuse a file too short to hold it rather
+			 * than pointing outside the mapping.
+			 */
+			uint32_t nr = index->pack->num_objects;
+			unsigned char *end;
+
+			if (index->map_size < sizeof(*header) + 20 ||
+			    nr > (index->map_size - sizeof(*header) - 20) / sizeof(uint32_t))
+				return error("Corrupted bitmap index file "
+					     "(too short to fit hash cache)");
+
+			end = index->map + index->map_size - 20;
+			index->hashes = ((uint32_t *)end) - nr;
+		}
 	}
 
 	index->entry_count = ntohl(header->entry_count);
@@ -626,6 +648,18 @@ static void show_objects_for_type(
 			entry = &bitmap_git.reverse_index->revindex[pos + offset];
 			sha1 = nth_packed_object_sha1(bitmap_git.pack, entry->nr);
 
+			if (bitmap_git.hashes) {
+				/*
+				 * Decode the big-endian value bytewise: the
+				 * cache is not guaranteed to be 4-byte aligned
+				 * within the file.
+				 */
+				const unsigned char *p =
+					(const unsigned char *)(bitmap_git.hashes + entry->nr);
+				hash = ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
+				       ((uint32_t)p[2] << 8) | (uint32_t)p[3];
+			}
+
 			show_reach(sha1, object_type, 0, hash, bitmap_git.pack, entry->offset);
 		}
 
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 09acf02f7b..8b7f4e9f0d 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -24,7 +24,8 @@ static const char BITMAP_IDX_SIGNATURE[] = {'B', 'I', 'T', 'M'};
 #define NEEDS_BITMAP (1u<<22)
 
 enum pack_bitmap_opts {
-	BITMAP_OPT_FULL_DAG = 1
+	BITMAP_OPT_FULL_DAG = 1,
+	BITMAP_OPT_HASH_CACHE = 4,
 };
 
 enum pack_bitmap_flags {
@@ -57,6 +58,7 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
 void bitmap_writer_build(struct packing_data *to_pack);
 void bitmap_writer_finish(struct pack_idx_entry **index,
 			  uint32_t index_nr,
-			  const char *filename);
+			  const char *filename,
+			  uint16_t options);
 
 #endif
diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh
index 8c6ae4567c..685d46f8b7 100755
--- a/t/perf/p5310-pack-bitmaps.sh
+++ b/t/perf/p5310-pack-bitmaps.sh
@@ -9,7 +9,8 @@ test_perf_large_repo
 # since we want to be able to compare bitmap-aware
 # git versus non-bitmap git
 test_expect_success 'setup bitmap config' '
-	git config pack.writebitmaps true
+	git config pack.writebitmaps true &&
+	git config pack.writebitmaphashcache true
 '
 
 test_perf 'repack to disk' '
diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh
index d2b0c45cca..d3a3afaba8 100755
--- a/t/t5310-pack-bitmaps.sh
+++ b/t/t5310-pack-bitmaps.sh
@@ -14,7 +14,8 @@ test_expect_success 'setup repo with moderate-sized history' '
 	git checkout master &&
 	blob=$(echo tagged-blob | git hash-object -w --stdin) &&
 	git tag tagged-blob $blob &&
-	git config pack.writebitmaps true
+	git config pack.writebitmaps true &&
+	git config pack.writebitmaphashcache true
 '
 
 test_expect_success 'full repack creates bitmaps' '