diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 96cc89d157d..1238bfd915d 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -227,6 +227,15 @@ ifdef::git-rev-list[] test the exit status to see if a range of objects is fully connected (or not). It is faster than redirecting stdout to `/dev/null` as the output does not have to be formatted. + +--disk-usage:: + Suppress normal output; instead, print the sum of the bytes used + for on-disk storage by the selected commits or objects. This is + equivalent to piping the output into `git cat-file + --batch-check='%(objectsize:disk)'`, except that it runs much + faster (especially with `--use-bitmap-index`). See the `CAVEATS` + section in linkgit:git-cat-file[1] for the limitations of what + "on-disk storage" means. endif::git-rev-list[] --cherry-mark:: diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 25c6c3b38d4..b4d8ea0a35b 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -80,6 +80,19 @@ static int arg_show_object_names = 1; #define DEFAULT_OIDSET_SIZE (16*1024) +static int show_disk_usage; /* set to 1 when the --disk-usage argument is parsed in cmd_rev_list() */ +static off_t total_disk_usage; /* running byte total, added to by show_commit() and show_object() */ + /* Return the on-disk size of obj: point oi.disk_sizep at a local, query oid_object_info_extended(), and die() if that call returns a negative value. */ +static off_t get_object_disk_usage(struct object *obj) +{ + off_t size; + struct object_info oi = OBJECT_INFO_INIT; + oi.disk_sizep = &size; /* the only field requested from the lookup */ + if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0) + die(_("unable to get disk usage of %s"), oid_to_hex(&obj->oid)); + return size; +} + static void finish_commit(struct commit *commit); static void show_commit(struct commit *commit, void *data) { @@ -88,6 +101,9 @@ static void show_commit(struct commit *commit, void *data) display_progress(progress, ++progress_counter); + if (show_disk_usage) /* accumulate here, ahead of the REV_LIST_QUIET early return that follows */ + total_disk_usage += get_object_disk_usage(&commit->object); + if (info->flags & REV_LIST_QUIET) { finish_commit(commit); return; @@ -258,6 +274,8 @@ static void show_object(struct object *obj, const char *name, void *cb_data) if (finish_object(obj, 
name, cb_data)) return; display_progress(progress, ++progress_counter); + if (show_disk_usage) /* same accumulation as in show_commit(), again ahead of the quiet return */ + total_disk_usage += get_object_disk_usage(obj); if (info->flags & REV_LIST_QUIET) return; @@ -452,6 +470,23 @@ static int try_bitmap_traversal(struct rev_info *revs, return 0; } +static int try_bitmap_disk_usage(struct rev_info *revs, + struct list_objects_filter_options *filter) +{ /* Bitmap path for --disk-usage. Returns 0 after printing the total; returns -1 when --disk-usage is off or prepare_bitmap_walk() gives no bitmap, in which case the caller moves on to its next strategy. */ + struct bitmap_index *bitmap_git; + + if (!show_disk_usage) + return -1; + + bitmap_git = prepare_bitmap_walk(revs, filter); + if (!bitmap_git) + return -1; + + printf("%"PRIuMAX"\n", + (uintmax_t)get_disk_usage_from_bitmap(bitmap_git, revs)); /* NOTE(review): bitmap_git is not released in this function; confirm whether it should be */ + return 0; +} + int cmd_rev_list(int argc, const char **argv, const char *prefix) { struct rev_info revs; @@ -584,6 +619,12 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) continue; } + if (!strcmp(arg, "--disk-usage")) { + show_disk_usage = 1; + info.flags |= REV_LIST_QUIET; /* show_commit() and show_object() return early on this flag, after the size has been accumulated */ + continue; + } + usage(rev_list_usage); } @@ -626,6 +667,8 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) if (use_bitmap_index) { if (!try_bitmap_count(&revs, &filter_options)) return 0; + if (!try_bitmap_disk_usage(&revs, &filter_options)) /* a 0 result means the total was already printed */ + return 0; if (!try_bitmap_traversal(&revs, &filter_options)) return 0; } @@ -690,5 +733,8 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) printf("%d\n", revs.count_left + revs.count_right); } + if (show_disk_usage) /* reached only when the bitmap path above did not return; prints the total gathered by the show_* callbacks */ + printf("%"PRIuMAX"\n", (uintmax_t)total_disk_usage); + return 0; } diff --git a/pack-bitmap.c b/pack-bitmap.c index 60fe20fb87a..1f69b5fa853 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1430,3 +1430,84 @@ int bitmap_has_oid_in_uninteresting(struct bitmap_index *bitmap_git, return bitmap_git && bitmap_walk_contains(bitmap_git, bitmap_git->haves, oid); } + /* Sum the sizes of objects of object_type whose bits are set in bitmap_git->result, where each size is computed as a difference of two pack offsets. */ +static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git, + enum object_type object_type) +{ + struct bitmap *result = bitmap_git->result; + struct packed_git *pack = bitmap_git->pack; + off_t 
total = 0; + struct ewah_iterator it; + eword_t filter; + size_t i; + + init_type_iterator(&it, bitmap_git, object_type); + for (i = 0; i < result->word_alloc && + ewah_iterator_next(&filter, &it); i++) { + eword_t word = result->words[i] & filter; /* keep only the result bits that are also set in the type filter word */ + size_t base = (i * BITS_IN_EWORD); /* bit position of the first bit in word i */ + unsigned offset; + + if (!word) + continue; + + for (offset = 0; offset < BITS_IN_EWORD; offset++) { + size_t pos; + + if ((word >> offset) == 0) /* no set bits remain at or above offset */ + break; + + offset += ewah_bit_ctz64(word >> offset); /* presumably a count of trailing zero bits, moving offset onto the next set bit; confirm against ewah_bit_ctz64 */ + pos = base + offset; + total += pack_pos_to_offset(pack, pos + 1) - + pack_pos_to_offset(pack, pos); /* size taken as the gap between the offsets for positions pos and pos + 1 */ + } + } + + return total; +} + /* Sum the disk sizes of the ext_index objects whose bits are set in bitmap_git->result, looking each one up with oid_object_info_extended() and calling die() on failure. */ +static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git) +{ + struct bitmap *result = bitmap_git->result; + struct packed_git *pack = bitmap_git->pack; + struct eindex *eindex = &bitmap_git->ext_index; + off_t total = 0; + struct object_info oi = OBJECT_INFO_INIT; + off_t object_size; + size_t i; + + oi.disk_sizep = &object_size; /* set once; each lookup in the loop writes through this pointer */ + + for (i = 0; i < eindex->count; i++) { + struct object *obj = eindex->objects[i]; + + if (!bitmap_get(result, pack->num_objects + i)) /* entry i of the extended index is tested at bit pack->num_objects + i */ + continue; + + if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0) + die(_("unable to get disk usage of %s"), + oid_to_hex(&obj->oid)); + + total += object_size; + } + return total; +} + /* Return the combined total: commits always, trees, blobs and tags only when the matching revs flag is set, plus the extended-index objects. */ +off_t get_disk_usage_from_bitmap(struct bitmap_index *bitmap_git, + struct rev_info *revs) +{ + off_t total = 0; + + total += get_disk_usage_for_type(bitmap_git, OBJ_COMMIT); + if (revs->tree_objects) + total += get_disk_usage_for_type(bitmap_git, OBJ_TREE); + if (revs->blob_objects) + total += get_disk_usage_for_type(bitmap_git, OBJ_BLOB); + if (revs->tag_objects) + total += get_disk_usage_for_type(bitmap_git, OBJ_TAG); + + total += get_disk_usage_for_extended(bitmap_git); /* added unconditionally, without consulting the revs flags */ + + return total; +} diff --git a/pack-bitmap.h b/pack-bitmap.h index 25dfcf56156..36d99930d8d 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -68,6 +68,8 @@ int bitmap_walk_contains(struct bitmap_index 
*, */ int bitmap_has_oid_in_uninteresting(struct bitmap_index *, const struct object_id *oid); +off_t get_disk_usage_from_bitmap(struct bitmap_index *, struct rev_info *); /* defined in pack-bitmap.c; called by try_bitmap_disk_usage() in builtin/rev-list.c */ + void bitmap_writer_show_progress(int show); void bitmap_writer_set_checksum(unsigned char *sha1); void bitmap_writer_build_type_index(struct packing_data *to_pack, diff --git a/t/t6115-rev-list-du.sh b/t/t6115-rev-list-du.sh new file mode 100755 index 00000000000..b4aef32b713 --- /dev/null +++ b/t/t6115-rev-list-du.sh @@ -0,0 +1,51 @@ +#!/bin/sh + +test_description='basic tests of rev-list --disk-usage' +. ./test-lib.sh + +# we want a mix of reachable and unreachable, as well as +# objects in the bitmapped pack and some outside of it +test_expect_success 'set up repository' ' + test_commit --no-tag one && + test_commit --no-tag two && + git repack -adb && + git reset --hard HEAD^ && + test_commit --no-tag three && + test_commit --no-tag four && + git reset --hard HEAD^ +' + +# We don't want to hardcode sizes, because they depend on the exact details of +# packing, zlib, etc. We'll assume that the regular rev-list and cat-file +# machinery works and compare the --disk-usage output to that. +disk_usage_slow () { + git rev-list --no-object-names "$@" | + git cat-file --batch-check="%(objectsize:disk)" | + perl -lne '$total += $_; END { print $total}' +} + +# check behavior with given rev-list options; note that +# whitespace is not preserved in args +check_du () { + args=$* + + test_expect_success "generate expected size ($args)" " + disk_usage_slow $args >expect + " + + test_expect_success "rev-list --disk-usage without bitmaps ($args)" " + git rev-list --disk-usage $args >actual && + test_cmp expect actual + " + + test_expect_success "rev-list --disk-usage with bitmaps ($args)" " + git rev-list --disk-usage --use-bitmap-index $args >actual && + test_cmp expect actual + " +} + +check_du HEAD +check_du --objects HEAD +check_du --objects HEAD^..HEAD + +test_done