diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 17405c73a9..8c75855782 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -67,6 +67,12 @@ this option is given, future commit-graph writes will automatically assume that this option was intended. Use `--no-changed-paths` to stop storing this data. + +With the `--max-new-filters=` option, generate at most `n` new Bloom +filters (if `--changed-paths` is specified). If `n` is `-1`, no limit is +enforced. Only commits present in the new layer count against this +limit. To retroactively compute Bloom filters over earlier layers, it is +advised to use `--split=replace`. ++ With the `--split[=]` option, write the commit-graph as a chain of multiple commit-graph files stored in `/info/commit-graphs`. Commit-graph layers are merged based on the diff --git a/bloom.c b/bloom.c index d234551ce0..68c73200a5 100644 --- a/bloom.c +++ b/bloom.c @@ -204,12 +204,11 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, if (!filter->data) { load_commit_graph_info(r, c); - if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH && - load_bloom_filter_from_graph(r->objects->commit_graph, filter, c)) - return filter; + if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH) + load_bloom_filter_from_graph(r->objects->commit_graph, filter, c); } - if (filter->data) + if (filter->data && filter->len) return filter; if (!compute_if_not_present) return NULL; diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index f3243bd982..5df9b2ef80 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -13,7 +13,8 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--changed-paths] [--[no-]progress] "), + "[--changed-paths] [--[no-]max-new-filters ] [--[no-]progress] " + ""), NULL }; @@ -25,7 +26,8 @@ static const char * const builtin_commit_graph_verify_usage[] = { static const char * const builtin_commit_graph_write_usage[] = { N_("git commit-graph write [--object-dir ] [--append] " "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--changed-paths] [--[no-]progress] "), + "[--changed-paths] [--[no-]max-new-filters ] [--[no-]progress] " + ""), NULL }; @@ -162,6 +164,23 @@ static int read_one_commit(struct oidset *commits, struct progress *progress, return 0; } +static int write_option_max_new_filters(const struct option *opt, + const char *arg, + int unset) +{ + int *to = opt->value; + if (unset) + *to = -1; + else { + const char *s; + *to = strtol(arg, (char **)&s, 10); + if (*s) + return error(_("%s expects a numerical value"), + optname(opt, opt->flags)); + } + return 0; +} + static int graph_write(int argc, const char **argv) { struct string_list pack_indexes = STRING_LIST_INIT_NODUP; @@ -197,6 +216,9 @@ static int graph_write(int argc, const char **argv) N_("maximum ratio between two levels of a split commit-graph")), OPT_EXPIRY_DATE(0, "expire-time", &write_opts.expire_time, N_("only expire files older than a given date-time")), + OPT_CALLBACK_F(0, "max-new-filters", &write_opts.max_new_filters, + NULL, N_("maximum number of changed-path Bloom filters to compute"), + 0, write_option_max_new_filters), OPT_END(), }; @@ -205,6 +227,7 @@ static int graph_write(int argc, const char **argv) write_opts.size_multiple = 2; write_opts.max_commits = 0; write_opts.expire_time = 0; + write_opts.max_new_filters = -1; trace2_cmd_mode("write"); diff --git a/commit-graph.c b/commit-graph.c index 1a53a03f5d..1aedc1c4df 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1408,6 +1408,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx) int i; struct progress *progress = NULL; struct commit **sorted_commits; + int max_new_filters; init_bloom_filters(); @@ -1424,13 +1425,16 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx) else QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp); + max_new_filters = ctx->opts && ctx->opts->max_new_filters >= 0 ? + ctx->opts->max_new_filters : ctx->commits.nr; + for (i = 0; i < ctx->commits.nr; i++) { enum bloom_filter_computed computed = 0; struct commit *c = sorted_commits[i]; struct bloom_filter *filter = get_or_compute_bloom_filter( ctx->r, c, - 1, + ctx->count_bloom_filter_computed < max_new_filters, ctx->bloom_settings, &computed); if (computed & BLOOM_COMPUTED) { @@ -1441,7 +1445,8 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx) ctx->count_bloom_filter_trunc_large++; } else if (computed & BLOOM_NOT_COMPUTED) ctx->count_bloom_filter_not_computed++; - ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len; + ctx->total_bloom_filter_data_size += filter + ? sizeof(unsigned char) * filter->len : 0; display_progress(progress, i + 1); } diff --git a/commit-graph.h b/commit-graph.h index b7914b0a7a..a22bd86701 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -110,6 +110,7 @@ struct commit_graph_opts { int max_commits; timestamp_t expire_time; enum commit_graph_split_flags split_flags; + int max_new_filters; }; /* diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh index ec9845c9be..48ab1b16a6 100755 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@ -305,4 +305,74 @@ test_expect_success 'correctly report commits with no changed paths' ' ) ' +test_expect_success 'Bloom generation is limited by --max-new-filters' ' + ( + cd limits && + test_commit c2 filter && + test_commit c3 filter && + test_commit c4 no-filter && + + rm -f trace.event && + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + git commit-graph write --reachable --split=replace \ + --changed-paths --max-new-filters=2 && + + test_filter_computed 2 trace.event && + test_filter_not_computed 3 trace.event && + test_filter_trunc_empty 0 trace.event && + test_filter_trunc_large 0 trace.event + ) +' + +test_expect_success 'Bloom generation backfills previously-skipped filters' ' + ( + cd limits && + + rm -f trace.event && + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + git commit-graph write --reachable --changed-paths \ + --split=replace --max-new-filters=1 && + test_filter_computed 1 trace.event && + test_filter_not_computed 4 trace.event && + test_filter_trunc_empty 0 trace.event && + test_filter_trunc_large 0 trace.event + ) +' + +test_expect_success 'Bloom generation backfills empty commits' ' + git init empty && + test_when_finished "rm -fr empty" && + ( + cd empty && + for i in $(test_seq 1 6) + do + git commit --allow-empty -m "$i" + done && + + # Generate Bloom filters for empty commits 1-6, two at a time. + for i in $(test_seq 1 3) + do + rm -f trace.event && + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + git commit-graph write --reachable \ + --changed-paths --max-new-filters=2 && + test_filter_computed 2 trace.event && + test_filter_not_computed 4 trace.event && + test_filter_trunc_empty 2 trace.event && + test_filter_trunc_large 0 trace.event + done && + + # Finally, make sure that once all commits have filters, that + # none are subsequently recomputed. + rm -f trace.event && + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + git commit-graph write --reachable \ + --changed-paths --max-new-filters=2 && + test_filter_computed 0 trace.event && + test_filter_not_computed 6 trace.event && + test_filter_trunc_empty 0 trace.event && + test_filter_trunc_large 0 trace.event + ) +' + test_done