diff --git a/Documentation/config/bundle.txt b/Documentation/config/bundle.txt index daa21eb674..3faae38685 100644 --- a/Documentation/config/bundle.txt +++ b/Documentation/config/bundle.txt @@ -15,6 +15,13 @@ bundle.mode:: complete understanding of the bundled information (`all`) or if any one of the listed bundle URIs is sufficient (`any`). +bundle.heuristic:: + If this string-valued key exists, then the bundle list is designed to + work well with incremental `git fetch` commands. The heuristic signals + that there are additional keys available for each bundle that help + determine which subset of bundles the client should download. The + only value currently understood is `creationToken`. + bundle..*:: The `bundle..*` keys are used to describe a single item in the bundle list, grouped under `` for identification purposes. diff --git a/Documentation/config/fetch.txt b/Documentation/config/fetch.txt index cd65d236b4..568f0f75b3 100644 --- a/Documentation/config/fetch.txt +++ b/Documentation/config/fetch.txt @@ -96,3 +96,27 @@ fetch.writeCommitGraph:: merge and the write may take longer. Having an updated commit-graph file helps performance of many Git commands, including `git merge-base`, `git push -f`, and `git log --graph`. Defaults to false. + +fetch.bundleURI:: + This value stores a URI for downloading Git object data from a bundle + URI before performing an incremental fetch from the origin Git server. + This is similar to how the `--bundle-uri` option behaves in + linkgit:git-clone[1]. `git clone --bundle-uri` will set the + `fetch.bundleURI` value if the supplied bundle URI contains a bundle + list that is organized for incremental fetches. ++ +If you modify this value and your repository has a `fetch.bundleCreationToken` +value, then remove that `fetch.bundleCreationToken` value before fetching from +the new bundle URI. + +fetch.bundleCreationToken:: + When using `fetch.bundleURI` to fetch incrementally from a bundle + list that uses the "creationToken" heuristic, this config value + stores the maximum `creationToken` value of the downloaded bundles. + This value is used to prevent downloading bundles in the future + if the advertised `creationToken` is not strictly larger than this + value. ++ +The creation token values are chosen by the provider serving the specific +bundle URI. If you modify the URI at `fetch.bundleURI`, then be sure to +remove the value for the `fetch.bundleCreationToken` value before fetching. diff --git a/Documentation/technical/bundle-uri.txt b/Documentation/technical/bundle-uri.txt index b78d01d9ad..91d3a13e32 100644 --- a/Documentation/technical/bundle-uri.txt +++ b/Documentation/technical/bundle-uri.txt @@ -479,14 +479,14 @@ outline for submitting these features: (This choice is an opt-in via a config option and a command-line option.) -4. Allow the client to understand the `bundle.flag=forFetch` configuration +4. Allow the client to understand the `bundle.heuristic` configuration key and the `bundle..creationToken` heuristic. When `git clone` - discovers a bundle URI with `bundle.flag=forFetch`, it configures the - client repository to check that bundle URI during later `git fetch ` + discovers a bundle URI with `bundle.heuristic`, it configures the client + repository to check that bundle URI during later `git fetch ` commands. 5. Allow clients to discover bundle URIs during `git fetch` and configure - a bundle URI for later fetches if `bundle.flag=forFetch`. + a bundle URI for later fetches if `bundle.heuristic` is set. 6. Implement the "inspect headers" heuristic to reduce data downloads when the `bundle..creationToken` heuristic is not available. diff --git a/builtin/clone.c b/builtin/clone.c index 09b6d5788c..5691364ea1 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1248,12 +1248,16 @@ int cmd_clone(int argc, const char **argv, const char *prefix) * data from the --bundle-uri option. */ if (bundle_uri) { + int has_heuristic = 0; + /* At this point, we need the_repository to match the cloned repo. */ if (repo_init(the_repository, git_dir, work_tree)) warning(_("failed to initialize the repo, skipping bundle URI")); - else if (fetch_bundle_uri(the_repository, bundle_uri)) + else if (fetch_bundle_uri(the_repository, bundle_uri, &has_heuristic)) warning(_("failed to fetch objects from bundle URI '%s'"), bundle_uri); + else if (has_heuristic) + git_config_set_gently("fetch.bundleuri", bundle_uri); } strvec_push(&transport_ls_refs_options.ref_prefixes, "HEAD"); diff --git a/builtin/fetch.c b/builtin/fetch.c index 12978622d5..a21ce89312 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -29,6 +29,7 @@ #include "commit-graph.h" #include "shallow.h" #include "worktree.h" +#include "bundle-uri.h" #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000) @@ -2109,6 +2110,7 @@ static int fetch_one(struct remote *remote, int argc, const char **argv, int cmd_fetch(int argc, const char **argv, const char *prefix) { int i; + const char *bundle_uri; struct string_list list = STRING_LIST_INIT_DUP; struct remote *remote = NULL; int result = 0; @@ -2194,6 +2196,10 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) if (dry_run) write_fetch_head = 0; + if (!git_config_get_string_tmp("fetch.bundleuri", &bundle_uri) && + fetch_bundle_uri(the_repository, bundle_uri, NULL)) + warning(_("failed to fetch bundles from '%s'"), bundle_uri); + if (all) { if (argc == 1) die(_("fetch --all does not take a repository argument")); diff --git a/bundle-uri.c b/bundle-uri.c index 6462ab6deb..8a3c39ce57 100644 --- a/bundle-uri.c +++ b/bundle-uri.c @@ -9,6 +9,14 @@ #include "config.h" #include "remote.h" +static struct { + enum bundle_list_heuristic heuristic; + const char *name; +} heuristics[BUNDLE_HEURISTIC__COUNT] = { + { BUNDLE_HEURISTIC_NONE, ""}, + { BUNDLE_HEURISTIC_CREATIONTOKEN, "creationToken" }, +}; + static int compare_bundles(const void *hashmap_cmp_fn_data, const struct hashmap_entry *he1, const struct hashmap_entry *he2, @@ -75,6 +83,9 @@ static int summarize_bundle(struct remote_bundle_info *info, void *data) FILE *fp = data; fprintf(fp, "[bundle \"%s\"]\n", info->id); fprintf(fp, "\turi = %s\n", info->uri); + + if (info->creationToken) + fprintf(fp, "\tcreationToken = %"PRIu64"\n", info->creationToken); return 0; } @@ -100,6 +111,17 @@ void print_bundle_list(FILE *fp, struct bundle_list *list) fprintf(fp, "\tversion = %d\n", list->version); fprintf(fp, "\tmode = %s\n", mode); + if (list->heuristic) { + int i; + for (i = 0; i < BUNDLE_HEURISTIC__COUNT; i++) { + if (heuristics[i].heuristic == list->heuristic) { + printf("\theuristic = %s\n", + heuristics[list->heuristic].name); + break; + } + } + } + for_all_bundles_in_list(list, summarize_bundle, fp); } @@ -142,6 +164,21 @@ static int bundle_list_update(const char *key, const char *value, return 0; } + if (!strcmp(subkey, "heuristic")) { + int i; + for (i = 0; i < BUNDLE_HEURISTIC__COUNT; i++) { + if (heuristics[i].heuristic && + heuristics[i].name && + !strcmp(value, heuristics[i].name)) { + list->heuristic = heuristics[i].heuristic; + return 0; + } + } + + /* Ignore unknown heuristics. */ + return 0; + } + /* Ignore other unknown global keys. */ return 0; } @@ -169,6 +206,13 @@ static int bundle_list_update(const char *key, const char *value, return 0; } + if (!strcmp(subkey, "creationtoken")) { + if (sscanf(value, "%"PRIu64, &bundle->creationToken) != 1) + warning(_("could not parse bundle list key %s with value '%s'"), + "creationToken", value); + return 0; + } + /* * At this point, we ignore any information that we don't * understand, assuming it to be hints for a heuristic the client @@ -403,6 +447,183 @@ static int download_bundle_to_file(struct remote_bundle_info *bundle, void *data return 0; } +struct bundles_for_sorting { + struct remote_bundle_info **items; + size_t alloc; + size_t nr; +}; + +static int append_bundle(struct remote_bundle_info *bundle, void *data) +{ + struct bundles_for_sorting *list = data; + list->items[list->nr++] = bundle; + return 0; +} + +/** + * For use in QSORT() to get a list sorted by creationToken + * in decreasing order. + */ +static int compare_creation_token_decreasing(const void *va, const void *vb) +{ + const struct remote_bundle_info * const *a = va; + const struct remote_bundle_info * const *b = vb; + + if ((*a)->creationToken > (*b)->creationToken) + return -1; + if ((*a)->creationToken < (*b)->creationToken) + return 1; + return 0; +} + +static int fetch_bundles_by_token(struct repository *r, + struct bundle_list *list) +{ + int cur; + int move_direction = 0; + const char *creationTokenStr; + uint64_t maxCreationToken = 0, newMaxCreationToken = 0; + struct bundle_list_context ctx = { + .r = r, + .list = list, + .mode = list->mode, + }; + struct bundles_for_sorting bundles = { + .alloc = hashmap_get_size(&list->bundles), + }; + + ALLOC_ARRAY(bundles.items, bundles.alloc); + + for_all_bundles_in_list(list, append_bundle, &bundles); + + if (!bundles.nr) { + free(bundles.items); + return 0; + } + + QSORT(bundles.items, bundles.nr, compare_creation_token_decreasing); + + /* + * If fetch.bundleCreationToken exists, parses to a uint64t, and + * is not strictly smaller than the maximum creation token in the + * bundle list, then do not download any bundles. + */ + if (!repo_config_get_value(r, + "fetch.bundlecreationtoken", + &creationTokenStr) && + sscanf(creationTokenStr, "%"PRIu64, &maxCreationToken) == 1 && + bundles.items[0]->creationToken <= maxCreationToken) { + free(bundles.items); + return 0; + } + + /* + * Attempt to download and unbundle the minimum number of bundles by + * creationToken in decreasing order. If we fail to unbundle (after + * a successful download) then move to the next non-downloaded bundle + * and attempt downloading. Once we succeed in applying a bundle, + * move to the previous unapplied bundle and attempt to unbundle it + * again. + * + * In the case of a fresh clone, we will likely download all of the + * bundles before successfully unbundling the oldest one, then the + * rest of the bundles unbundle successfully in increasing order + * of creationToken. + * + * If there are existing objects, then this process may terminate + * early when all required commits from "new" bundles exist in the + * repo's object store. + */ + cur = 0; + while (cur >= 0 && cur < bundles.nr) { + struct remote_bundle_info *bundle = bundles.items[cur]; + + /* + * If we need to dig into bundles below the previous + * creation token value, then likely we are in an erroneous + * state due to missing or invalid bundles. Halt the process + * instead of continuing to download extra data. + */ + if (bundle->creationToken <= maxCreationToken) + break; + + if (!bundle->file) { + /* + * Not downloaded yet. Try downloading. + * + * Note that bundle->file is non-NULL if a download + * was attempted, even if it failed to download. + */ + if (fetch_bundle_uri_internal(ctx.r, bundle, ctx.depth + 1, ctx.list)) { + /* Mark as unbundled so we do not retry. */ + bundle->unbundled = 1; + + /* Try looking deeper in the list. */ + move_direction = 1; + goto move; + } + + /* We expect bundles when using creationTokens. */ + if (!is_bundle(bundle->file, 1)) { + warning(_("file downloaded from '%s' is not a bundle"), + bundle->uri); + break; + } + } + + if (bundle->file && !bundle->unbundled) { + /* + * This was downloaded, but not successfully + * unbundled. Try unbundling again. + */ + if (unbundle_from_file(ctx.r, bundle->file)) { + /* Try looking deeper in the list. */ + move_direction = 1; + } else { + /* + * Succeeded in unbundle. Retry bundles + * that previously failed to unbundle. + */ + move_direction = -1; + bundle->unbundled = 1; + + if (bundle->creationToken > newMaxCreationToken) + newMaxCreationToken = bundle->creationToken; + } + } + + /* + * Else case: downloaded and unbundled successfully. + * Skip this by moving in the same direction as the + * previous step. + */ + +move: + /* Move in the specified direction and repeat. */ + cur += move_direction; + } + + /* + * We succeed if the loop terminates because 'cur' drops below + * zero. The other case is that we terminate because 'cur' + * reaches the end of the list, so we have a failure no matter + * which bundles we apply from the list. + */ + if (cur < 0) { + struct strbuf value = STRBUF_INIT; + strbuf_addf(&value, "%"PRIu64"", newMaxCreationToken); + if (repo_config_set_multivar_gently(ctx.r, + "fetch.bundleCreationToken", + value.buf, NULL, 0)) + warning(_("failed to store maximum creation token")); + + strbuf_release(&value); + } + + free(bundles.items); + return cur >= 0; +} + static int download_bundle_list(struct repository *r, struct bundle_list *local_list, struct bundle_list *global_list, @@ -440,7 +661,15 @@ static int fetch_bundle_list_in_config_format(struct repository *r, goto cleanup; } - if ((result = download_bundle_list(r, &list_from_bundle, + /* + * If this list uses the creationToken heuristic, then the URIs + * it advertises are expected to be bundles, not nested lists. + * We can drop 'global_list' and 'depth'. + */ + if (list_from_bundle.heuristic == BUNDLE_HEURISTIC_CREATIONTOKEN) { + result = fetch_bundles_by_token(r, &list_from_bundle); + global_list->heuristic = BUNDLE_HEURISTIC_CREATIONTOKEN; + } else if ((result = download_bundle_list(r, &list_from_bundle, global_list, depth))) goto cleanup; @@ -551,7 +780,8 @@ static int unlink_bundle(struct remote_bundle_info *info, void *data) return 0; } -int fetch_bundle_uri(struct repository *r, const char *uri) +int fetch_bundle_uri(struct repository *r, const char *uri, + int *has_heuristic) { int result; struct bundle_list list; @@ -571,6 +801,8 @@ int fetch_bundle_uri(struct repository *r, const char *uri) result = unbundle_all_bundles(r, &list); cleanup: + if (has_heuristic) + *has_heuristic = (list.heuristic != BUNDLE_HEURISTIC_NONE); for_all_bundles_in_list(&list, unlink_bundle, NULL); clear_bundle_list(&list); clear_remote_bundle_info(&bundle, NULL); @@ -582,6 +814,14 @@ int fetch_bundle_list(struct repository *r, struct bundle_list *list) int result; struct bundle_list global_list; + /* + * If the creationToken heuristic is used, then the URIs + * advertised by 'list' are not nested lists and instead + * direct bundles. We do not need to use global_list. + */ + if (list->heuristic == BUNDLE_HEURISTIC_CREATIONTOKEN) + return fetch_bundles_by_token(r, list); + init_bundle_list(&global_list); /* If a bundle is added to this global list, then it is required. */ @@ -590,7 +830,10 @@ int fetch_bundle_list(struct repository *r, struct bundle_list *list) if ((result = download_bundle_list(r, list, &global_list, 0))) goto cleanup; - result = unbundle_all_bundles(r, &global_list); + if (list->heuristic == BUNDLE_HEURISTIC_CREATIONTOKEN) + result = fetch_bundles_by_token(r, list); + else + result = unbundle_all_bundles(r, &global_list); cleanup: for_all_bundles_in_list(&global_list, unlink_bundle, NULL); diff --git a/bundle-uri.h b/bundle-uri.h index d5e89f1671..6dbc780f66 100644 --- a/bundle-uri.h +++ b/bundle-uri.h @@ -42,6 +42,12 @@ struct remote_bundle_info { * this boolean is true. */ unsigned unbundled:1; + + /** + * If the bundle is part of a list with the creationToken + * heuristic, then we use this member for sorting the bundles. + */ + uint64_t creationToken; }; #define REMOTE_BUNDLE_INFO_INIT { 0 } @@ -52,6 +58,14 @@ enum bundle_list_mode { BUNDLE_MODE_ANY }; +enum bundle_list_heuristic { + BUNDLE_HEURISTIC_NONE = 0, + BUNDLE_HEURISTIC_CREATIONTOKEN, + + /* Must be last. */ + BUNDLE_HEURISTIC__COUNT +}; + /** * A bundle_list contains an unordered set of remote_bundle_info structs, * as well as information about the bundle listing, such as version and @@ -75,6 +89,12 @@ struct bundle_list { * advertised by the bundle list at that location. */ char *baseURI; + + /** + * A list can have a heuristic, which helps reduce the number of + * downloaded bundles. + */ + enum bundle_list_heuristic heuristic; }; void init_bundle_list(struct bundle_list *list); @@ -104,8 +124,14 @@ int bundle_uri_parse_config_format(const char *uri, * based on that information. * * Returns non-zero if no bundle information is found at the given 'uri'. + * + * If the pointer 'has_heuristic' is non-NULL, then the value it points to + * will be set to be non-zero if and only if the fetched list has a + * heuristic value. Such a value indicates that the list was designed for + * incremental fetches. */ -int fetch_bundle_uri(struct repository *r, const char *uri); +int fetch_bundle_uri(struct repository *r, const char *uri, + int *has_heuristic); /** * Given a bundle list that was already advertised (likely by the diff --git a/bundle.c b/bundle.c index 4ef7256aa1..76c3a90489 100644 --- a/bundle.c +++ b/bundle.c @@ -12,6 +12,7 @@ #include "refs.h" #include "strvec.h" #include "list-objects-filter-options.h" +#include "connected.h" static const char v2_bundle_signature[] = "# v2 git bundle\n"; static const char v3_bundle_signature[] = "# v3 git bundle\n"; @@ -187,6 +188,21 @@ static int list_refs(struct string_list *r, int argc, const char **argv) /* Remember to update object flag allocation in object.h */ #define PREREQ_MARK (1u<<16) +struct string_list_iterator { + struct string_list *list; + size_t cur; +}; + +static const struct object_id *iterate_ref_map(void *cb_data) +{ + struct string_list_iterator *iter = cb_data; + + if (iter->cur >= iter->list->nr) + return NULL; + + return iter->list->items[iter->cur++].util; +} + int verify_bundle(struct repository *r, struct bundle_header *header, enum verify_bundle_flags flags) @@ -196,26 +212,25 @@ int verify_bundle(struct repository *r, * to be verbose about the errors */ struct string_list *p = &header->prerequisites; - struct rev_info revs = REV_INFO_INIT; - const char *argv[] = {NULL, "--all", NULL}; - struct commit *commit; - int i, ret = 0, req_nr; + int i, ret = 0; const char *message = _("Repository lacks these prerequisite commits:"); + struct string_list_iterator iter = { + .list = p, + }; + struct check_connected_options opts = { + .quiet = 1, + }; if (!r || !r->objects || !r->objects->odb) return error(_("need a repository to verify a bundle")); - repo_init_revisions(r, &revs, NULL); for (i = 0; i < p->nr; i++) { struct string_list_item *e = p->items + i; const char *name = e->string; struct object_id *oid = e->util; struct object *o = parse_object(r, oid); - if (o) { - o->flags |= PREREQ_MARK; - add_pending_object(&revs, o, name); + if (o) continue; - } ret++; if (flags & VERIFY_BUNDLE_QUIET) continue; @@ -223,37 +238,14 @@ int verify_bundle(struct repository *r, error("%s", message); error("%s %s", oid_to_hex(oid), name); } - if (revs.pending.nr != p->nr) + if (ret) goto cleanup; - req_nr = revs.pending.nr; - setup_revisions(2, argv, &revs, NULL); - list_objects_filter_copy(&revs.filter, &header->filter); - - if (prepare_revision_walk(&revs)) - die(_("revision walk setup failed")); - - i = req_nr; - while (i && (commit = get_revision(&revs))) - if (commit->object.flags & PREREQ_MARK) - i--; - - for (i = 0; i < p->nr; i++) { - struct string_list_item *e = p->items + i; - const char *name = e->string; - const struct object_id *oid = e->util; - struct object *o = parse_object(r, oid); - assert(o); /* otherwise we'd have returned early */ - if (o->flags & SHOWN) - continue; - ret++; - if (flags & VERIFY_BUNDLE_QUIET) - continue; - if (ret == 1) - error("%s", message); - error("%s %s", oid_to_hex(oid), name); - } + if ((ret = check_connected(iterate_ref_map, &iter, &opts))) + error(_("some prerequisite commits exist in the object store, " + "but are not connected to the repository's history")); + /* TODO: preserve this verbose language. */ if (flags & VERIFY_BUNDLE_VERBOSE) { struct string_list *r; @@ -282,15 +274,6 @@ int verify_bundle(struct repository *r, list_objects_filter_spec(&header->filter)); } cleanup: - /* Clean up objects used, as they will be reused. */ - for (i = 0; i < p->nr; i++) { - struct string_list_item *e = p->items + i; - struct object_id *oid = e->util; - commit = lookup_commit_reference_gently(r, oid, 1); - if (commit) - clear_commit_marks(commit, ALL_REV_FLAGS | PREREQ_MARK); - } - release_revisions(&revs); return ret; } diff --git a/t/t5558-clone-bundle-uri.sh b/t/t5558-clone-bundle-uri.sh index 9155f31fa2..afd56926c5 100755 --- a/t/t5558-clone-bundle-uri.sh +++ b/t/t5558-clone-bundle-uri.sh @@ -285,6 +285,8 @@ test_expect_success 'clone HTTP bundle' ' ' test_expect_success 'clone bundle list (HTTP, no heuristic)' ' + test_when_finished rm -f trace*.txt && + cp clone-from/bundle-*.bundle "$HTTPD_DOCUMENT_ROOT_PATH/" && cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && [bundle] @@ -304,12 +306,26 @@ test_expect_success 'clone bundle list (HTTP, no heuristic)' ' uri = $HTTPD_URL/bundle-4.bundle EOF - git clone --bundle-uri="$HTTPD_URL/bundle-list" \ + GIT_TRACE2_EVENT="$(pwd)/trace-clone.txt" \ + git clone --bundle-uri="$HTTPD_URL/bundle-list" \ clone-from clone-list-http 2>err && ! grep "Repository lacks these prerequisite commits" err && git -C clone-from for-each-ref --format="%(objectname)" >oids && - git -C clone-list-http cat-file --batch-check expect <<-EOF && + $HTTPD_URL/bundle-1.bundle + $HTTPD_URL/bundle-2.bundle + $HTTPD_URL/bundle-3.bundle + $HTTPD_URL/bundle-4.bundle + $HTTPD_URL/bundle-list + EOF + + # Sort the list, since the order is not well-defined + # without a heuristic. + test_remote_https_urls actual && + test_cmp expect actual ' test_expect_success 'clone bundle list (HTTP, any mode)' ' @@ -350,6 +366,658 @@ test_expect_success 'clone bundle list (HTTP, any mode)' ' test_cmp expect actual ' +test_expect_success 'clone bundle list (http, creationToken)' ' + test_when_finished rm -f trace*.txt && + + cp clone-from/bundle-*.bundle "$HTTPD_DOCUMENT_ROOT_PATH/" && + cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle] + version = 1 + mode = all + heuristic = creationToken + + [bundle "bundle-1"] + uri = bundle-1.bundle + creationToken = 1 + + [bundle "bundle-2"] + uri = bundle-2.bundle + creationToken = 2 + + [bundle "bundle-3"] + uri = bundle-3.bundle + creationToken = 3 + + [bundle "bundle-4"] + uri = bundle-4.bundle + creationToken = 4 + EOF + + GIT_TRACE2_EVENT="$(pwd)/trace-clone.txt" git \ + clone --bundle-uri="$HTTPD_URL/bundle-list" \ + "$HTTPD_URL/smart/fetch.git" clone-list-http-2 && + + git -C clone-from for-each-ref --format="%(objectname)" >oids && + git -C clone-list-http-2 cat-file --batch-check expect <<-EOF && + $HTTPD_URL/bundle-list + $HTTPD_URL/bundle-4.bundle + $HTTPD_URL/bundle-3.bundle + $HTTPD_URL/bundle-2.bundle + $HTTPD_URL/bundle-1.bundle + EOF + + test_remote_https_urls actual && + test_cmp expect actual +' + +test_expect_success 'clone incomplete bundle list (http, creationToken)' ' + test_when_finished rm -f trace*.txt && + + cp clone-from/bundle-*.bundle "$HTTPD_DOCUMENT_ROOT_PATH/" && + cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle] + version = 1 + mode = all + heuristic = creationToken + + [bundle "bundle-1"] + uri = bundle-1.bundle + creationToken = 1 + EOF + + GIT_TRACE2_EVENT=$(pwd)/trace-clone.txt \ + git clone --bundle-uri="$HTTPD_URL/bundle-list" \ + --single-branch --branch=base --no-tags \ + "$HTTPD_URL/smart/fetch.git" clone-token-http && + + test_cmp_config -C clone-token-http "$HTTPD_URL/bundle-list" fetch.bundleuri && + test_cmp_config -C clone-token-http 1 fetch.bundlecreationtoken && + + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + $HTTPD_URL/bundle-1.bundle + EOF + + test_remote_https_urls actual && + test_cmp expect actual && + + # We now have only one bundle ref. + git -C clone-token-http for-each-ref --format="%(refname)" "refs/bundles/*" >refs && + cat >expect <<-\EOF && + refs/bundles/base + EOF + test_cmp expect refs && + + # Add remaining bundles, exercising the "deepening" strategy + # for downloading via the creationToken heurisitc. + cat >>"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle "bundle-2"] + uri = bundle-2.bundle + creationToken = 2 + + [bundle "bundle-3"] + uri = bundle-3.bundle + creationToken = 3 + + [bundle "bundle-4"] + uri = bundle-4.bundle + creationToken = 4 + EOF + + GIT_TRACE2_EVENT="$(pwd)/trace1.txt" \ + git -C clone-token-http fetch origin --no-tags \ + refs/heads/merge:refs/heads/merge && + test_cmp_config -C clone-token-http 4 fetch.bundlecreationtoken && + + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + $HTTPD_URL/bundle-4.bundle + $HTTPD_URL/bundle-3.bundle + $HTTPD_URL/bundle-2.bundle + EOF + + test_remote_https_urls actual && + test_cmp expect actual && + + # We now have all bundle refs. + git -C clone-token-http for-each-ref --format="%(refname)" "refs/bundles/*" >refs && + + cat >expect <<-\EOF && + refs/bundles/base + refs/bundles/left + refs/bundles/merge + refs/bundles/right + EOF + test_cmp expect refs +' + +test_expect_success 'http clone with bundle.heuristic creates fetch.bundleURI' ' + test_when_finished rm -rf fetch-http-4 trace*.txt && + + cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle] + version = 1 + mode = all + heuristic = creationToken + + [bundle "bundle-1"] + uri = bundle-1.bundle + creationToken = 1 + EOF + + GIT_TRACE2_EVENT="$(pwd)/trace-clone.txt" \ + git clone --single-branch --branch=base \ + --bundle-uri="$HTTPD_URL/bundle-list" \ + "$HTTPD_URL/smart/fetch.git" fetch-http-4 && + + test_cmp_config -C fetch-http-4 "$HTTPD_URL/bundle-list" fetch.bundleuri && + test_cmp_config -C fetch-http-4 1 fetch.bundlecreationtoken && + + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + $HTTPD_URL/bundle-1.bundle + EOF + + test_remote_https_urls actual && + test_cmp expect actual && + + # only received base ref from bundle-1 + git -C fetch-http-4 for-each-ref --format="%(refname)" "refs/bundles/*" >refs && + cat >expect <<-\EOF && + refs/bundles/base + EOF + test_cmp expect refs && + + cat >>"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle "bundle-2"] + uri = bundle-2.bundle + creationToken = 2 + EOF + + # Fetch the objects for bundle-2 _and_ bundle-3. + GIT_TRACE2_EVENT="$(pwd)/trace1.txt" \ + git -C fetch-http-4 fetch origin --no-tags \ + refs/heads/left:refs/heads/left \ + refs/heads/right:refs/heads/right && + test_cmp_config -C fetch-http-4 2 fetch.bundlecreationtoken && + + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + $HTTPD_URL/bundle-2.bundle + EOF + + test_remote_https_urls actual && + test_cmp expect actual && + + # received left from bundle-2 + git -C fetch-http-4 for-each-ref --format="%(refname)" "refs/bundles/*" >refs && + cat >expect <<-\EOF && + refs/bundles/base + refs/bundles/left + EOF + test_cmp expect refs && + + # No-op fetch + GIT_TRACE2_EVENT="$(pwd)/trace1b.txt" \ + git -C fetch-http-4 fetch origin --no-tags \ + refs/heads/left:refs/heads/left \ + refs/heads/right:refs/heads/right && + + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + EOF + test_remote_https_urls actual && + test_cmp expect actual && + + cat >>"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle "bundle-3"] + uri = bundle-3.bundle + creationToken = 3 + + [bundle "bundle-4"] + uri = bundle-4.bundle + creationToken = 4 + EOF + + # This fetch should skip bundle-3.bundle, since its objects are + # already local (we have the requisite commits for bundle-4.bundle). + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \ + git -C fetch-http-4 fetch origin --no-tags \ + refs/heads/merge:refs/heads/merge && + test_cmp_config -C fetch-http-4 4 fetch.bundlecreationtoken && + + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + $HTTPD_URL/bundle-4.bundle + EOF + + test_remote_https_urls actual && + test_cmp expect actual && + + # received merge ref from bundle-4, but right is missing + # because we did not download bundle-3. + git -C fetch-http-4 for-each-ref --format="%(refname)" "refs/bundles/*" >refs && + + cat >expect <<-\EOF && + refs/bundles/base + refs/bundles/left + refs/bundles/merge + EOF + test_cmp expect refs && + + # No-op fetch + GIT_TRACE2_EVENT="$(pwd)/trace2b.txt" \ + git -C fetch-http-4 fetch origin && + + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + EOF + test_remote_https_urls actual && + test_cmp expect actual +' + +test_expect_success 'creationToken heuristic with failed downloads (clone)' ' + test_when_finished rm -rf download-* trace*.txt && + + # Case 1: base bundle does not exist, nothing can unbundle + cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle] + version = 1 + mode = all + heuristic = creationToken + + [bundle "bundle-1"] + uri = fake.bundle + creationToken = 1 + + [bundle "bundle-2"] + uri = bundle-2.bundle + creationToken = 2 + + [bundle "bundle-3"] + uri = bundle-3.bundle + creationToken = 3 + + [bundle "bundle-4"] + uri = bundle-4.bundle + creationToken = 4 + EOF + + GIT_TRACE2_EVENT="$(pwd)/trace-clone-1.txt" \ + git clone --single-branch --branch=base \ + --bundle-uri="$HTTPD_URL/bundle-list" \ + "$HTTPD_URL/smart/fetch.git" download-1 && + + # Bundle failure does not set these configs. + test_must_fail git -C download-1 config fetch.bundleuri && + test_must_fail git -C download-1 config fetch.bundlecreationtoken && + + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + $HTTPD_URL/bundle-4.bundle + $HTTPD_URL/bundle-3.bundle + $HTTPD_URL/bundle-2.bundle + $HTTPD_URL/fake.bundle + EOF + test_remote_https_urls actual && + test_cmp expect actual && + + # All bundles failed to unbundle + git -C download-1 for-each-ref --format="%(refname)" "refs/bundles/*" >refs && + test_must_be_empty refs && + + # Case 2: middle bundle does not exist, only two bundles can unbundle + cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle] + version = 1 + mode = all + heuristic = creationToken + + [bundle "bundle-1"] + uri = bundle-1.bundle + creationToken = 1 + + [bundle "bundle-2"] + uri = fake.bundle + creationToken = 2 + + [bundle "bundle-3"] + uri = bundle-3.bundle + creationToken = 3 + + [bundle "bundle-4"] + uri = bundle-4.bundle + creationToken = 4 + EOF + + GIT_TRACE2_EVENT="$(pwd)/trace-clone-2.txt" \ + git clone --single-branch --branch=base \ + --bundle-uri="$HTTPD_URL/bundle-list" \ + "$HTTPD_URL/smart/fetch.git" download-2 && + + # Bundle failure does not set these configs. + test_must_fail git -C download-2 config fetch.bundleuri && + test_must_fail git -C download-2 config fetch.bundlecreationtoken && + + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + $HTTPD_URL/bundle-4.bundle + $HTTPD_URL/bundle-3.bundle + $HTTPD_URL/fake.bundle + $HTTPD_URL/bundle-1.bundle + EOF + test_remote_https_urls actual && + test_cmp expect actual && + + # bundle-1 and bundle-3 could unbundle, but bundle-4 could not + git -C download-2 for-each-ref --format="%(refname)" "refs/bundles/*" >refs && + cat >expect <<-EOF && + refs/bundles/base + refs/bundles/right + EOF + test_cmp expect refs && + + # Case 3: top bundle does not exist, rest unbundle fine. + cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle] + version = 1 + mode = all + heuristic = creationToken + + [bundle "bundle-1"] + uri = bundle-1.bundle + creationToken = 1 + + [bundle "bundle-2"] + uri = bundle-2.bundle + creationToken = 2 + + [bundle "bundle-3"] + uri = bundle-3.bundle + creationToken = 3 + + [bundle "bundle-4"] + uri = fake.bundle + creationToken = 4 + EOF + + GIT_TRACE2_EVENT="$(pwd)/trace-clone-3.txt" \ + git clone --single-branch --branch=base \ + --bundle-uri="$HTTPD_URL/bundle-list" \ + "$HTTPD_URL/smart/fetch.git" download-3 && + + # As long as we have continguous successful downloads, + # we _do_ set these configs. + test_cmp_config -C download-3 "$HTTPD_URL/bundle-list" fetch.bundleuri && + test_cmp_config -C download-3 3 fetch.bundlecreationtoken && + + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + $HTTPD_URL/fake.bundle + $HTTPD_URL/bundle-3.bundle + $HTTPD_URL/bundle-2.bundle + $HTTPD_URL/bundle-1.bundle + EOF + test_remote_https_urls actual && + test_cmp expect actual && + + # fake.bundle did not unbundle, but the others did. + git -C download-3 for-each-ref --format="%(refname)" "refs/bundles/*" >refs && + cat >expect <<-EOF && + refs/bundles/base + refs/bundles/left + refs/bundles/right + EOF + test_cmp expect refs +' + +# Expand the bundle list to include other interesting shapes, specifically +# interesting for use when fetching from a previous state. +# +# ---------------- bundle-7 +# 7 +# _/|\_ +# ---/--|--\------ bundle-6 +# 5 | 6 +# --|---|---|----- bundle-4 +# | 4 | +# | / \ / +# --|-|---|/------ bundle-3 (the client will be caught up to this point.) +# \ | 3 +# ---\|---|------- bundle-2 +# 2 | +# ----|---|------- bundle-1 +# \ / +# 1 +# | +# (previous commits) +test_expect_success 'expand incremental bundle list' ' + ( + cd clone-from && + git checkout -b lefter left && + test_commit 5 && + git checkout -b righter right && + test_commit 6 && + git checkout -b top lefter && + git merge -m "7" merge righter && + + git bundle create bundle-6.bundle lefter righter --not left right && + git bundle create bundle-7.bundle top --not lefter merge righter && + + cp bundle-*.bundle "$HTTPD_DOCUMENT_ROOT_PATH/" + ) && + git -C "$HTTPD_DOCUMENT_ROOT_PATH/fetch.git" fetch origin +refs/heads/*:refs/heads/* +' + +test_expect_success 'creationToken heuristic with failed downloads (fetch)' ' + test_when_finished rm -rf download-* trace*.txt && + + cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle] + version = 1 + mode = all + heuristic = creationToken + + [bundle "bundle-1"] + uri = bundle-1.bundle + creationToken = 1 + + [bundle "bundle-2"] + uri = bundle-2.bundle + creationToken = 2 + + [bundle "bundle-3"] + uri = bundle-3.bundle + creationToken = 3 + EOF + + git clone --single-branch --branch=left \ + --bundle-uri="$HTTPD_URL/bundle-list" \ + "$HTTPD_URL/smart/fetch.git" fetch-base && + test_cmp_config -C fetch-base "$HTTPD_URL/bundle-list" fetch.bundleURI && + test_cmp_config -C fetch-base 3 fetch.bundleCreationToken && + + # Case 1: all bundles exist: successful unbundling of all bundles + cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle] + version = 1 + mode = all + heuristic = creationToken + + [bundle "bundle-1"] + uri = bundle-1.bundle + creationToken = 1 + + [bundle "bundle-2"] + uri = bundle-2.bundle + creationToken = 2 + + [bundle "bundle-3"] + uri = bundle-3.bundle + creationToken = 3 + + [bundle "bundle-4"] + uri = bundle-4.bundle + creationToken = 4 + + [bundle "bundle-6"] + uri = bundle-6.bundle + creationToken = 6 + + [bundle "bundle-7"] + uri = bundle-7.bundle + creationToken = 7 + EOF + + cp -r fetch-base fetch-1 && + GIT_TRACE2_EVENT="$(pwd)/trace-fetch-1.txt" \ + git -C fetch-1 fetch origin && + test_cmp_config -C fetch-1 7 fetch.bundlecreationtoken && + + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + $HTTPD_URL/bundle-7.bundle + $HTTPD_URL/bundle-6.bundle + $HTTPD_URL/bundle-4.bundle + EOF + test_remote_https_urls actual && + test_cmp expect actual && + + # Check which bundles have unbundled by refs + git -C fetch-1 for-each-ref --format="%(refname)" "refs/bundles/*" >refs && + cat >expect <<-EOF && + refs/bundles/base + refs/bundles/left + refs/bundles/lefter + refs/bundles/merge + refs/bundles/right + refs/bundles/righter + refs/bundles/top + EOF + test_cmp expect refs && + + # Case 2: middle bundle does not exist, only bundle-4 can unbundle + cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle] + version = 1 + mode = all + heuristic = creationToken + + [bundle "bundle-1"] + uri = bundle-1.bundle + creationToken = 1 + + [bundle "bundle-2"] + uri = bundle-2.bundle + creationToken = 2 + + [bundle "bundle-3"] + uri = bundle-3.bundle + creationToken = 3 + + [bundle "bundle-4"] + uri = bundle-4.bundle + creationToken = 4 + + [bundle "bundle-6"] + uri = fake.bundle + creationToken = 6 + + [bundle "bundle-7"] + uri = bundle-7.bundle + creationToken = 7 + EOF + + cp -r fetch-base fetch-2 && + GIT_TRACE2_EVENT="$(pwd)/trace-fetch-2.txt" \ + git -C fetch-2 fetch origin && + + # Since bundle-7 fails to unbundle, do not update creation token. + test_cmp_config -C fetch-2 3 fetch.bundlecreationtoken && + + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + $HTTPD_URL/bundle-7.bundle + $HTTPD_URL/fake.bundle + $HTTPD_URL/bundle-4.bundle + EOF + test_remote_https_urls actual && + test_cmp expect actual && + + # Check which bundles have unbundled by refs + git -C fetch-2 for-each-ref --format="%(refname)" "refs/bundles/*" >refs && + cat >expect <<-EOF && + refs/bundles/base + refs/bundles/left + refs/bundles/merge + refs/bundles/right + EOF + test_cmp expect refs && + + # Case 3: top bundle does not exist, rest unbundle fine. + cat >"$HTTPD_DOCUMENT_ROOT_PATH/bundle-list" <<-EOF && + [bundle] + version = 1 + mode = all + heuristic = creationToken + + [bundle "bundle-1"] + uri = bundle-1.bundle + creationToken = 1 + + [bundle "bundle-2"] + uri = bundle-2.bundle + creationToken = 2 + + [bundle "bundle-3"] + uri = bundle-3.bundle + creationToken = 3 + + [bundle "bundle-4"] + uri = bundle-4.bundle + creationToken = 4 + + [bundle "bundle-6"] + uri = bundle-6.bundle + creationToken = 6 + + [bundle "bundle-7"] + uri = fake.bundle + creationToken = 7 + EOF + + cp -r fetch-base fetch-3 && + GIT_TRACE2_EVENT="$(pwd)/trace-fetch-3.txt" \ + git -C fetch-3 fetch origin && + + # As long as we have continguous successful downloads, + # we _do_ set the maximum creation token. + test_cmp_config -C fetch-3 6 fetch.bundlecreationtoken && + + # NOTE: the fetch skips bundle-4 since bundle-6 successfully + # unbundles itself and bundle-7 failed to download. + cat >expect <<-EOF && + $HTTPD_URL/bundle-list + $HTTPD_URL/fake.bundle + $HTTPD_URL/bundle-6.bundle + EOF + test_remote_https_urls actual && + test_cmp expect actual && + + # Check which bundles have unbundled by refs + git -C fetch-3 for-each-ref --format="%(refname)" "refs/bundles/*" >refs && + cat >expect <<-EOF && + refs/bundles/base + refs/bundles/left + refs/bundles/lefter + refs/bundles/right + refs/bundles/righter + EOF + test_cmp expect refs +' + # Do not add tests here unless they use the HTTP server, as they will # not run unless the HTTP dependencies exist. diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index 1928ea1dd7..b7d5551262 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -831,6 +831,52 @@ test_expect_success 'auto-discover multiple bundles from HTTP clone' ' grep -f pattern trace.txt ' +test_expect_success 'auto-discover multiple bundles from HTTP clone: creationToken heuristic' ' + test_when_finished rm -rf "$HTTPD_DOCUMENT_ROOT_PATH/repo4.git" && + test_when_finished rm -rf clone-heuristic trace*.txt && + + test_commit -C src newest && + git -C src bundle create "$HTTPD_DOCUMENT_ROOT_PATH/newest.bundle" HEAD~1..HEAD && + git clone --bare --no-local src "$HTTPD_DOCUMENT_ROOT_PATH/repo4.git" && + + cat >>"$HTTPD_DOCUMENT_ROOT_PATH/repo4.git/config" <<-EOF && + [uploadPack] + advertiseBundleURIs = true + + [bundle] + version = 1 + mode = all + heuristic = creationToken + + [bundle "everything"] + uri = $HTTPD_URL/everything.bundle + creationtoken = 1 + + [bundle "new"] + uri = $HTTPD_URL/new.bundle + creationtoken = 2 + + [bundle "newest"] + uri = $HTTPD_URL/newest.bundle + creationtoken = 3 + EOF + + GIT_TRACE2_EVENT="$(pwd)/trace-clone.txt" \ + git -c protocol.version=2 \ + -c transfer.bundleURI=true clone \ + "$HTTPD_URL/smart/repo4.git" clone-heuristic && + + cat >expect <<-EOF && + $HTTPD_URL/newest.bundle + $HTTPD_URL/new.bundle + $HTTPD_URL/everything.bundle + EOF + + # We should fetch all bundles in the expected order. + test_remote_https_urls actual && + test_cmp expect actual +' + # DO NOT add non-httpd-specific tests here, because the last part of this # test script is only executed when httpd is available and enabled. diff --git a/t/t5750-bundle-uri-parse.sh b/t/t5750-bundle-uri-parse.sh index 7b4f930e53..81bdf58b94 100755 --- a/t/t5750-bundle-uri-parse.sh +++ b/t/t5750-bundle-uri-parse.sh @@ -250,4 +250,41 @@ test_expect_success 'parse config format edge cases: empty key or value' ' test_cmp_config_output expect actual ' +test_expect_success 'parse config format: creationToken heuristic' ' + cat >expect <<-\EOF && + [bundle] + version = 1 + mode = all + heuristic = creationToken + [bundle "one"] + uri = http://example.com/bundle.bdl + creationToken = 123456 + [bundle "two"] + uri = https://example.com/bundle.bdl + creationToken = 12345678901234567890 + [bundle "three"] + uri = file:///usr/share/git/bundle.bdl + creationToken = 1 + EOF + + test-tool bundle-uri parse-config expect >actual 2>err && + test_must_be_empty err && + test_cmp_config_output expect actual +' + +test_expect_success 'parse config format edge cases: creationToken heuristic' ' + cat >expect <<-\EOF && + [bundle] + version = 1 + mode = all + heuristic = creationToken + [bundle "one"] + uri = http://example.com/bundle.bdl + creationToken = bogus + EOF + + test-tool bundle-uri parse-config expect >actual 2>err && + grep "could not parse bundle list key creationToken with value '\''bogus'\''" err +' + test_done diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh index 3a1cf30b1d..7d40994991 100755 --- a/t/t6020-bundle-misc.sh +++ b/t/t6020-bundle-misc.sh @@ -566,4 +566,44 @@ test_expect_success 'cloning from filtered bundle has useful error' ' grep "cannot clone from filtered bundle" err ' +test_expect_success 'verify catches unreachable, broken prerequisites' ' + test_when_finished rm -rf clone-from clone-to && + git init clone-from && + ( + cd clone-from && + git checkout -b base && + test_commit A && + git checkout -b tip && + git commit --allow-empty -m "will drop by shallow" && + git commit --allow-empty -m "will keep by shallow" && + git commit --allow-empty -m "for bundle, not clone" && + git bundle create tip.bundle tip~1..tip && + git reset --hard HEAD~1 && + git checkout base + ) && + BAD_OID=$(git -C clone-from rev-parse tip~1) && + TIP_OID=$(git -C clone-from rev-parse tip) && + git clone --depth=1 --no-single-branch \ + "file://$(pwd)/clone-from" clone-to && + ( + cd clone-to && + + # Set up broken history by removing shallow markers + git update-ref -d refs/remotes/origin/tip && + rm .git/shallow && + + # Verify should fail + test_must_fail git bundle verify \ + ../clone-from/tip.bundle 2>err && + grep "some prerequisite commits .* are not connected" err && + test_line_count = 1 err && + + # Unbundling should fail + test_must_fail git bundle unbundle \ + ../clone-from/tip.bundle 2>err && + grep "some prerequisite commits .* are not connected" err && + test_line_count = 1 err + ) +' + test_done diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index 75b8ee95e7..58cfd2f1fd 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -1767,6 +1767,14 @@ test_region () { return 0 } +# Given a GIT_TRACE2_EVENT log over stdin, writes to stdout a list of URLs +# sent to git-remote-https child processes. +test_remote_https_urls() { + grep -e '"event":"child_start".*"argv":\["git-remote-https",".*"\]' | + sed -e 's/{"event":"child_start".*"argv":\["git-remote-https","//g' \ + -e 's/"\]}//g' +} + # Print the destination of symlink(s) provided as arguments. Basically # the same as the readlink command, but it's not available everywhere. test_readlink () {