From 895a1d1e57c8dfb8470bb742f05f1969b9961e07 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 5 Apr 2007 03:22:54 -0700 Subject: [PATCH 1/3] git-fetch--tool pick-rref This script helper takes list of fully qualified refnames and results from ls-remote and grabs only the lines for the named refs from the latter. Signed-off-by: Junio C Hamano --- builtin-fetch--tool.c | 84 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/builtin-fetch--tool.c b/builtin-fetch--tool.c index e9d6764550..be341c159f 100644 --- a/builtin-fetch--tool.c +++ b/builtin-fetch--tool.c @@ -436,10 +436,87 @@ static int expand_refs_wildcard(const char *ls_remote_result, int numrefs, return 0; } +static int pick_rref(int sha1_only, const char *rref, const char *ls_remote_result) +{ + int err = 0; + int lrr_count = lrr_count, i, pass; + const char *cp; + struct lrr { + const char *line; + const char *name; + int namelen; + int shown; + } *lrr_list = lrr_list; + + for (pass = 0; pass < 2; pass++) { + /* pass 0 counts and allocates, pass 1 fills... */ + cp = ls_remote_result; + i = 0; + while (1) { + const char *np; + while (*cp && isspace(*cp)) + cp++; + if (!*cp) + break; + np = strchr(cp, '\n'); + if (!np) + np = cp + strlen(cp); + if (pass) { + lrr_list[i].line = cp; + lrr_list[i].name = cp + 41; + lrr_list[i].namelen = np - (cp + 41); + } + i++; + cp = np; + } + if (!pass) { + lrr_count = i; + lrr_list = xcalloc(lrr_count, sizeof(*lrr_list)); + } + } + + while (1) { + const char *next; + int rreflen; + int i; + + while (*rref && isspace(*rref)) + rref++; + if (!*rref) + break; + next = strchr(rref, '\n'); + if (!next) + next = rref + strlen(rref); + rreflen = next - rref; + + for (i = 0; i < lrr_count; i++) { + struct lrr *lrr = &(lrr_list[i]); + + if (rreflen == lrr->namelen && + !memcmp(lrr->name, rref, rreflen)) { + if (!lrr->shown) + printf("%.*s\n", + sha1_only ? 40 : lrr->namelen + 41, + lrr->line); + lrr->shown = 1; + break; + } + } + if (lrr_count <= i) { + error("pick-rref: %.*s not found", rreflen, rref); + err = 1; + } + rref = next; + } + free(lrr_list); + return err; +} + int cmd_fetch__tool(int argc, const char **argv, const char *prefix) { int verbose = 0; int force = 0; + int sopt = 0; while (1 < argc) { const char *arg = argv[1]; @@ -447,6 +524,8 @@ int cmd_fetch__tool(int argc, const char **argv, const char *prefix) verbose = 1; else if (!strcmp("-f", arg)) force = 1; + else if (!strcmp("-s", arg)) + sopt = 1; else break; argc--; @@ -491,6 +570,11 @@ int cmd_fetch__tool(int argc, const char **argv, const char *prefix) reflist = get_stdin(); return parse_reflist(reflist); } + if (!strcmp("pick-rref", argv[1])) { + if (argc != 4) + return error("pick-rref takes 2 args"); + return pick_rref(sopt, argv[2], argv[3]); + } if (!strcmp("expand-refs-wildcard", argv[1])) { const char *reflist; if (argc < 4) From e3c6f240fd9c5bdeb33f2d47adc859f37935e2df Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 5 Apr 2007 03:22:55 -0700 Subject: [PATCH 2/3] git-fetch: use fetch--tool pick-rref to avoid local fetch from alternate When we are fetching from a repository that is on a local filesystem, first check if we have all the objects that we are going to fetch available locally, by not just checking the tips of what we are fetching, but with a full reachability analysis to our existing refs. In such a case, we do not have to run git-fetch-pack which would send many needless objects. This is especially true when the other repository is an alternate of the current repository (e.g. perhaps the repository was created by running "git clone -l -s" from there). The useless objects transferred used to be discarded when they were expanded by git-unpack-objects called from git-fetch-pack, but recent git-fetch-pack prefers to keep the data it receives from the other end without exploding them into loose objects, resulting in a pack full of duplicated data when fetching from your own alternate. This also uses fetch--tool pick-rref on dumb transport side to remove a shell loop to do the same. Signed-off-by: Junio C Hamano --- git-fetch.sh | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/git-fetch.sh b/git-fetch.sh index b04bd553f8..832b20cce6 100755 --- a/git-fetch.sh +++ b/git-fetch.sh @@ -177,9 +177,33 @@ fetch_all_at_once () { git-bundle unbundle "$remote" $rref || echo failed "$remote" else - git-fetch-pack --thin $exec $keep $shallow_depth \ - $quiet $no_progress "$remote" $rref || - echo failed "$remote" + if test -d "$remote" && + + # The remote might be our alternate. With + # this optimization we will bypass fetch-pack + # altogether, which means we cannot be doing + # the shallow stuff at all. + test ! -f "$GIT_DIR/shallow" && + test -z "$shallow_depth" && + + # See if all of what we are going to fetch are + # connected to our repository's tips, in which + # case we do not have to do any fetch. + theirs=$(git-fetch--tool -s pick-rref \ + "$rref" "$ls_remote_result") && + + # This will barf when $theirs reach an object that + # we do not have in our repository. Otherwise, + # we already have everything the fetch would bring in. + git-rev-list --objects $theirs --not --all \ + >/dev/null 2>/dev/null + then + git-fetch--tool pick-rref "$rref" "$ls_remote_result" + else + git-fetch-pack --thin $exec $keep $shallow_depth \ + $quiet $no_progress "$remote" $rref || + echo failed "$remote" + fi fi ) | ( @@ -239,16 +263,8 @@ fetch_per_ref () { fi # Find $remote_name from ls-remote output. - head=$( - IFS=' ' - echo "$ls_remote_result" | - while read sha1 name - do - test "z$name" = "z$remote_name" || continue - echo "$sha1" - break - done - ) + head=$(git-fetch--tool -s pick-rref \ + "$remote_name" "$ls_remote_result") expr "z$head" : "z$_x40\$" >/dev/null || die "No such ref $remote_name at $remote" echo >&2 "Fetching $remote_name from $remote using $proto" From b9849a1ab63143c3b70e339491a897ef62a4173b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 16 Apr 2007 00:42:29 -0700 Subject: [PATCH 3/3] Make sure quickfetch is not fooled with a previous, incomplete fetch. This updates git-rev-list --objects to be a bit more careful when listing a blob object to make sure the blob actually exists, and uses it to make sure the quick-fetch optimization we introduced earlier is not fooled by a previous incomplete fetch. The quick-fetch optimization works by running this command: git rev-list --objects <> --not --all where <> is a list of commits that we are going to fetch from the other side. If there is any object missing to complete the <>, the rev-list would fail and die (say, the commit was in our repository, but its tree wasn't -- then it will barf while trying to list the blobs the tree contains because it cannot read that tree). Usually we do not have the objects (otherwise why would we fetching?), but in one important special case we do: when the remote repository is used as an alternate object store (i.e. pointed by .git/objects/info/alternates). We could check .git/objects/info/alternates to see if the remote we are interacting with is one of them (or is used as an alternate, recursively, by one of them), but that check is more cumbersome than it is worth. The above check however did not catch missing blob, because object listing code did not read nor check blob objects, knowing that blobs do not contain any further references to other objects. This commit fixes it with practically unmeasurable overhead. I've benched this with git rev-list --objects --all >/dev/null in the kernel repository, with three different implementations of the "check-blob". - Checking with has_sha1_file() has negligible (unmeasurable) performance penalty. - Checking with sha1_object_info() makes it somewhat slower, perhaps by 5%. - Checking with read_sha1_file() to cause a fully re-validation is prohibitively expensive (about 4 times as much runtime). In my original patch, I had this as a command line option, but the overhead is small enough that it is not really worth it. Signed-off-by: Junio C Hamano --- builtin-rev-list.c | 4 ++ t/t5502-quickfetch.sh | 89 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100755 t/t5502-quickfetch.sh diff --git a/builtin-rev-list.c b/builtin-rev-list.c index 09774f9559..c0329dcecd 100644 --- a/builtin-rev-list.c +++ b/builtin-rev-list.c @@ -113,6 +113,10 @@ static void show_object(struct object_array_entry *p) * confuse downstream git-pack-objects very badly. */ const char *ep = strchr(p->name, '\n'); + + if (p->item->type == OBJ_BLOB && !has_sha1_file(p->item->sha1)) + die("missing blob object '%s'", sha1_to_hex(p->item->sha1)); + if (ep) { printf("%s %.*s\n", sha1_to_hex(p->item->sha1), (int) (ep - p->name), diff --git a/t/t5502-quickfetch.sh b/t/t5502-quickfetch.sh new file mode 100755 index 0000000000..b4760f2dc0 --- /dev/null +++ b/t/t5502-quickfetch.sh @@ -0,0 +1,89 @@ +#!/bin/sh + +test_description='test quickfetch from local' + +. ./test-lib.sh + +test_expect_success setup ' + + test_tick && + echo ichi >file && + git add file && + git commit -m initial && + + cnt=$( ( + git count-objects | sed -e "s/ *objects,.*//" + ) ) && + test $cnt -eq 3 +' + +test_expect_success 'clone without alternate' ' + + ( + mkdir cloned && + cd cloned && + git init-db && + git remote add -f origin .. + ) && + cnt=$( ( + cd cloned && + git count-objects | sed -e "s/ *objects,.*//" + ) ) && + test $cnt -eq 3 +' + +test_expect_success 'further commits in the original' ' + + test_tick && + echo ni >file && + git commit -a -m second && + + cnt=$( ( + git count-objects | sed -e "s/ *objects,.*//" + ) ) && + test $cnt -eq 6 +' + +test_expect_success 'copy commit and tree but not blob by hand' ' + + git rev-list --objects HEAD | + git pack-objects --stdout | + ( + cd cloned && + git unpack-objects + ) && + + cnt=$( ( + cd cloned && + git count-objects | sed -e "s/ *objects,.*//" + ) ) && + test $cnt -eq 6 + + blob=$(git rev-parse HEAD:file | sed -e "s|..|&/|") && + test -f "cloned/.git/objects/$blob" && + rm -f "cloned/.git/objects/$blob" && + + cnt=$( ( + cd cloned && + git count-objects | sed -e "s/ *objects,.*//" + ) ) && + test $cnt -eq 5 + +' + +test_expect_success 'quickfetch should not leave a corrupted repository' ' + + ( + cd cloned && + git fetch + ) && + + cnt=$( ( + cd cloned && + git count-objects | sed -e "s/ *objects,.*//" + ) ) && + test $cnt -eq 6 + +' + +test_done