1
0
mirror of https://github.com/git/git.git synced 2024-09-28 18:32:37 +02:00
git/upload-pack.c
Shawn O. Pearce 78affc49de Add multi_ack_detailed capability to fetch-pack/upload-pack
When multi_ack_detailed is enabled the ACK continue messages returned
by the remote upload-pack are broken out to describe the different
states within the peer.  This permits the client to better understand
the server's in-memory state.

The fetch-pack/upload-pack protocol now looks like:

NAK
---------------------------------
  Always sent in response to "done" if there was no common base
  selected from the "have" lines (or no have lines were sent).

  * no multi_ack or multi_ack_detailed:

    Sent when the client has sent a pkt-line flush ("0000") and
    the server has not yet found a common base object.

  * either multi_ack or multi_ack_detailed:

    Always sent in response to a pkt-line flush.

ACK %s
-----------------------------------
  * no multi_ack or multi_ack_detailed:

    Sent in response to "have" when the object exists on the remote
    side and is therefore an object in common between the peers.
    The argument is the SHA-1 of the common object.

  * either multi_ack or multi_ack_detailed:

    Sent in response to "done" if there are common objects.
    The argument is the last SHA-1 determined to be common.

ACK %s continue
-----------------------------------
  * multi_ack only:

    Sent in response to "have".

    The remote side wants the client to consider this object as
    common, and immediately stop transmitting additional "have"
    lines for objects that are reachable from it.  The reason
    the client should stop is not given, but is one of the two
    cases below available under multi_ack_detailed.

ACK %s common
-----------------------------------
  * multi_ack_detailed only:

    Sent in response to "have".  Both sides have this object.
    Like with "ACK %s continue" above the client should stop
    sending have lines reachable for objects from the argument.

ACK %s ready
-----------------------------------
  * multi_ack_detailed only:

    Sent in response to "have".

    The client should stop transmitting objects which are reachable
    from the argument, and send "done" soon to get the objects.

    If the remote side has the specified object, it should
    first send an "ACK %s common" message prior to sending
    "ACK %s ready".

    Clients may still submit additional "have" lines if there are
    more side branches for the client to explore that might be added
    to the common set and reduce the number of objects to transfer.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-30 19:20:54 -07:00

771 lines
18 KiB
C

#include "cache.h"
#include "refs.h"
#include "pkt-line.h"
#include "sideband.h"
#include "tag.h"
#include "object.h"
#include "commit.h"
#include "exec_cmd.h"
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
#include "run-command.h"
static const char upload_pack_usage[] = "git upload-pack [--strict] [--timeout=nn] <dir>";
/* bits #0..7 in revision.h, #8..10 in commit.c */
#define THEY_HAVE (1u << 11)
#define OUR_REF (1u << 12)
#define WANTED (1u << 13)
#define COMMON_KNOWN (1u << 14)
#define REACHABLE (1u << 15)
#define SHALLOW (1u << 16)
#define NOT_SHALLOW (1u << 17)
#define CLIENT_SHALLOW (1u << 18)
static unsigned long oldest_have;
static int multi_ack, nr_our_refs;
static int use_thin_pack, use_ofs_delta, use_include_tag;
static int no_progress, daemon_mode;
static int shallow_nr;
static struct object_array have_obj;
static struct object_array want_obj;
static struct object_array extra_edge_obj;
static unsigned int timeout;
/* 0 for no sideband,
* otherwise maximum packet size (up to 65520 bytes).
*/
static int use_sideband;
static int debug_fd;
static void reset_timeout(void)
{
alarm(timeout);
}
static int strip(char *line, int len)
{
if (len && line[len-1] == '\n')
line[--len] = 0;
return len;
}
static ssize_t send_client_data(int fd, const char *data, ssize_t sz)
{
if (use_sideband)
return send_sideband(1, fd, data, sz, use_sideband);
if (fd == 3)
/* emergency quit */
fd = 2;
if (fd == 2) {
/* XXX: are we happy to lose stuff here? */
xwrite(fd, data, sz);
return sz;
}
return safe_write(fd, data, sz);
}
static FILE *pack_pipe = NULL;
static void show_commit(struct commit *commit, void *data)
{
if (commit->object.flags & BOUNDARY)
fputc('-', pack_pipe);
if (fputs(sha1_to_hex(commit->object.sha1), pack_pipe) < 0)
die("broken output pipe");
fputc('\n', pack_pipe);
fflush(pack_pipe);
free(commit->buffer);
commit->buffer = NULL;
}
static void show_object(struct object *obj, const struct name_path *path, const char *component)
{
/* An object with name "foo\n0000000..." can be used to
* confuse downstream git-pack-objects very badly.
*/
const char *name = path_name(path, component);
const char *ep = strchr(name, '\n');
if (ep) {
fprintf(pack_pipe, "%s %.*s\n", sha1_to_hex(obj->sha1),
(int) (ep - name),
name);
}
else
fprintf(pack_pipe, "%s %s\n",
sha1_to_hex(obj->sha1), name);
free((char *)name);
}
static void show_edge(struct commit *commit)
{
fprintf(pack_pipe, "-%s\n", sha1_to_hex(commit->object.sha1));
}
static int do_rev_list(int fd, void *create_full_pack)
{
int i;
struct rev_info revs;
pack_pipe = xfdopen(fd, "w");
init_revisions(&revs, NULL);
revs.tag_objects = 1;
revs.tree_objects = 1;
revs.blob_objects = 1;
if (use_thin_pack)
revs.edge_hint = 1;
if (create_full_pack) {
const char *args[] = {"rev-list", "--all", NULL};
setup_revisions(2, args, &revs, NULL);
} else {
for (i = 0; i < want_obj.nr; i++) {
struct object *o = want_obj.objects[i].item;
/* why??? */
o->flags &= ~UNINTERESTING;
add_pending_object(&revs, o, NULL);
}
for (i = 0; i < have_obj.nr; i++) {
struct object *o = have_obj.objects[i].item;
o->flags |= UNINTERESTING;
add_pending_object(&revs, o, NULL);
}
setup_revisions(0, NULL, &revs, NULL);
}
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
mark_edges_uninteresting(revs.commits, &revs, show_edge);
if (use_thin_pack)
for (i = 0; i < extra_edge_obj.nr; i++)
fprintf(pack_pipe, "-%s\n", sha1_to_hex(
extra_edge_obj.objects[i].item->sha1));
traverse_commit_list(&revs, show_commit, show_object, NULL);
fflush(pack_pipe);
fclose(pack_pipe);
return 0;
}
static int feed_msg_to_hook(int fd, const char *fmt, ...)
{
int cnt;
char buf[1024];
va_list params;
va_start(params, fmt);
cnt = vsprintf(buf, fmt, params);
va_end(params);
return write_in_full(fd, buf, cnt) != cnt;
}
static int feed_obj_to_hook(const char *label, struct object_array *oa, int i, int fd)
{
return feed_msg_to_hook(fd, "%s %s\n", label,
sha1_to_hex(oa->objects[i].item->sha1));
}
static int run_post_upload_pack_hook(size_t total, struct timeval *tv)
{
const char *argv[2];
struct child_process proc;
int err, i;
argv[0] = "hooks/post-upload-pack";
argv[1] = NULL;
if (access(argv[0], X_OK) < 0)
return 0;
memset(&proc, 0, sizeof(proc));
proc.argv = argv;
proc.in = -1;
proc.stdout_to_stderr = 1;
err = start_command(&proc);
if (err)
return err;
for (i = 0; !err && i < want_obj.nr; i++)
err |= feed_obj_to_hook("want", &want_obj, i, proc.in);
for (i = 0; !err && i < have_obj.nr; i++)
err |= feed_obj_to_hook("have", &have_obj, i, proc.in);
if (!err)
err |= feed_msg_to_hook(proc.in, "time %ld.%06ld\n",
(long)tv->tv_sec, (long)tv->tv_usec);
if (!err)
err |= feed_msg_to_hook(proc.in, "size %ld\n", (long)total);
if (!err)
err |= feed_msg_to_hook(proc.in, "kind %s\n",
(nr_our_refs == want_obj.nr && !have_obj.nr)
? "clone" : "fetch");
if (close(proc.in))
err = 1;
if (finish_command(&proc))
err = 1;
return err;
}
static void create_pack_file(void)
{
struct timeval start_tv, tv;
struct async rev_list;
struct child_process pack_objects;
int create_full_pack = (nr_our_refs == want_obj.nr && !have_obj.nr);
char data[8193], progress[128];
char abort_msg[] = "aborting due to possible repository "
"corruption on the remote side.";
int buffered = -1;
ssize_t sz, total_sz;
const char *argv[10];
int arg = 0;
gettimeofday(&start_tv, NULL);
total_sz = 0;
if (shallow_nr) {
rev_list.proc = do_rev_list;
rev_list.data = 0;
if (start_async(&rev_list))
die("git upload-pack: unable to fork git-rev-list");
argv[arg++] = "pack-objects";
} else {
argv[arg++] = "pack-objects";
argv[arg++] = "--revs";
if (create_full_pack)
argv[arg++] = "--all";
else if (use_thin_pack)
argv[arg++] = "--thin";
}
argv[arg++] = "--stdout";
if (!no_progress)
argv[arg++] = "--progress";
if (use_ofs_delta)
argv[arg++] = "--delta-base-offset";
if (use_include_tag)
argv[arg++] = "--include-tag";
argv[arg++] = NULL;
memset(&pack_objects, 0, sizeof(pack_objects));
pack_objects.in = shallow_nr ? rev_list.out : -1;
pack_objects.out = -1;
pack_objects.err = -1;
pack_objects.git_cmd = 1;
pack_objects.argv = argv;
if (start_command(&pack_objects))
die("git upload-pack: unable to fork git-pack-objects");
/* pass on revisions we (don't) want */
if (!shallow_nr) {
FILE *pipe_fd = xfdopen(pack_objects.in, "w");
if (!create_full_pack) {
int i;
for (i = 0; i < want_obj.nr; i++)
fprintf(pipe_fd, "%s\n", sha1_to_hex(want_obj.objects[i].item->sha1));
fprintf(pipe_fd, "--not\n");
for (i = 0; i < have_obj.nr; i++)
fprintf(pipe_fd, "%s\n", sha1_to_hex(have_obj.objects[i].item->sha1));
}
fprintf(pipe_fd, "\n");
fflush(pipe_fd);
fclose(pipe_fd);
}
/* We read from pack_objects.err to capture stderr output for
* progress bar, and pack_objects.out to capture the pack data.
*/
while (1) {
struct pollfd pfd[2];
int pe, pu, pollsize;
reset_timeout();
pollsize = 0;
pe = pu = -1;
if (0 <= pack_objects.out) {
pfd[pollsize].fd = pack_objects.out;
pfd[pollsize].events = POLLIN;
pu = pollsize;
pollsize++;
}
if (0 <= pack_objects.err) {
pfd[pollsize].fd = pack_objects.err;
pfd[pollsize].events = POLLIN;
pe = pollsize;
pollsize++;
}
if (!pollsize)
break;
if (poll(pfd, pollsize, -1) < 0) {
if (errno != EINTR) {
error("poll failed, resuming: %s",
strerror(errno));
sleep(1);
}
continue;
}
if (0 <= pu && (pfd[pu].revents & (POLLIN|POLLHUP))) {
/* Data ready; we keep the last byte to ourselves
* in case we detect broken rev-list, so that we
* can leave the stream corrupted. This is
* unfortunate -- unpack-objects would happily
* accept a valid packdata with trailing garbage,
* so appending garbage after we pass all the
* pack data is not good enough to signal
* breakage to downstream.
*/
char *cp = data;
ssize_t outsz = 0;
if (0 <= buffered) {
*cp++ = buffered;
outsz++;
}
sz = xread(pack_objects.out, cp,
sizeof(data) - outsz);
if (0 < sz)
total_sz += sz;
else if (sz == 0) {
close(pack_objects.out);
pack_objects.out = -1;
}
else
goto fail;
sz += outsz;
if (1 < sz) {
buffered = data[sz-1] & 0xFF;
sz--;
}
else
buffered = -1;
sz = send_client_data(1, data, sz);
if (sz < 0)
goto fail;
}
if (0 <= pe && (pfd[pe].revents & (POLLIN|POLLHUP))) {
/* Status ready; we ship that in the side-band
* or dump to the standard error.
*/
sz = xread(pack_objects.err, progress,
sizeof(progress));
if (0 < sz)
send_client_data(2, progress, sz);
else if (sz == 0) {
close(pack_objects.err);
pack_objects.err = -1;
}
else
goto fail;
}
}
if (finish_command(&pack_objects)) {
error("git upload-pack: git-pack-objects died with error.");
goto fail;
}
if (shallow_nr && finish_async(&rev_list))
goto fail; /* error was already reported */
/* flush the data */
if (0 <= buffered) {
data[0] = buffered;
sz = send_client_data(1, data, 1);
if (sz < 0)
goto fail;
fprintf(stderr, "flushed.\n");
}
if (use_sideband)
packet_flush(1);
gettimeofday(&tv, NULL);
tv.tv_sec -= start_tv.tv_sec;
if (tv.tv_usec < start_tv.tv_usec) {
tv.tv_sec--;
tv.tv_usec += 1000000;
}
tv.tv_usec -= start_tv.tv_usec;
if (run_post_upload_pack_hook(total_sz, &tv))
warning("post-upload-hook failed");
return;
fail:
send_client_data(3, abort_msg, sizeof(abort_msg));
die("git upload-pack: %s", abort_msg);
}
static int got_sha1(char *hex, unsigned char *sha1)
{
struct object *o;
int we_knew_they_have = 0;
if (get_sha1_hex(hex, sha1))
die("git upload-pack: expected SHA1 object, got '%s'", hex);
if (!has_sha1_file(sha1))
return -1;
o = lookup_object(sha1);
if (!(o && o->parsed))
o = parse_object(sha1);
if (!o)
die("oops (%s)", sha1_to_hex(sha1));
if (o->type == OBJ_COMMIT) {
struct commit_list *parents;
struct commit *commit = (struct commit *)o;
if (o->flags & THEY_HAVE)
we_knew_they_have = 1;
else
o->flags |= THEY_HAVE;
if (!oldest_have || (commit->date < oldest_have))
oldest_have = commit->date;
for (parents = commit->parents;
parents;
parents = parents->next)
parents->item->object.flags |= THEY_HAVE;
}
if (!we_knew_they_have) {
add_object_array(o, NULL, &have_obj);
return 1;
}
return 0;
}
static int reachable(struct commit *want)
{
struct commit_list *work = NULL;
insert_by_date(want, &work);
while (work) {
struct commit_list *list = work->next;
struct commit *commit = work->item;
free(work);
work = list;
if (commit->object.flags & THEY_HAVE) {
want->object.flags |= COMMON_KNOWN;
break;
}
if (!commit->object.parsed)
parse_object(commit->object.sha1);
if (commit->object.flags & REACHABLE)
continue;
commit->object.flags |= REACHABLE;
if (commit->date < oldest_have)
continue;
for (list = commit->parents; list; list = list->next) {
struct commit *parent = list->item;
if (!(parent->object.flags & REACHABLE))
insert_by_date(parent, &work);
}
}
want->object.flags |= REACHABLE;
clear_commit_marks(want, REACHABLE);
free_commit_list(work);
return (want->object.flags & COMMON_KNOWN);
}
static int ok_to_give_up(void)
{
int i;
if (!have_obj.nr)
return 0;
for (i = 0; i < want_obj.nr; i++) {
struct object *want = want_obj.objects[i].item;
if (want->flags & COMMON_KNOWN)
continue;
want = deref_tag(want, "a want line", 0);
if (!want || want->type != OBJ_COMMIT) {
/* no way to tell if this is reachable by
* looking at the ancestry chain alone, so
* leave a note to ourselves not to worry about
* this object anymore.
*/
want_obj.objects[i].item->flags |= COMMON_KNOWN;
continue;
}
if (!reachable((struct commit *)want))
return 0;
}
return 1;
}
static int get_common_commits(void)
{
static char line[1000];
unsigned char sha1[20];
char last_hex[41];
save_commit_buffer = 0;
for (;;) {
int len = packet_read_line(0, line, sizeof(line));
reset_timeout();
if (!len) {
if (have_obj.nr == 0 || multi_ack)
packet_write(1, "NAK\n");
continue;
}
strip(line, len);
if (!prefixcmp(line, "have ")) {
switch (got_sha1(line+5, sha1)) {
case -1: /* they have what we do not */
if (multi_ack && ok_to_give_up()) {
const char *hex = sha1_to_hex(sha1);
if (multi_ack == 2)
packet_write(1, "ACK %s ready\n", hex);
else
packet_write(1, "ACK %s continue\n", hex);
}
break;
default:
memcpy(last_hex, sha1_to_hex(sha1), 41);
if (multi_ack == 2)
packet_write(1, "ACK %s common\n", last_hex);
else if (multi_ack)
packet_write(1, "ACK %s continue\n", last_hex);
else if (have_obj.nr == 1)
packet_write(1, "ACK %s\n", last_hex);
break;
}
continue;
}
if (!strcmp(line, "done")) {
if (have_obj.nr > 0) {
if (multi_ack)
packet_write(1, "ACK %s\n", last_hex);
return 0;
}
packet_write(1, "NAK\n");
return -1;
}
die("git upload-pack: expected SHA1 list, got '%s'", line);
}
}
static void receive_needs(void)
{
struct object_array shallows = {0, 0, NULL};
static char line[1000];
int len, depth = 0;
shallow_nr = 0;
if (debug_fd)
write_str_in_full(debug_fd, "#S\n");
for (;;) {
struct object *o;
unsigned char sha1_buf[20];
len = packet_read_line(0, line, sizeof(line));
reset_timeout();
if (!len)
break;
if (debug_fd)
write_in_full(debug_fd, line, len);
if (!prefixcmp(line, "shallow ")) {
unsigned char sha1[20];
struct object *object;
if (get_sha1(line + 8, sha1))
die("invalid shallow line: %s", line);
object = parse_object(sha1);
if (!object)
die("did not find object for %s", line);
object->flags |= CLIENT_SHALLOW;
add_object_array(object, NULL, &shallows);
continue;
}
if (!prefixcmp(line, "deepen ")) {
char *end;
depth = strtol(line + 7, &end, 0);
if (end == line + 7 || depth <= 0)
die("Invalid deepen: %s", line);
continue;
}
if (prefixcmp(line, "want ") ||
get_sha1_hex(line+5, sha1_buf))
die("git upload-pack: protocol error, "
"expected to get sha, not '%s'", line);
if (strstr(line+45, "multi_ack_detailed"))
multi_ack = 2;
else if (strstr(line+45, "multi_ack"))
multi_ack = 1;
if (strstr(line+45, "thin-pack"))
use_thin_pack = 1;
if (strstr(line+45, "ofs-delta"))
use_ofs_delta = 1;
if (strstr(line+45, "side-band-64k"))
use_sideband = LARGE_PACKET_MAX;
else if (strstr(line+45, "side-band"))
use_sideband = DEFAULT_PACKET_MAX;
if (strstr(line+45, "no-progress"))
no_progress = 1;
if (strstr(line+45, "include-tag"))
use_include_tag = 1;
/* We have sent all our refs already, and the other end
* should have chosen out of them; otherwise they are
* asking for nonsense.
*
* Hmph. We may later want to allow "want" line that
* asks for something like "master~10" (symbolic)...
* would it make sense? I don't know.
*/
o = lookup_object(sha1_buf);
if (!o || !(o->flags & OUR_REF))
die("git upload-pack: not our ref %s", line+5);
if (!(o->flags & WANTED)) {
o->flags |= WANTED;
add_object_array(o, NULL, &want_obj);
}
}
if (debug_fd)
write_str_in_full(debug_fd, "#E\n");
if (!use_sideband && daemon_mode)
no_progress = 1;
if (depth == 0 && shallows.nr == 0)
return;
if (depth > 0) {
struct commit_list *result, *backup;
int i;
backup = result = get_shallow_commits(&want_obj, depth,
SHALLOW, NOT_SHALLOW);
while (result) {
struct object *object = &result->item->object;
if (!(object->flags & (CLIENT_SHALLOW|NOT_SHALLOW))) {
packet_write(1, "shallow %s",
sha1_to_hex(object->sha1));
register_shallow(object->sha1);
shallow_nr++;
}
result = result->next;
}
free_commit_list(backup);
for (i = 0; i < shallows.nr; i++) {
struct object *object = shallows.objects[i].item;
if (object->flags & NOT_SHALLOW) {
struct commit_list *parents;
packet_write(1, "unshallow %s",
sha1_to_hex(object->sha1));
object->flags &= ~CLIENT_SHALLOW;
/* make sure the real parents are parsed */
unregister_shallow(object->sha1);
object->parsed = 0;
if (parse_commit((struct commit *)object))
die("invalid commit");
parents = ((struct commit *)object)->parents;
while (parents) {
add_object_array(&parents->item->object,
NULL, &want_obj);
parents = parents->next;
}
add_object_array(object, NULL, &extra_edge_obj);
}
/* make sure commit traversal conforms to client */
register_shallow(object->sha1);
}
packet_flush(1);
} else
if (shallows.nr > 0) {
int i;
for (i = 0; i < shallows.nr; i++)
register_shallow(shallows.objects[i].item->sha1);
}
shallow_nr += shallows.nr;
free(shallows.objects);
}
static int send_ref(const char *refname, const unsigned char *sha1, int flag, void *cb_data)
{
static const char *capabilities = "multi_ack thin-pack side-band"
" side-band-64k ofs-delta shallow no-progress"
" include-tag multi_ack_detailed";
struct object *o = parse_object(sha1);
if (!o)
die("git upload-pack: cannot find object %s:", sha1_to_hex(sha1));
if (capabilities)
packet_write(1, "%s %s%c%s\n", sha1_to_hex(sha1), refname,
0, capabilities);
else
packet_write(1, "%s %s\n", sha1_to_hex(sha1), refname);
capabilities = NULL;
if (!(o->flags & OUR_REF)) {
o->flags |= OUR_REF;
nr_our_refs++;
}
if (o->type == OBJ_TAG) {
o = deref_tag(o, refname, 0);
if (o)
packet_write(1, "%s %s^{}\n", sha1_to_hex(o->sha1), refname);
}
return 0;
}
static void upload_pack(void)
{
reset_timeout();
head_ref(send_ref, NULL);
for_each_ref(send_ref, NULL);
packet_flush(1);
receive_needs();
if (want_obj.nr) {
get_common_commits();
create_pack_file();
}
}
int main(int argc, char **argv)
{
char *dir;
int i;
int strict = 0;
git_extract_argv0_path(argv[0]);
read_replace_refs = 0;
for (i = 1; i < argc; i++) {
char *arg = argv[i];
if (arg[0] != '-')
break;
if (!strcmp(arg, "--strict")) {
strict = 1;
continue;
}
if (!prefixcmp(arg, "--timeout=")) {
timeout = atoi(arg+10);
daemon_mode = 1;
continue;
}
if (!strcmp(arg, "--")) {
i++;
break;
}
}
if (i != argc-1)
usage(upload_pack_usage);
setup_path();
dir = argv[i];
if (!enter_repo(dir, strict))
die("'%s' does not appear to be a git repository", dir);
if (is_repository_shallow())
die("attempt to fetch/clone from a shallow repository");
if (getenv("GIT_DEBUG_SEND_PACK"))
debug_fd = atoi(getenv("GIT_DEBUG_SEND_PACK"));
upload_pack();
return 0;
}