From 96328398b3803c96f6a91edc39ccebd4bb9dd12c Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Wed, 1 Sep 2021 14:54:41 +0200 Subject: [PATCH 1/2] pkt-line: add stdio packet write functions This adds three new functions to pkt-line.c: packet_fwrite, packet_fwrite_fmt and packet_fflush. Besides writing a pktline flush packet, packet_fflush also flushes the stdio buffer of the stream. Helped-by: Patrick Steinhardt Helped-by: Jeff King Signed-off-by: Jacob Vosmaer Signed-off-by: Junio C Hamano --- cache.h | 2 ++ pkt-line.c | 37 +++++++++++++++++++++++++++++++++++++ pkt-line.h | 11 +++++++++++ write-or-die.c | 12 ++++++++++++ 4 files changed, 62 insertions(+) diff --git a/cache.h b/cache.h index d23de69368..011dac4eb7 100644 --- a/cache.h +++ b/cache.h @@ -1738,6 +1738,8 @@ extern const char *git_mailmap_blob; void maybe_flush_or_die(FILE *, const char *); __attribute__((format (printf, 2, 3))) void fprintf_or_die(FILE *, const char *fmt, ...); +void fwrite_or_die(FILE *f, const void *buf, size_t count); +void fflush_or_die(FILE *f); #define COPY_READ_ERROR (-2) #define COPY_WRITE_ERROR (-3) diff --git a/pkt-line.c b/pkt-line.c index 9f63eae2e6..de4a94b437 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -243,6 +243,43 @@ void packet_write(int fd_out, const char *buf, size_t size) die("%s", err.buf); } +void packet_fwrite(FILE *f, const char *buf, size_t size) +{ + size_t packet_size; + char header[4]; + + if (size > LARGE_PACKET_DATA_MAX) + die(_("packet write failed - data exceeds max packet size")); + + packet_trace(buf, size, 1); + packet_size = size + 4; + + set_packet_header(header, packet_size); + fwrite_or_die(f, header, 4); + fwrite_or_die(f, buf, size); +} + +void packet_fwrite_fmt(FILE *fh, const char *fmt, ...) +{ + static struct strbuf buf = STRBUF_INIT; + va_list args; + + strbuf_reset(&buf); + + va_start(args, fmt); + format_packet(&buf, "", fmt, args); + va_end(args); + + fwrite_or_die(fh, buf.buf, buf.len); +} + +void packet_fflush(FILE *f) +{ + packet_trace("0000", 4, 1); + fwrite_or_die(f, "0000", 4); + fflush_or_die(f); +} + void packet_buf_write(struct strbuf *buf, const char *fmt, ...) { va_list args; diff --git a/pkt-line.h b/pkt-line.h index 5af5f45687..82b95e4bdd 100644 --- a/pkt-line.h +++ b/pkt-line.h @@ -35,6 +35,17 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format int write_packetized_from_fd_no_flush(int fd_in, int fd_out); int write_packetized_from_buf_no_flush(const char *src_in, size_t len, int fd_out); +/* + * Stdio versions of packet_write functions. When mixing these with fd + * based functions, take care to call fflush(3) before doing fd writes or + * closing the fd. + */ +void packet_fwrite(FILE *f, const char *buf, size_t size); +void packet_fwrite_fmt(FILE *f, const char *fmt, ...) __attribute__((format (printf, 2, 3))); + +/* packet_fflush writes a flush packet and flushes the stdio buffer of f */ +void packet_fflush(FILE *f); + /* * Read a packetized line into the buffer, which must be at least size bytes * long. The return value specifies the number of bytes read into the buffer. diff --git a/write-or-die.c b/write-or-die.c index d33e68f6ab..0b1ec8190b 100644 --- a/write-or-die.c +++ b/write-or-die.c @@ -70,3 +70,15 @@ void write_or_die(int fd, const void *buf, size_t count) die_errno("write error"); } } + +void fwrite_or_die(FILE *f, const void *buf, size_t count) +{ + if (fwrite(buf, 1, count, f) != count) + die_errno("fwrite error"); +} + +void fflush_or_die(FILE *f) +{ + if (fflush(f)) + die_errno("fflush error"); +} From 70afef5cdf29b5159f18df1b93722055f78740f8 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Wed, 1 Sep 2021 14:54:42 +0200 Subject: [PATCH 2/2] upload-pack: use stdio in send_ref callbacks In both protocol v0 and v2, upload-pack writes one pktline packet per advertised ref to stdout. That means one or two write(2) syscalls per ref. This is problematic if these writes become network sends with high overhead. This commit changes both send_ref callbacks to use buffered IO using stdio. To give an example of the impact: I set up a single-threaded loop that calls ls-remote (with HTTP and protocol v2) on a local GitLab instance, on a repository with 11K refs. When I switch from Git v2.32.0 to this patch, I see a 40% reduction in CPU time for Git, and 65% for Gitaly (GitLab's Git RPC service). So using buffered IO not only saves syscalls in upload-pack, it also saves time in things that consume upload-pack's output. Helped-by: Jeff King Signed-off-by: Jacob Vosmaer Signed-off-by: Junio C Hamano --- ls-refs.c | 4 ++-- upload-pack.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/ls-refs.c b/ls-refs.c index 88f6c3f60d..e6a2dbd962 100644 --- a/ls-refs.c +++ b/ls-refs.c @@ -105,7 +105,7 @@ static int send_ref(const char *refname, const struct object_id *oid, } strbuf_addch(&refline, '\n'); - packet_write(1, refline.buf, refline.len); + packet_fwrite(stdout, refline.buf, refline.len); strbuf_release(&refline); return 0; @@ -171,7 +171,7 @@ int ls_refs(struct repository *r, struct strvec *keys, strvec_push(&data.prefixes, ""); for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v, send_ref, &data, 0); - packet_flush(1); + packet_fflush(stdout); strvec_clear(&data.prefixes); return 0; } diff --git a/upload-pack.c b/upload-pack.c index 297b76fcb4..0ed377b1fb 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1207,7 +1207,7 @@ static int send_ref(const char *refname, const struct object_id *oid, format_symref_info(&symref_info, &data->symref); format_session_id(&session_id, data); - packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n", + packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n", oid_to_hex(oid), refname_nons, 0, capabilities, (data->allow_uor & ALLOW_TIP_SHA1) ? @@ -1223,11 +1223,11 @@ static int send_ref(const char *refname, const struct object_id *oid, strbuf_release(&symref_info); strbuf_release(&session_id); } else { - packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons); + packet_fwrite_fmt(stdout, "%s %s\n", oid_to_hex(oid), refname_nons); } capabilities = NULL; if (!peel_iterated_oid(oid, &peeled)) - packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons); + packet_fwrite_fmt(stdout, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons); return 0; } @@ -1348,6 +1348,11 @@ void upload_pack(struct upload_pack_options *options) reset_timeout(data.timeout); head_ref_namespaced(send_ref, &data); for_each_namespaced_ref(send_ref, &data); + /* + * fflush stdout before calling advertise_shallow_grafts because send_ref + * uses stdio. + */ + fflush_or_die(stdout); advertise_shallow_grafts(1); packet_flush(1); } else {