From 88780c37b314d783db09118179ceba30096ef935 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 25 Sep 2017 16:27:17 -0400 Subject: [PATCH 1/7] files-backend: prefer "0" for write_in_full() error check Commit 06f46f237a (avoid "write_in_full(fd, buf, len) != len" pattern, 2017-09-13) converted this callsite from: write_in_full(...) != 1 to write_in_full(...) < 0 But during the conflict resolution in c50424a6f0 (Merge branch 'jk/write-in-full-fix', 2017-09-25), this morphed into write_in_full(...) < 1 This behaves as we want, but we prefer to avoid modeling the "less than length" error-check which can be subtly buggy, as shown in efacf609c8 (config: avoid "write_in_full(fd, buf, len) < len" pattern, 2017-09-13). Signed-off-by: Jeff King Reviewed-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- refs/files-backend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index dac33628b3..e35c64c001 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -3007,7 +3007,7 @@ static int files_reflog_expire(struct ref_store *ref_store, } else if (update && (write_in_full(get_lock_file_fd(&lock->lk), oid_to_hex(&cb.last_kept_oid), GIT_SHA1_HEXSZ) < 0 || - write_str_in_full(get_lock_file_fd(&lock->lk), "\n") < 1 || + write_str_in_full(get_lock_file_fd(&lock->lk), "\n") < 0 || close_ref_gently(lock) < 0)) { status |= error("couldn't write %s", get_lock_file_path(&lock->lk)); From a1f3515da74504db0a046759d9ac1615a1d5f4b8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 25 Sep 2017 16:27:57 -0400 Subject: [PATCH 2/7] notes-merge: drop dead zero-write code We call write_in_full() with a size that we know is greater than zero. The return value can never be zero, then, since write_in_full() converts such a failed write() into ENOSPC and returns -1. We can just drop this branch of the error handling entirely. Suggested-by: Jonathan Nieder Signed-off-by: Jeff King Reviewed-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- notes-merge.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/notes-merge.c b/notes-merge.c index 597d43f65c..4352c34a6e 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -308,8 +308,6 @@ static void write_buf_to_worktree(const struct object_id *obj, if (errno == EPIPE) break; die_errno("notes-merge"); - } else if (!ret) { - die("notes-merge: disk full?"); } size -= ret; buf += ret; From 61d36330b422237b6be9581cdbade07782ab61a8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 27 Sep 2017 02:00:28 -0400 Subject: [PATCH 3/7] prefer "!=" when checking read_in_full() result Comparing the result of read_in_full() using less-than is potentially dangerous, as a negative return value may be converted to an unsigned type and be considered a success. This is discussed further in 561598cfcf (read_pack_header: handle signed/unsigned comparison in read result, 2017-09-13). Each of these instances is actually fine in practice: - in get-tar-commit-id, the HEADERSIZE macro expands to a signed integer. If it were switched to an unsigned type (e.g., a size_t), then it would be a bug. - the other two callers check for a short read only after handling a negative return separately. This is a fine practice, but we'd prefer to model "!=" as a general rule. So all of these cases can be considered cleanups and not actual bugfixes. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/get-tar-commit-id.c | 2 +- csum-file.c | 2 +- pkt-line.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c index 6d9a79f9b3..cd3e656828 100644 --- a/builtin/get-tar-commit-id.c +++ b/builtin/get-tar-commit-id.c @@ -26,7 +26,7 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) usage(builtin_get_tar_commit_id_usage); n = read_in_full(0, buffer, HEADERSIZE); - if (n < HEADERSIZE) + if (n != HEADERSIZE) die("git get-tar-commit-id: read error"); if (header->typeflag[0] != 'g') return 1; diff --git a/csum-file.c b/csum-file.c index a172199e44..2adae04073 100644 --- a/csum-file.c +++ b/csum-file.c @@ -19,7 +19,7 @@ static void flush(struct sha1file *f, const void *buf, unsigned int count) if (ret < 0) die_errno("%s: sha1 file read error", f->name); - if (ret < count) + if (ret != count) die("%s: sha1 file truncated", f->name); if (memcmp(buf, check_buffer, count)) die("sha1 file '%s' validation error", f->name); diff --git a/pkt-line.c b/pkt-line.c index 647bbd3bce..93ea311443 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -258,7 +258,7 @@ static int get_packet_data(int fd, char **src_buf, size_t *src_size, } /* And complain if we didn't get enough bytes to satisfy the read. */ - if (ret < size) { + if (ret != size) { if (options & PACKET_READ_GENTLE_ON_EOF) return -1; From 90dca6710e6e5aad5d78d0cd006c3adadb65524d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 27 Sep 2017 02:01:07 -0400 Subject: [PATCH 4/7] avoid looking at errno for short read_in_full() returns When a caller tries to read a particular set of bytes via read_in_full(), there are three possible outcomes: 1. An error, in which case -1 is returned and errno is set. 2. A short read, in which fewer bytes are returned and errno is unspecified (we never saw a read error, so we may have some random value from whatever syscall failed last). 3. The full read completed successfully. Many callers handle cases 1 and 2 together by just checking the result against the requested size. If their combined error path looks at errno (e.g., by calling die_errno), they may report a nonsense value. Let's fix these sites by having them distinguish between the two error cases. That avoids the random errno confusion, and lets us give more detailed error messages. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-write.c | 7 ++++++- sha1_file.c | 11 ++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/pack-write.c b/pack-write.c index a333ec6754..fea6284192 100644 --- a/pack-write.c +++ b/pack-write.c @@ -213,14 +213,19 @@ void fixup_pack_header_footer(int pack_fd, git_SHA_CTX old_sha1_ctx, new_sha1_ctx; struct pack_header hdr; char *buf; + ssize_t read_result; git_SHA1_Init(&old_sha1_ctx); git_SHA1_Init(&new_sha1_ctx); if (lseek(pack_fd, 0, SEEK_SET) != 0) die_errno("Failed seeking to start of '%s'", pack_name); - if (read_in_full(pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr)) + read_result = read_in_full(pack_fd, &hdr, sizeof(hdr)); + if (read_result < 0) die_errno("Unable to reread header of '%s'", pack_name); + else if (read_result != sizeof(hdr)) + die_errno("Unexpected short read for header of '%s'", + pack_name); if (lseek(pack_fd, 0, SEEK_SET) != 0) die_errno("Failed seeking to start of '%s'", pack_name); git_SHA1_Update(&old_sha1_ctx, &hdr, sizeof(hdr)); diff --git a/sha1_file.c b/sha1_file.c index dd7cbe52ef..11346cf6d8 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1757,10 +1757,15 @@ static int index_core(unsigned char *sha1, int fd, size_t size, ret = index_mem(sha1, "", size, type, path, flags); } else if (size <= SMALL_FILE_SIZE) { char *buf = xmalloc(size); - if (size == read_in_full(fd, buf, size)) - ret = index_mem(sha1, buf, size, type, path, flags); + ssize_t read_result = read_in_full(fd, buf, size); + if (read_result < 0) + ret = error_errno("read error while indexing %s", + path ? path : ""); + else if (read_result != size) + ret = error("short read while indexing %s", + path ? path : ""); else - ret = error_errno("short read"); + ret = index_mem(sha1, buf, size, type, path, flags); free(buf); } else { void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); From 41dcc4dcccecca49e3f75212ce9e614ffe2bdcc8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 27 Sep 2017 02:02:11 -0400 Subject: [PATCH 5/7] distinguish error versus short read from read_in_full() Many callers of read_in_full() expect to see the exact number of bytes requested, but their error handling lumps together true read errors and short reads due to unexpected EOF. We can give more specific error messages by separating these cases (showing errno when appropriate, and otherwise describing the short read). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/get-tar-commit-id.c | 4 +++- bulk-checkin.c | 5 ++++- packfile.c | 11 +++++++++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/builtin/get-tar-commit-id.c b/builtin/get-tar-commit-id.c index cd3e656828..2706fcfaf2 100644 --- a/builtin/get-tar-commit-id.c +++ b/builtin/get-tar-commit-id.c @@ -26,8 +26,10 @@ int cmd_get_tar_commit_id(int argc, const char **argv, const char *prefix) usage(builtin_get_tar_commit_id_usage); n = read_in_full(0, buffer, HEADERSIZE); + if (n < 0) + die_errno("git get-tar-commit-id: read error"); if (n != HEADERSIZE) - die("git get-tar-commit-id: read error"); + die_errno("git get-tar-commit-id: EOF before reading tar header"); if (header->typeflag[0] != 'g') return 1; if (!skip_prefix(content, "52 comment=", &comment)) diff --git a/bulk-checkin.c b/bulk-checkin.c index 9a1f6c49ab..3310fd210a 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -115,7 +115,10 @@ static int stream_to_pack(struct bulk_checkin_state *state, if (size && !s.avail_in) { ssize_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf); - if (read_in_full(fd, ibuf, rsize) != rsize) + ssize_t read_result = read_in_full(fd, ibuf, rsize); + if (read_result < 0) + die_errno("failed to read from '%s'", path); + if (read_result != rsize) die("failed to read %d bytes from '%s'", (int)rsize, path); offset += rsize; diff --git a/packfile.c b/packfile.c index f86fa051c9..263efb7151 100644 --- a/packfile.c +++ b/packfile.c @@ -444,6 +444,7 @@ static int open_packed_git_1(struct packed_git *p) unsigned char sha1[20]; unsigned char *idx_sha1; long fd_flag; + ssize_t read_result; if (!p->index_data && open_pack_index(p)) return error("packfile %s index unavailable", p->pack_name); @@ -485,7 +486,10 @@ static int open_packed_git_1(struct packed_git *p) return error("cannot set FD_CLOEXEC"); /* Verify we recognize this pack file format. */ - if (read_in_full(p->pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr)) + read_result = read_in_full(p->pack_fd, &hdr, sizeof(hdr)); + if (read_result < 0) + return error_errno("error reading from %s", p->pack_name); + if (read_result != sizeof(hdr)) return error("file %s is far too short to be a packfile", p->pack_name); if (hdr.hdr_signature != htonl(PACK_SIGNATURE)) return error("file %s is not a GIT packfile", p->pack_name); @@ -502,7 +506,10 @@ static int open_packed_git_1(struct packed_git *p) p->num_objects); if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1) return error("end of packfile %s is unavailable", p->pack_name); - if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1)) + read_result = read_in_full(p->pack_fd, sha1, sizeof(sha1)); + if (read_result < 0) + return error_errno("error reading from %s", p->pack_name); + if (read_result != sizeof(sha1)) return error("packfile %s signature is unavailable", p->pack_name); idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40; if (hashcmp(sha1, idx_sha1)) From 228740b67b55f4ee23637bd1472a73ae50efe93a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 27 Sep 2017 02:02:21 -0400 Subject: [PATCH 6/7] worktree: use xsize_t to access file size To read the "gitdir" file into memory, we stat the file and allocate a buffer. But we store the size in an "int", which may be truncated. We should use a size_t and xsize_t(), which will detect truncation. An overflow is unlikely for a "gitdir" file, but it's a good practice to model. Signed-off-by: Jeff King Reviewed-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- builtin/worktree.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/worktree.c b/builtin/worktree.c index de26849f55..2f4a4ef9cd 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -38,7 +38,8 @@ static int prune_worktree(const char *id, struct strbuf *reason) { struct stat st; char *path; - int fd, len; + int fd; + size_t len; if (!is_directory(git_path("worktrees/%s", id))) { strbuf_addf(reason, _("Removing worktrees/%s: not a valid directory"), id); @@ -56,7 +57,7 @@ static int prune_worktree(const char *id, struct strbuf *reason) id, strerror(errno)); return 1; } - len = st.st_size; + len = xsize_t(st.st_size); path = xmallocz(len); read_in_full(fd, path, len); close(fd); From 8a1a8d2ad1b41a0a28d37d1d21ee9620a23e91eb Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 27 Sep 2017 02:02:27 -0400 Subject: [PATCH 7/7] worktree: check the result of read_in_full() We try to read "len" bytes into a buffer and just assume that it happened correctly. In practice this should usually be the case, since we just stat'd the file to get the length. But we could be fooled by transient errors or by other processes racily truncating the file. Let's be more careful. There's a slim chance this could catch a real error, but it also prevents people and tools from getting worried while reading the code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/worktree.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/builtin/worktree.c b/builtin/worktree.c index 2f4a4ef9cd..7b9307aa58 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -40,6 +40,7 @@ static int prune_worktree(const char *id, struct strbuf *reason) char *path; int fd; size_t len; + ssize_t read_result; if (!is_directory(git_path("worktrees/%s", id))) { strbuf_addf(reason, _("Removing worktrees/%s: not a valid directory"), id); @@ -59,8 +60,24 @@ static int prune_worktree(const char *id, struct strbuf *reason) } len = xsize_t(st.st_size); path = xmallocz(len); - read_in_full(fd, path, len); + + read_result = read_in_full(fd, path, len); + if (read_result < 0) { + strbuf_addf(reason, _("Removing worktrees/%s: unable to read gitdir file (%s)"), + id, strerror(errno)); + close(fd); + free(path); + return 1; + } close(fd); + + if (read_result != len) { + strbuf_addf(reason, + _("Removing worktrees/%s: short read (expected %"PRIuMAX" bytes, read %"PRIuMAX")"), + id, (uintmax_t)len, (uintmax_t)read_result); + free(path); + return 1; + } while (len && (path[len - 1] == '\n' || path[len - 1] == '\r')) len--; if (!len) {