1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-04 06:36:35 +02:00
git/strbuf.c

1074 lines
23 KiB
C
Raw Normal View History

#include "git-compat-util.h"
#include "gettext.h"
#include "hex-ll.h"
#include "strbuf.h"
#include "string-list.h"
#include "utf8.h"
#include "date.h"
int starts_with(const char *str, const char *prefix)
{
for (; ; str++, prefix++)
if (!*prefix)
return 1;
else if (*str != *prefix)
return 0;
}
int istarts_with(const char *str, const char *prefix)
{
for (; ; str++, prefix++)
if (!*prefix)
return 1;
else if (tolower(*str) != tolower(*prefix))
return 0;
}
find multi-byte comment chars in unterminated buffers As with the previous patch, we need to swap out single-byte matching for something like starts_with() to match all bytes of a multi-byte comment character. But for cases where the buffer is not NUL-terminated (and we instead have an explicit size or end pointer), it's not safe to use starts_with(), as it might walk off the end of the buffer. Let's introduce a new starts_with_mem() that does the same thing but also accepts the length of the "haystack" str and makes sure not to walk past it. Note that in most cases the existing code did not need a length check at all, since it was written in a way that knew we had at least one byte available (and that was all we checked). So I had to read each one to find the appropriate bounds. The one exception is sequencer.c's add_commented_lines(), where we can actually get rid of the length check. Just like starts_with(), our starts_with_mem() handles an empty haystack variable by not matching (assuming a non-empty prefix). A few notes on the implementation of starts_with_mem(): - it would be equally correct to take an "end" pointer (and indeed, many of the callers have this and have to subtract to come up with the length). I think taking a ptr/size combo is a more usual interface for our codebase, though, and has the added benefit that the function signature makes it harder to mix up the three parameters. - we could obviously build starts_with() on top of this by passing strlen(str) as the length. But it's possible that starts_with() is a relatively hot code path, and it should not pay that penalty (it can generally return an answer proportional to the size of the prefix, not the whole string). - it naively feels like xstrncmpz() should be able to do the same thing, but that's not quite true. If you pass the length of the haystack buffer, then strncmp() finds that a shorter prefix string is "less than" than the haystack, even if the haystack starts with the prefix. If you pass the length of the prefix, then you risk reading past the end of the haystack if it is shorter than the prefix. So I think we really do need a new function. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-12 10:17:39 +01:00
int starts_with_mem(const char *str, size_t len, const char *prefix)
{
const char *end = str + len;
for (; ; str++, prefix++) {
if (!*prefix)
return 1;
else if (str == end || *str != *prefix)
return 0;
}
}
int skip_to_optional_arg_default(const char *str, const char *prefix,
const char **arg, const char *def)
{
const char *p;
if (!skip_prefix(str, prefix, &p))
return 0;
if (!*p) {
if (arg)
*arg = def;
return 1;
}
if (*p != '=')
return 0;
if (arg)
*arg = p + 1;
return 1;
}
/*
* Used as the default ->buf value, so that people can always assume
* buf is non NULL and ->buf is NUL terminated even for a freshly
* initialized strbuf.
*/
char strbuf_slopbuf[1];
void strbuf_init(struct strbuf *sb, size_t hint)
{
struct strbuf blank = STRBUF_INIT;
memcpy(sb, &blank, sizeof(*sb));
2011-08-29 23:16:12 +02:00
if (hint)
strbuf_grow(sb, hint);
}
void strbuf_release(struct strbuf *sb)
{
if (sb->alloc) {
free(sb->buf);
strbuf_init(sb, 0);
}
}
char *strbuf_detach(struct strbuf *sb, size_t *sz)
{
char *res;
strbuf_grow(sb, 0);
res = sb->buf;
if (sz)
*sz = sb->len;
strbuf_init(sb, 0);
return res;
}
void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc)
{
strbuf_release(sb);
sb->buf = buf;
sb->len = len;
sb->alloc = alloc;
strbuf_grow(sb, 0);
sb->buf[sb->len] = '\0';
}
void strbuf_grow(struct strbuf *sb, size_t extra)
{
2011-08-29 23:16:12 +02:00
int new_buf = !sb->alloc;
if (unsigned_add_overflows(extra, 1) ||
unsigned_add_overflows(sb->len, extra + 1))
die("you want to use way too much memory");
2011-08-29 23:16:12 +02:00
if (new_buf)
sb->buf = NULL;
ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
2011-08-29 23:16:12 +02:00
if (new_buf)
sb->buf[0] = '\0';
}
void strbuf_trim(struct strbuf *sb)
{
strbuf_rtrim(sb);
strbuf_ltrim(sb);
}
void strbuf_rtrim(struct strbuf *sb)
{
while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
sb->len--;
sb->buf[sb->len] = '\0';
}
void strbuf_trim_trailing_dir_sep(struct strbuf *sb)
{
while (sb->len > 0 && is_dir_sep((unsigned char)sb->buf[sb->len - 1]))
sb->len--;
sb->buf[sb->len] = '\0';
}
void strbuf_trim_trailing_newline(struct strbuf *sb)
{
if (sb->len > 0 && sb->buf[sb->len - 1] == '\n') {
if (--sb->len > 0 && sb->buf[sb->len - 1] == '\r')
--sb->len;
sb->buf[sb->len] = '\0';
}
}
void strbuf_ltrim(struct strbuf *sb)
{
char *b = sb->buf;
while (sb->len > 0 && isspace(*b)) {
b++;
sb->len--;
}
memmove(sb->buf, b, sb->len);
sb->buf[sb->len] = '\0';
}
int strbuf_reencode(struct strbuf *sb, const char *from, const char *to)
{
char *out;
size_t len;
if (same_encoding(from, to))
return 0;
out = reencode_string_len(sb->buf, sb->len, to, from, &len);
if (!out)
return -1;
strbuf_attach(sb, out, len, len);
return 0;
}
void strbuf_tolower(struct strbuf *sb)
{
char *p = sb->buf, *end = sb->buf + sb->len;
for (; p < end; p++)
*p = tolower(*p);
}
struct strbuf **strbuf_split_buf(const char *str, size_t slen,
int terminator, int max)
{
struct strbuf **ret = NULL;
size_t nr = 0, alloc = 0;
struct strbuf *t;
while (slen) {
int len = slen;
if (max <= 0 || nr + 1 < max) {
const char *end = memchr(str, terminator, slen);
if (end)
len = end - str + 1;
}
t = xmalloc(sizeof(struct strbuf));
strbuf_init(t, len);
strbuf_add(t, str, len);
ALLOC_GROW(ret, nr + 2, alloc);
ret[nr++] = t;
str += len;
slen -= len;
}
ALLOC_GROW(ret, nr + 1, alloc); /* In case string was empty */
ret[nr] = NULL;
return ret;
}
void strbuf_add_separated_string_list(struct strbuf *str,
const char *sep,
struct string_list *slist)
{
struct string_list_item *item;
int sep_needed = 0;
for_each_string_list_item(item, slist) {
if (sep_needed)
strbuf_addstr(str, sep);
strbuf_addstr(str, item->string);
sep_needed = 1;
}
}
void strbuf_list_free(struct strbuf **sbs)
{
struct strbuf **s = sbs;
mailinfo: also free strbuf lists when clearing mailinfo mailinfo.p_hdr_info/s_hdr_info are null-terminated lists of strbuf's, with entries pointing either to NULL or an allocated strbuf. Therefore we need to free those strbuf's (and not just the data they contain) whenever we're done with a given entry. (See handle_header() where those new strbufs are malloc'd.) Once we no longer need the list (and not just its entries) we can switch over to strbuf_list_free() instead of manually iterating over the list, which takes care of those additional details for us. We can only do this in clear_mailinfo() - in handle_commit_message() we are only clearing the array contents but want to reuse the array itself, hence we can't use strbuf_list_free() there. However, strbuf_list_free() cannot handle a NULL input, and the lists we are freeing might be NULL. Therefore we add a NULL check in strbuf_list_free() to make it safe to use with a NULL input (which is a pattern used by some of the other *_free() functions around git). Leak output from t0023: Direct leak of 72 byte(s) in 3 object(s) allocated from: #0 0x49a85d in malloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:145:3 #1 0x9ac9f4 in do_xmalloc wrapper.c:41:8 #2 0x9ac9ca in xmalloc wrapper.c:62:9 #3 0x7f6cf7 in handle_header mailinfo.c:205:10 #4 0x7f5abf in check_header mailinfo.c:583:4 #5 0x7f5524 in mailinfo mailinfo.c:1197:3 #6 0x4dcc95 in parse_mail builtin/am.c:1167:6 #7 0x4d9070 in am_run builtin/am.c:1732:12 #8 0x4d5b7a in cmd_am builtin/am.c:2398:3 #9 0x4cd91d in run_builtin git.c:467:11 #10 0x4cb5f3 in handle_builtin git.c:719:3 #11 0x4ccf47 in run_argv git.c:808:4 #12 0x4caf49 in cmd_main git.c:939:19 #13 0x69e43e in main common-main.c:52:11 #14 0x7fc1fadfa349 in __libc_start_main (/lib64/libc.so.6+0x24349) SUMMARY: AddressSanitizer: 72 byte(s) leaked in 3 allocation(s). Signed-off-by: Andrzej Hunt <ajrhunt@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-04-25 16:16:16 +02:00
if (!s)
return;
while (*s) {
strbuf_release(*s);
free(*s++);
}
free(sbs);
}
int strbuf_cmp(const struct strbuf *a, const struct strbuf *b)
{
size_t len = a->len < b->len ? a->len: b->len;
int cmp = memcmp(a->buf, b->buf, len);
if (cmp)
return cmp;
return a->len < b->len ? -1: a->len != b->len;
}
void strbuf_splice(struct strbuf *sb, size_t pos, size_t len,
const void *data, size_t dlen)
{
if (unsigned_add_overflows(pos, len))
die("you want to use way too much memory");
if (pos > sb->len)
die("`pos' is too far after the end of the buffer");
if (pos + len > sb->len)
die("`pos + len' is too far after the end of the buffer");
if (dlen >= len)
strbuf_grow(sb, dlen - len);
memmove(sb->buf + pos + dlen,
sb->buf + pos + len,
sb->len - pos - len);
memcpy(sb->buf + pos, data, dlen);
strbuf_setlen(sb, sb->len + dlen - len);
}
void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
{
strbuf_splice(sb, pos, 0, data, len);
}
void strbuf_vinsertf(struct strbuf *sb, size_t pos, const char *fmt, va_list ap)
{
int len, len2;
char save;
va_list cp;
if (pos > sb->len)
die("`pos' is too far after the end of the buffer");
va_copy(cp, ap);
len = vsnprintf(sb->buf + sb->len, 0, fmt, cp);
va_end(cp);
if (len < 0)
BUG("your vsnprintf is broken (returned %d)", len);
if (!len)
return; /* nothing to do */
if (unsigned_add_overflows(sb->len, len))
die("you want to use way too much memory");
strbuf_grow(sb, len);
memmove(sb->buf + pos + len, sb->buf + pos, sb->len - pos);
/* vsnprintf() will append a NUL, overwriting one of our characters */
save = sb->buf[pos + len];
len2 = vsnprintf(sb->buf + pos, len + 1, fmt, ap);
sb->buf[pos + len] = save;
if (len2 != len)
BUG("your vsnprintf is broken (returns inconsistent lengths)");
strbuf_setlen(sb, sb->len + len);
}
void strbuf_insertf(struct strbuf *sb, size_t pos, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
strbuf_vinsertf(sb, pos, fmt, ap);
va_end(ap);
}
void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
{
strbuf_splice(sb, pos, len, "", 0);
}
void strbuf_add(struct strbuf *sb, const void *data, size_t len)
{
strbuf_grow(sb, len);
memcpy(sb->buf + sb->len, data, len);
strbuf_setlen(sb, sb->len + len);
}
void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2)
{
strbuf_grow(sb, sb2->len);
memcpy(sb->buf + sb->len, sb2->buf, sb2->len);
strbuf_setlen(sb, sb->len + sb2->len);
}
const char *strbuf_join_argv(struct strbuf *buf,
int argc, const char **argv, char delim)
{
if (!argc)
return buf->buf;
strbuf_addstr(buf, *argv);
while (--argc) {
strbuf_addch(buf, delim);
strbuf_addstr(buf, *(++argv));
}
return buf->buf;
}
void strbuf_addchars(struct strbuf *sb, int c, size_t n)
{
strbuf_grow(sb, n);
memset(sb->buf + sb->len, c, n);
strbuf_setlen(sb, sb->len + n);
}
void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
strbuf_vaddf(sb, fmt, ap);
va_end(ap);
}
static void add_lines(struct strbuf *out,
const char *prefix,
const char *buf, size_t size,
int space_after_prefix)
{
while (size) {
const char *next = memchr(buf, '\n', size);
next = next ? (next + 1) : (buf + size);
strbuf_addstr(out, prefix);
if (space_after_prefix && buf[0] != '\n' && buf[0] != '\t')
strbuf_addch(out, ' ');
strbuf_add(out, buf, next - buf);
size -= next - buf;
buf = next;
}
strbuf_complete_line(out);
}
void strbuf_add_commented_lines(struct strbuf *out, const char *buf,
size_t size, const char *comment_prefix)
{
add_lines(out, comment_prefix, buf, size, 1);
}
void strbuf_commented_addf(struct strbuf *sb, const char *comment_prefix,
const char *fmt, ...)
{
va_list params;
struct strbuf buf = STRBUF_INIT;
int incomplete_line = sb->len && sb->buf[sb->len - 1] != '\n';
va_start(params, fmt);
strbuf_vaddf(&buf, fmt, params);
va_end(params);
strbuf_add_commented_lines(sb, buf.buf, buf.len, comment_prefix);
if (incomplete_line)
sb->buf[--sb->len] = '\0';
strbuf_release(&buf);
}
void strbuf_vaddf(struct strbuf *sb, const char *fmt, va_list ap)
{
int len;
va_list cp;
if (!strbuf_avail(sb))
strbuf_grow(sb, 64);
va_copy(cp, ap);
len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, cp);
va_end(cp);
if (len < 0)
BUG("your vsnprintf is broken (returned %d)", len);
if (len > strbuf_avail(sb)) {
strbuf_grow(sb, len);
len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
if (len > strbuf_avail(sb))
BUG("your vsnprintf is broken (insatiable)");
}
strbuf_setlen(sb, sb->len + len);
}
int strbuf_expand_step(struct strbuf *sb, const char **formatp)
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. :) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-09 01:49:42 +01:00
{
const char *format = *formatp;
const char *percent = strchrnul(format, '%');
strbuf_add(sb, format, percent - format);
if (!*percent)
return 0;
*formatp = percent + 1;
return 1;
--pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. :) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-09 01:49:42 +01:00
}
size_t strbuf_expand_literal(struct strbuf *sb, const char *placeholder)
{
int ch;
switch (placeholder[0]) {
case 'n': /* newline */
strbuf_addch(sb, '\n');
return 1;
case 'x':
/* %x00 == NUL, %x0a == LF, etc. */
ch = hex2chr(placeholder + 1);
if (ch < 0)
return 0;
strbuf_addch(sb, ch);
return 3;
}
return 0;
}
void strbuf_expand_bad_format(const char *format, const char *command)
{
const char *end;
if (*format != '(')
/* TRANSLATORS: The first %s is a command like "ls-tree". */
die(_("bad %s format: element '%s' does not start with '('"),
command, format);
end = strchr(format + 1, ')');
if (!end)
/* TRANSLATORS: The first %s is a command like "ls-tree". */
die(_("bad %s format: element '%s' does not end in ')'"),
command, format);
/* TRANSLATORS: %s is a command like "ls-tree". */
die(_("bad %s format: %%%.*s"),
command, (int)(end - format + 1), format);
}
void strbuf_addbuf_percentquote(struct strbuf *dst, const struct strbuf *src)
{
size_t i, len = src->len;
for (i = 0; i < len; i++) {
if (src->buf[i] == '%')
strbuf_addch(dst, '%');
strbuf_addch(dst, src->buf[i]);
}
}
#define URL_UNSAFE_CHARS " <>\"%{}|\\^`:?#[]@!$&'()*+,;="
void strbuf_add_percentencode(struct strbuf *dst, const char *src, int flags)
{
size_t i, len = strlen(src);
for (i = 0; i < len; i++) {
unsigned char ch = src[i];
if (ch <= 0x1F || ch >= 0x7F ||
(ch == '/' && (flags & STRBUF_ENCODE_SLASH)) ||
strchr(URL_UNSAFE_CHARS, ch))
strbuf_addf(dst, "%%%02X", (unsigned char)ch);
else
strbuf_addch(dst, ch);
}
}
size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
{
size_t res;
size_t oldalloc = sb->alloc;
strbuf_grow(sb, size);
res = fread(sb->buf + sb->len, 1, size, f);
if (res > 0)
strbuf_setlen(sb, sb->len + res);
else if (oldalloc == 0)
strbuf_release(sb);
return res;
}
ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint)
{
size_t oldlen = sb->len;
size_t oldalloc = sb->alloc;
strbuf_grow(sb, hint ? hint : 8192);
for (;;) {
ssize_t want = sb->alloc - sb->len - 1;
ssize_t got = read_in_full(fd, sb->buf + sb->len, want);
if (got < 0) {
if (oldalloc == 0)
strbuf_release(sb);
else
strbuf_setlen(sb, oldlen);
return -1;
}
sb->len += got;
if (got < want)
break;
strbuf_grow(sb, 8192);
}
sb->buf[sb->len] = '\0';
return sb->len - oldlen;
}
ssize_t strbuf_read_once(struct strbuf *sb, int fd, size_t hint)
{
size_t oldalloc = sb->alloc;
ssize_t cnt;
strbuf_grow(sb, hint ? hint : 8192);
cnt = xread(fd, sb->buf + sb->len, sb->alloc - sb->len - 1);
if (cnt > 0)
strbuf_setlen(sb, sb->len + cnt);
else if (oldalloc == 0)
strbuf_release(sb);
return cnt;
}
ssize_t strbuf_write(struct strbuf *sb, FILE *f)
{
return sb->len ? fwrite(sb->buf, 1, sb->len, f) : 0;
}
#define STRBUF_MAXLINK (2*PATH_MAX)
int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint)
{
size_t oldalloc = sb->alloc;
if (hint < 32)
hint = 32;
while (hint < STRBUF_MAXLINK) {
ssize_t len;
strbuf_grow(sb, hint);
len = readlink(path, sb->buf, hint);
if (len < 0) {
if (errno != ERANGE)
break;
} else if (len < hint) {
strbuf_setlen(sb, len);
return 0;
}
/* .. the buffer was too small - try again */
hint *= 2;
}
if (oldalloc == 0)
strbuf_release(sb);
return -1;
}
int strbuf_getcwd(struct strbuf *sb)
{
size_t oldalloc = sb->alloc;
size_t guessed_len = 128;
for (;; guessed_len *= 2) {
strbuf_grow(sb, guessed_len);
if (getcwd(sb->buf, sb->alloc)) {
strbuf_setlen(sb, strlen(sb->buf));
return 0;
}
/*
* If getcwd(3) is implemented as a syscall that falls
* back to a regular lookup using readdir(3) etc. then
* we may be able to avoid EACCES by providing enough
* space to the syscall as it's not necessarily bound
* to the same restrictions as the fallback.
*/
if (errno == EACCES && guessed_len < PATH_MAX)
continue;
if (errno != ERANGE)
break;
}
if (oldalloc == 0)
strbuf_release(sb);
else
strbuf_reset(sb);
return -1;
}
#ifdef HAVE_GETDELIM
int strbuf_getwholeline(struct strbuf *sb, FILE *fp, int term)
{
ssize_t r;
if (feof(fp))
return EOF;
strbuf_reset(sb);
/* Translate slopbuf to NULL, as we cannot call realloc on it */
if (!sb->alloc)
sb->buf = NULL;
errno = 0;
r = getdelim(&sb->buf, &sb->alloc, term, fp);
if (r > 0) {
sb->len = r;
return 0;
}
assert(r == -1);
/*
* Normally we would have called xrealloc, which will try to free
* memory and recover. But we have no way to tell getdelim() to do so.
* Worse, we cannot try to recover ENOMEM ourselves, because we have
* no idea how many bytes were read by getdelim.
*
* Dying here is reasonable. It mirrors what xrealloc would do on
* catastrophic memory failure. We skip the opportunity to free pack
* memory and retry, but that's unlikely to help for a malloc small
* enough to hold a single line of input, anyway.
*/
if (errno == ENOMEM)
die("Out of memory, getdelim failed");
strbuf_getwholeline: NUL-terminate getdelim buffer on error Commit 0cc30e0 (strbuf_getwholeline: use getdelim if it is available, 2015-04-16) tries to clean up after getdelim() returns EOF, but gets one case wrong, which can lead in some obscure cases to us reading uninitialized memory. After getdelim() returns -1, we re-initialize the strbuf only if sb->buf is NULL. The thinking was that either: 1. We fed an existing allocated buffer to getdelim(), and at most it would have realloc'd, leaving our NUL in place. 2. We didn't have a buffer to feed, so we gave getdelim() NULL; sb->buf will remain NULL, and we just want to restore the empty slopbuf. But that second case isn't quite right. getdelim() may allocate a buffer, write nothing into it, and then return EOF. The resulting strbuf rightfully has sb->len set to "0", but is missing the NUL terminator in the first byte. Most call-sites are fine with this. They see the EOF and don't bother looking at the strbuf. Or they notice that sb->len is empty, and don't look at the contents. But there's at least one case that does neither, and relies on parsing the resulting (possibly zero-length) string: fast-import. You can see this in action with the new test (though we probably only notice failure there when run with --valgrind or ASAN). We can fix this by unconditionally resetting the strbuf when we have a buffer after getdelim(). That fixes case 2 above. Case 1 is probably already fine in practice, but it does not hurt for us to re-assert our invariants (especially because we are relying on whatever getdelim() happens to do, which may vary from platform to platform). Our fix covers that case, too. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-03-05 19:43:30 +01:00
/*
* Restore strbuf invariants; if getdelim left us with a NULL pointer,
* we can just re-init, but otherwise we should make sure that our
* length is empty, and that the result is NUL-terminated.
*/
if (!sb->buf)
strbuf_init(sb, 0);
strbuf_getwholeline: NUL-terminate getdelim buffer on error Commit 0cc30e0 (strbuf_getwholeline: use getdelim if it is available, 2015-04-16) tries to clean up after getdelim() returns EOF, but gets one case wrong, which can lead in some obscure cases to us reading uninitialized memory. After getdelim() returns -1, we re-initialize the strbuf only if sb->buf is NULL. The thinking was that either: 1. We fed an existing allocated buffer to getdelim(), and at most it would have realloc'd, leaving our NUL in place. 2. We didn't have a buffer to feed, so we gave getdelim() NULL; sb->buf will remain NULL, and we just want to restore the empty slopbuf. But that second case isn't quite right. getdelim() may allocate a buffer, write nothing into it, and then return EOF. The resulting strbuf rightfully has sb->len set to "0", but is missing the NUL terminator in the first byte. Most call-sites are fine with this. They see the EOF and don't bother looking at the strbuf. Or they notice that sb->len is empty, and don't look at the contents. But there's at least one case that does neither, and relies on parsing the resulting (possibly zero-length) string: fast-import. You can see this in action with the new test (though we probably only notice failure there when run with --valgrind or ASAN). We can fix this by unconditionally resetting the strbuf when we have a buffer after getdelim(). That fixes case 2 above. Case 1 is probably already fine in practice, but it does not hurt for us to re-assert our invariants (especially because we are relying on whatever getdelim() happens to do, which may vary from platform to platform). Our fix covers that case, too. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-03-05 19:43:30 +01:00
else
strbuf_reset(sb);
return EOF;
}
#else
int strbuf_getwholeline(struct strbuf *sb, FILE *fp, int term)
{
int ch;
if (feof(fp))
return EOF;
strbuf_reset(sb);
flockfile(fp);
while ((ch = getc_unlocked(fp)) != EOF) {
if (!strbuf_avail(sb))
strbuf_grow(sb, 1);
sb->buf[sb->len++] = ch;
if (ch == term)
break;
}
funlockfile(fp);
if (ch == EOF && sb->len == 0)
return EOF;
sb->buf[sb->len] = '\0';
return 0;
}
#endif
int strbuf_appendwholeline(struct strbuf *sb, FILE *fp, int term)
{
struct strbuf line = STRBUF_INIT;
if (strbuf_getwholeline(&line, fp, term))
return EOF;
strbuf_addbuf(sb, &line);
strbuf_release(&line);
return 0;
}
static int strbuf_getdelim(struct strbuf *sb, FILE *fp, int term)
{
if (strbuf_getwholeline(sb, fp, term))
return EOF;
if (sb->buf[sb->len - 1] == term)
strbuf_setlen(sb, sb->len - 1);
return 0;
}
int strbuf_getdelim_strip_crlf(struct strbuf *sb, FILE *fp, int term)
{
if (strbuf_getwholeline(sb, fp, term))
return EOF;
if (term == '\n' && sb->buf[sb->len - 1] == '\n') {
strbuf_setlen(sb, sb->len - 1);
if (sb->len && sb->buf[sb->len - 1] == '\r')
strbuf_setlen(sb, sb->len - 1);
}
return 0;
}
int strbuf_getline(struct strbuf *sb, FILE *fp)
{
return strbuf_getdelim_strip_crlf(sb, fp, '\n');
}
int strbuf_getline_lf(struct strbuf *sb, FILE *fp)
{
return strbuf_getdelim(sb, fp, '\n');
}
int strbuf_getline_nul(struct strbuf *sb, FILE *fp)
{
return strbuf_getdelim(sb, fp, '\0');
}
int strbuf_getwholeline_fd(struct strbuf *sb, int fd, int term)
{
strbuf_reset(sb);
while (1) {
char ch;
ssize_t len = xread(fd, &ch, 1);
if (len <= 0)
return EOF;
strbuf_addch(sb, ch);
if (ch == term)
break;
}
return 0;
}
ssize_t strbuf_read_file(struct strbuf *sb, const char *path, size_t hint)
{
int fd;
ssize_t len;
int saved_errno;
fd = open(path, O_RDONLY);
if (fd < 0)
return -1;
len = strbuf_read(sb, fd, hint);
saved_errno = errno;
close(fd);
if (len < 0) {
errno = saved_errno;
return -1;
}
return len;
}
void strbuf_add_lines(struct strbuf *out, const char *prefix,
const char *buf, size_t size)
{
add_lines(out, prefix, buf, size, 0);
}
void strbuf_addstr_xml_quoted(struct strbuf *buf, const char *s)
{
while (*s) {
size_t len = strcspn(s, "\"<>&");
strbuf_add(buf, s, len);
s += len;
switch (*s) {
case '"':
strbuf_addstr(buf, "&quot;");
break;
case '<':
strbuf_addstr(buf, "&lt;");
break;
case '>':
strbuf_addstr(buf, "&gt;");
break;
case '&':
strbuf_addstr(buf, "&amp;");
break;
case 0:
return;
}
s++;
}
}
static void strbuf_add_urlencode(struct strbuf *sb, const char *s, size_t len,
char_predicate allow_unencoded_fn)
{
strbuf_grow(sb, len);
while (len--) {
char ch = *s++;
if (allow_unencoded_fn(ch))
strbuf_addch(sb, ch);
else
strbuf_addf(sb, "%%%02x", (unsigned char)ch);
}
}
void strbuf_addstr_urlencode(struct strbuf *sb, const char *s,
char_predicate allow_unencoded_fn)
{
strbuf_add_urlencode(sb, s, strlen(s), allow_unencoded_fn);
}
static void strbuf_humanise(struct strbuf *buf, off_t bytes,
int humanise_rate)
{
if (bytes > 1 << 30) {
strbuf_addf(buf,
humanise_rate == 0 ?
/* TRANSLATORS: IEC 80000-13:2008 gibibyte */
_("%u.%2.2u GiB") :
/* TRANSLATORS: IEC 80000-13:2008 gibibyte/second */
_("%u.%2.2u GiB/s"),
(unsigned)(bytes >> 30),
(unsigned)(bytes & ((1 << 30) - 1)) / 10737419);
} else if (bytes > 1 << 20) {
unsigned x = bytes + 5243; /* for rounding */
strbuf_addf(buf,
humanise_rate == 0 ?
/* TRANSLATORS: IEC 80000-13:2008 mebibyte */
_("%u.%2.2u MiB") :
/* TRANSLATORS: IEC 80000-13:2008 mebibyte/second */
_("%u.%2.2u MiB/s"),
x >> 20, ((x & ((1 << 20) - 1)) * 100) >> 20);
} else if (bytes > 1 << 10) {
unsigned x = bytes + 5; /* for rounding */
strbuf_addf(buf,
humanise_rate == 0 ?
/* TRANSLATORS: IEC 80000-13:2008 kibibyte */
_("%u.%2.2u KiB") :
/* TRANSLATORS: IEC 80000-13:2008 kibibyte/second */
_("%u.%2.2u KiB/s"),
x >> 10, ((x & ((1 << 10) - 1)) * 100) >> 10);
} else {
strbuf_addf(buf,
humanise_rate == 0 ?
/* TRANSLATORS: IEC 80000-13:2008 byte */
Q_("%u byte", "%u bytes", bytes) :
/* TRANSLATORS: IEC 80000-13:2008 byte/second */
Q_("%u byte/s", "%u bytes/s", bytes),
(unsigned)bytes);
}
}
void strbuf_humanise_bytes(struct strbuf *buf, off_t bytes)
{
strbuf_humanise(buf, bytes, 0);
}
void strbuf_humanise_rate(struct strbuf *buf, off_t bytes)
{
strbuf_humanise(buf, bytes, 1);
}
int printf_ln(const char *fmt, ...)
{
int ret;
va_list ap;
va_start(ap, fmt);
ret = vprintf(fmt, ap);
va_end(ap);
if (ret < 0 || putchar('\n') == EOF)
return -1;
return ret + 1;
}
int fprintf_ln(FILE *fp, const char *fmt, ...)
{
int ret;
va_list ap;
va_start(ap, fmt);
ret = vfprintf(fp, fmt, ap);
va_end(ap);
if (ret < 0 || putc('\n', fp) == EOF)
return -1;
return ret + 1;
}
char *xstrdup_tolower(const char *string)
{
char *result;
size_t len, i;
len = strlen(string);
result = xmallocz(len);
for (i = 0; i < len; i++)
result[i] = tolower(string[i]);
return result;
}
char *xstrdup_toupper(const char *string)
{
char *result;
size_t len, i;
len = strlen(string);
result = xmallocz(len);
for (i = 0; i < len; i++)
result[i] = toupper(string[i]);
return result;
}
char *xstrvfmt(const char *fmt, va_list ap)
{
struct strbuf buf = STRBUF_INIT;
strbuf_vaddf(&buf, fmt, ap);
return strbuf_detach(&buf, NULL);
}
char *xstrfmt(const char *fmt, ...)
{
va_list ap;
char *ret;
va_start(ap, fmt);
ret = xstrvfmt(fmt, ap);
va_end(ap);
return ret;
}
void strbuf_addftime(struct strbuf *sb, const char *fmt, const struct tm *tm,
int tz_offset, int suppress_tz_name)
{
struct strbuf munged_fmt = STRBUF_INIT;
size_t hint = 128;
size_t len;
if (!*fmt)
return;
/*
* There is no portable way to pass timezone information to
strbuf_addftime(): handle "%s" manually The strftime() function has a non-standard "%s" extension, which prints the number of seconds since the epoch. But the "struct tm" we get has already been adjusted for a particular time zone; going back to an epoch time requires knowing that zone offset. Since strftime() doesn't take such an argument, round-tripping to a "struct tm" and back to the "%s" format may produce the wrong value (off by tz_offset seconds). Since we're already passing in the zone offset courtesy of c3fbf81a85 (strbuf: let strbuf_addftime handle %z and %Z itself, 2017-06-15), we can use that same value to adjust our epoch seconds accordingly. Note that the description above makes it sound like strftime()'s "%s" is useless (and really, the issue is shared by mktime(), which is what strftime() would use under the hood). But it gets the two cases for which it's designed correct: - the result of gmtime() will have a zero offset, so no adjustment is necessary - the result of localtime() will be offset by the local zone offset, and mktime() and strftime() are defined to assume this offset when converting back (there's actually some magic here; some implementations record this in the "struct tm", but we can't portably access or manipulate it. But they somehow "know" whether a "struct tm" is from gmtime() or localtime()). This latter point means that "format-local:%s" actually works correctly already, because in that case we rely on the system routines due to 6eced3ec5e (date: use localtime() for "-local" time formats, 2017-06-15). Our problem comes when trying to show times in the author's zone, as the system routines provide no mechanism for converting in non-local zones. So in those cases we have a "struct tm" that came from gmtime(), but has been manipulated according to our offset. The tests cover the broken round-trip by formatting "%s" for a time in a non-system timezone. We use the made-up "+1234" here, which has two advantages. One, we know it won't ever be the real system zone (and so we're actually testing a case that would break). And two, since it has a minute component, we're testing the full decoding of the +HHMM zone into a number of seconds. Likewise, we test the "-1234" variant to make sure there aren't any sign mistakes. There's one final test, which covers "format-local:%s". As noted, this already passes, but it's important to check that we didn't regress this case. In particular, the caller in show_date() is relying on localtime() to have done the zone adjustment, independent of any tz_offset we compute ourselves. These should match up, since our local_tzoffset() is likewise built around localtime(). But it would be easy for a caller to forget to pass in a correct tz_offset to strbuf_addftime(). Fortunately show_date() does this correctly (it has to because of the existing handling of %z), and the test continues to pass. So this one is just future-proofing against a change in our assumptions. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-11-02 12:35:34 +01:00
* strftime, so we handle %z and %Z here. Likewise '%s', because
* going back to an epoch time requires knowing the zone.
*
* Note that tz_offset is in the "[-+]HHMM" decimal form; this is what
* we want for %z, but the computation for %s has to convert to number
* of seconds.
*/
while (strbuf_expand_step(&munged_fmt, &fmt)) {
if (skip_prefix(fmt, "%", &fmt))
strbuf_addstr(&munged_fmt, "%%");
else if (skip_prefix(fmt, "s", &fmt))
strbuf_addftime(): handle "%s" manually The strftime() function has a non-standard "%s" extension, which prints the number of seconds since the epoch. But the "struct tm" we get has already been adjusted for a particular time zone; going back to an epoch time requires knowing that zone offset. Since strftime() doesn't take such an argument, round-tripping to a "struct tm" and back to the "%s" format may produce the wrong value (off by tz_offset seconds). Since we're already passing in the zone offset courtesy of c3fbf81a85 (strbuf: let strbuf_addftime handle %z and %Z itself, 2017-06-15), we can use that same value to adjust our epoch seconds accordingly. Note that the description above makes it sound like strftime()'s "%s" is useless (and really, the issue is shared by mktime(), which is what strftime() would use under the hood). But it gets the two cases for which it's designed correct: - the result of gmtime() will have a zero offset, so no adjustment is necessary - the result of localtime() will be offset by the local zone offset, and mktime() and strftime() are defined to assume this offset when converting back (there's actually some magic here; some implementations record this in the "struct tm", but we can't portably access or manipulate it. But they somehow "know" whether a "struct tm" is from gmtime() or localtime()). This latter point means that "format-local:%s" actually works correctly already, because in that case we rely on the system routines due to 6eced3ec5e (date: use localtime() for "-local" time formats, 2017-06-15). Our problem comes when trying to show times in the author's zone, as the system routines provide no mechanism for converting in non-local zones. So in those cases we have a "struct tm" that came from gmtime(), but has been manipulated according to our offset. The tests cover the broken round-trip by formatting "%s" for a time in a non-system timezone. We use the made-up "+1234" here, which has two advantages. One, we know it won't ever be the real system zone (and so we're actually testing a case that would break). And two, since it has a minute component, we're testing the full decoding of the +HHMM zone into a number of seconds. Likewise, we test the "-1234" variant to make sure there aren't any sign mistakes. There's one final test, which covers "format-local:%s". As noted, this already passes, but it's important to check that we didn't regress this case. In particular, the caller in show_date() is relying on localtime() to have done the zone adjustment, independent of any tz_offset we compute ourselves. These should match up, since our local_tzoffset() is likewise built around localtime(). But it would be easy for a caller to forget to pass in a correct tz_offset to strbuf_addftime(). Fortunately show_date() does this correctly (it has to because of the existing handling of %z), and the test continues to pass. So this one is just future-proofing against a change in our assumptions. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-11-02 12:35:34 +01:00
strbuf_addf(&munged_fmt, "%"PRItime,
(timestamp_t)tm_to_time_t(tm) -
3600 * (tz_offset / 100) -
60 * (tz_offset % 100));
else if (skip_prefix(fmt, "z", &fmt))
strbuf_addf(&munged_fmt, "%+05d", tz_offset);
else if (suppress_tz_name && skip_prefix(fmt, "Z", &fmt))
; /* nothing */
else
strbuf_addch(&munged_fmt, '%');
}
fmt = munged_fmt.buf;
strbuf_grow(sb, hint);
len = strftime(sb->buf + sb->len, sb->alloc - sb->len, fmt, tm);
if (!len) {
/*
* strftime reports "0" if it could not fit the result in the buffer.
* Unfortunately, it also reports "0" if the requested time string
* takes 0 bytes. So our strategy is to munge the format so that the
* output contains at least one character, and then drop the extra
* character before returning.
*/
strbuf_addch(&munged_fmt, ' ');
while (!len) {
hint *= 2;
strbuf_grow(sb, hint);
len = strftime(sb->buf + sb->len, sb->alloc - sb->len,
munged_fmt.buf, tm);
}
len--; /* drop munged space */
}
strbuf_release(&munged_fmt);
strbuf_setlen(sb, sb->len + len);
}
/*
* Returns the length of a line, without trailing spaces.
*
* If the line ends with newline, it will be removed too.
*/
static size_t cleanup(char *line, size_t len)
{
while (len) {
unsigned char c = line[len - 1];
if (!isspace(c))
break;
len--;
}
return len;
}
/*
* Remove empty lines from the beginning and end
* and also trailing spaces from every line.
*
* Turn multiple consecutive empty lines between paragraphs
* into just one empty line.
*
* If the input has only empty lines and spaces,
* no output will be produced.
*
* If last line does not have a newline at the end, one is added.
*
* Pass a non-NULL comment_prefix to skip every line starting
* with it.
*/
void strbuf_stripspace(struct strbuf *sb, const char *comment_prefix)
{
size_t empties = 0;
size_t i, j, len, newlen;
char *eol;
/* We may have to add a newline. */
strbuf_grow(sb, 1);
for (i = j = 0; i < sb->len; i += len, j += newlen) {
eol = memchr(sb->buf + i, '\n', sb->len - i);
len = eol ? eol - (sb->buf + i) + 1 : sb->len - i;
if (comment_prefix && len &&
starts_with(sb->buf + i, comment_prefix)) {
newlen = 0;
continue;
}
newlen = cleanup(sb->buf + i, len);
/* Not just an empty line? */
if (newlen) {
if (empties > 0 && j > 0)
sb->buf[j++] = '\n';
empties = 0;
memmove(sb->buf + j, sb->buf + i, newlen);
sb->buf[newlen + j++] = '\n';
} else {
empties++;
}
}
strbuf_setlen(sb, j);
}
link_alt_odb_entry: handle normalize_path errors When we add a new alternate to the list, we try to normalize out any redundant "..", etc. However, we do not look at the return value of normalize_path_copy(), and will happily continue with a path that could not be normalized. Worse, the normalizing process is done in-place, so we are left with whatever half-finished working state the normalizing function was in. Fortunately, this cannot cause us to read past the end of our buffer, as that working state will always leave the NUL from the original path in place. And we do tend to notice problems when we check is_directory() on the path. But you can see the nonsense that we feed to is_directory with an entry like: this/../../is/../../way/../../too/../../deep/../../to/../../resolve in your objects/info/alternates, which yields: error: object directory /to/e/deep/too/way//ects/this/../../is/../../way/../../too/../../deep/../../to/../../resolve does not exist; check .git/objects/info/alternates. We can easily fix this just by checking the return value. But that makes it hard to generate a good error message, since we're normalizing in-place and our input value has been overwritten by cruft. Instead, let's provide a strbuf helper that does an in-place normalize, but restores the original contents on error. This uses a second buffer under the hood, which is slightly less efficient, but this is not a performance-critical code path. The strbuf helper can also properly set the "len" parameter of the strbuf before returning. Just doing: normalize_path_copy(buf.buf, buf.buf); will shorten the string, but leave buf.len at the original length. That may be confusing to later code which uses the strbuf. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-03 22:34:17 +02:00
void strbuf_strip_file_from_path(struct strbuf *sb)
{
char *path_sep = find_last_dir_sep(sb->buf);
strbuf_setlen(sb, path_sep ? path_sep - sb->buf + 1 : 0);
}