1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-18 08:56:08 +02:00
git/sha1_file.c

4029 lines
100 KiB
C
Raw Normal View History

/*
* GIT - The information manager from hell
*
* Copyright (C) Linus Torvalds, 2005
*
* This handles basic git sha1 object files - packing, unpacking,
* creation etc.
*/
#include "cache.h"
#include "config.h"
#include "string-list.h"
#include "lockfile.h"
#include "delta.h"
#include "pack.h"
#include "blob.h"
#include "commit.h"
#include "run-command.h"
#include "tag.h"
#include "tree.h"
#include "tree-walk.h"
#include "refs.h"
#include "pack-revindex.h"
sha1-lookup: more memory efficient search in sorted list of SHA-1 Currently, when looking for a packed object from the pack idx, a simple binary search is used. A conventional binary search loop looks like this: unsigned lo, hi; do { unsigned mi = (lo + hi) / 2; int cmp = "entry pointed at by mi" minus "target"; if (!cmp) return mi; "mi is the wanted one" if (cmp > 0) hi = mi; "mi is larger than target" else lo = mi+1; "mi is smaller than target" } while (lo < hi); "did not find what we wanted" The invariants are: - When entering the loop, 'lo' points at a slot that is never above the target (it could be at the target), 'hi' points at a slot that is guaranteed to be above the target (it can never be at the target). - We find a point 'mi' between 'lo' and 'hi' ('mi' could be the same as 'lo', but never can be as high as 'hi'), and check if 'mi' hits the target. There are three cases: - if it is a hit, we have found what we are looking for; - if it is strictly higher than the target, we set it to 'hi', and repeat the search. - if it is strictly lower than the target, we update 'lo' to one slot after it, because we allow 'lo' to be at the target and 'mi' is known to be below the target. If the loop exits, there is no matching entry. When choosing 'mi', we do not have to take the "middle" but anywhere in between 'lo' and 'hi', as long as lo <= mi < hi is satisfied. When we somehow know that the distance between the target and 'lo' is much shorter than the target and 'hi', we could pick 'mi' that is much closer to 'lo' than (hi+lo)/2, which a conventional binary search would pick. This patch takes advantage of the fact that the SHA-1 is a good hash function, and as long as there are enough entries in the table, we can expect uniform distribution. An entry that begins with for example "deadbeef..." is much likely to appear much later than in the midway of a reasonably populated table. In fact, it can be expected to be near 87% (222/256) from the top of the table. This is a work-in-progress and has switches to allow easier experiments and debugging. Exporting GIT_USE_LOOKUP environment variable enables this code. On my admittedly memory starved machine, with a partial KDE repository (3.0G pack with 95M idx): $ GIT_USE_LOOKUP=t git log -800 --stat HEAD >/dev/null 3.93user 0.16system 0:04.09elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+55588minor)pagefaults 0swaps Without the patch, the numbers are: $ git log -800 --stat HEAD >/dev/null 4.00user 0.15system 0:04.17elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+60258minor)pagefaults 0swaps In the same repository: $ GIT_USE_LOOKUP=t git log -2000 HEAD >/dev/null 0.12user 0.00system 0:00.12elapsed 97%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+4241minor)pagefaults 0swaps Without the patch, the numbers are: $ git log -2000 HEAD >/dev/null 0.05user 0.01system 0:00.07elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+8506minor)pagefaults 0swaps There isn't much time difference, but the number of minor faults seems to show that we are touching much smaller number of pages, which is expected. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-12-29 11:05:47 +01:00
#include "sha1-lookup.h"
#include "bulk-checkin.h"
#include "streaming.h"
#include "dir.h"
find_pack_entry: replace last_found_pack with MRU cache Each pack has an index for looking up entries in O(log n) time, but if we have multiple packs, we have to scan through them linearly. This can produce a measurable overhead for some operations. We dealt with this long ago in f7c22cc (always start looking up objects in the last used pack first, 2007-05-30), which keeps what is essentially a 1-element most-recently-used cache. In theory, we should be able to do better by keeping a similar but longer cache, that is the same length as the pack-list itself. Since we now have a convenient generic MRU structure, we can plug it in and measure. Here are the numbers for running p5303 against linux.git: Test HEAD^ HEAD ------------------------------------------------------------------------ 5303.3: rev-list (1) 31.56(31.28+0.27) 31.30(31.08+0.20) -0.8% 5303.4: repack (1) 40.62(39.35+2.36) 40.60(39.27+2.44) -0.0% 5303.6: rev-list (50) 31.31(31.06+0.23) 31.23(31.00+0.22) -0.3% 5303.7: repack (50) 58.65(69.12+1.94) 58.27(68.64+2.05) -0.6% 5303.9: rev-list (1000) 38.74(38.40+0.33) 31.87(31.62+0.24) -17.7% 5303.10: repack (1000) 367.20(441.80+4.62) 342.00(414.04+3.72) -6.9% The main numbers of interest here are the rev-list ones (since that is exercising the normal object lookup code path). The single-pack case shouldn't improve at all; the 260ms speedup there is just part of the run-to-run noise (but it's important to note that we didn't make anything worse with the overhead of maintaining our cache). In the 50-pack case, we see similar results. There may be a slight improvement, but it's mostly within the noise. The 1000-pack case does show a big improvement, though. That carries over to the repack case, as well. Even though we haven't touched its pack-search loop yet, it does still do a lot of normal object lookups (e.g., for the internal revision walk), and so improves. As a point of reference, I also ran the 1000-pack test against a version of HEAD^ with the last_found_pack optimization disabled. It takes ~60s, so that gives an indication of how much even the single-element cache is helping. For comparison, here's a smaller repository, git.git: Test HEAD^ HEAD --------------------------------------------------------------------- 5303.3: rev-list (1) 1.56(1.54+0.01) 1.54(1.51+0.02) -1.3% 5303.4: repack (1) 1.84(1.80+0.10) 1.82(1.80+0.09) -1.1% 5303.6: rev-list (50) 1.58(1.55+0.02) 1.59(1.57+0.01) +0.6% 5303.7: repack (50) 2.50(3.18+0.04) 2.50(3.14+0.04) +0.0% 5303.9: rev-list (1000) 2.76(2.71+0.04) 2.24(2.21+0.02) -18.8% 5303.10: repack (1000) 13.21(19.56+0.25) 11.66(18.01+0.21) -11.7% You can see that the percentage improvement is similar. That's because the lookup we are optimizing is roughly O(nr_objects * nr_packs). Since the number of packs is constant in both tests, we'd expect the improvement to be linear in the number of objects. But the whole process is also linear in the number of objects, so the improvement is a constant factor. The exact improvement does also depend on the contents of the packs. In p5303, the extra packs all have 5 first-parent commits in them, which is a reasonable simulation of a pushed-to repository. But it also means that only 250 first-parent commits are in those packs (compared to almost 50,000 total in linux.git), and the rest are in the huge "base" pack. So once we start looking at history in taht big pack, that's where we'll find most everything, and even the 1-element cache gets close to 100% cache hits. You could almost certainly show better numbers with a more pathological case (e.g., distributing the objects more evenly across the packs). But that's simply not that realistic a scenario, so it makes more sense to focus on these numbers. The implementation itself is a straightforward application of the MRU code. We provide an MRU-ordered list of packs that shadows the packed_git list. This is easy to do because we only create and revise the pack list in one place. The "reprepare" code path actually drops the whole MRU and replaces it for simplicity. It would be more efficient to just add new entries, but there's not much point in optimizing here; repreparing happens rarely, and only after doing a lot of other expensive work. The key things to keep optimized are traversal (which is just a normal linked list, albeit with one extra level of indirection over the regular packed_git list), and marking (which is a constant number of pointer assignments, though slightly more than the old last_found_pack was; it doesn't seem to create a measurable slowdown, though). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-29 06:09:46 +02:00
#include "mru.h"
#include "list.h"
#include "mergesort.h"
alternates: accept double-quoted paths We read lists of alternates from objects/info/alternates files (delimited by newline), as well as from the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable (delimited by colon or semi-colon, depending on the platform). There's no mechanism for quoting the delimiters, so it's impossible to specify an alternate path that contains a colon in the environment, or one that contains a newline in a file. We've lived with that restriction for ages because both alternates and filenames with colons are relatively rare, and it's only a problem when the two meet. But since 722ff7f87 (receive-pack: quarantine objects until pre-receive accepts, 2016-10-03), which builds on the alternates system, every push causes the receiver to set GIT_ALTERNATE_OBJECT_DIRECTORIES internally. It would be convenient to have some way to quote the delimiter so that we can represent arbitrary paths. The simplest thing would be an escape character before a quoted delimiter (e.g., "\:" as a literal colon). But that creates a backwards compatibility problem: any path which uses that escape character is now broken, and we've just shifted the problem. We could choose an unlikely escape character (e.g., something from the non-printable ASCII range), but that's awkward to use. Instead, let's treat names as unquoted unless they begin with a double-quote, in which case they are interpreted via our usual C-stylke quoting rules. This also breaks backwards-compatibility, but in a smaller way: it only matters if your file has a double-quote as the very _first_ character in the path (whereas an escape character is a problem anywhere in the path). It's also consistent with many other parts of git, which accept either a bare pathname or a double-quoted one, and the sender can choose to quote or not as required. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-12 20:52:22 +01:00
#include "quote.h"
#define SZ_FMT PRIuMAX
static inline uintmax_t sz_fmt(size_t s) { return s; }
const unsigned char null_sha1[20];
const struct object_id null_oid;
const struct object_id empty_tree_oid = {
EMPTY_TREE_SHA1_BIN_LITERAL
};
const struct object_id empty_blob_oid = {
EMPTY_BLOB_SHA1_BIN_LITERAL
};
/*
* This is meant to hold a *small* number of objects that you would
* want read_sha1_file() to be able to return, but yet you do not want
* to write them into the object store (e.g. a browse-only
* application).
*/
static struct cached_object {
unsigned char sha1[20];
enum object_type type;
void *buf;
unsigned long size;
} *cached_objects;
static int cached_object_nr, cached_object_alloc;
static struct cached_object empty_tree = {
EMPTY_TREE_SHA1_BIN_LITERAL,
OBJ_TREE,
"",
0
};
static struct cached_object *find_cached_object(const unsigned char *sha1)
{
int i;
struct cached_object *co = cached_objects;
for (i = 0; i < cached_object_nr; i++, co++) {
if (!hashcmp(co->sha1, sha1))
return co;
}
if (!hashcmp(sha1, empty_tree.sha1))
return &empty_tree;
return NULL;
}
rerere: make sure it works even in a workdir attached to a young repository The git-new-workdir script in contrib/ makes a new work tree by sharing many subdirectories of the .git directory with the original repository. When rerere.enabled is set in the original repository, but the user has not encountered any conflicts yet, the original repository may not yet have .git/rr-cache directory. When rerere wants to run in a new work tree created from such a young original repository, it fails to mkdir(2) .git/rr-cache that is a symlink to a yet-to-be-created directory. There are three possible approaches to this: - A naive solution is not to create a symlink in the git-new-workdir script to a directory the original does not have (yet). This is not a solution, as we tend to lazily create subdirectories of .git/, and having rerere.enabled configuration set is a strong indication that the user _wants_ to have this lazy creation to happen; - We could always create .git/rr-cache upon repository creation. This is tempting but will not help people with existing repositories. - Detect this case by seeing that mkdir(2) failed with EEXIST, checking that the path is a symlink, and try running mkdir(2) on the link target. This patch solves the issue by doing the third one. Strictly speaking, this is incomplete. It does not attempt to handle relative symbolic link that points into the original repository, but this is good enough to help people who use contrib/workdir/git-new-workdir script. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-03-11 01:02:50 +01:00
int mkdir_in_gitdir(const char *path)
{
if (mkdir(path, 0777)) {
int saved_errno = errno;
struct stat st;
struct strbuf sb = STRBUF_INIT;
if (errno != EEXIST)
return -1;
/*
* Are we looking at a path in a symlinked worktree
* whose original repository does not yet have it?
* e.g. .git/rr-cache pointing at its original
* repository in which the user hasn't performed any
* conflict resolution yet?
*/
if (lstat(path, &st) || !S_ISLNK(st.st_mode) ||
strbuf_readlink(&sb, path, st.st_size) ||
!is_absolute_path(sb.buf) ||
mkdir(sb.buf, 0777)) {
strbuf_release(&sb);
errno = saved_errno;
return -1;
}
strbuf_release(&sb);
}
return adjust_shared_perm(path);
}
enum scld_error safe_create_leading_directories(char *path)
{
char *next_component = path + offset_1st_component(path);
enum scld_error ret = SCLD_OK;
while (ret == SCLD_OK && next_component) {
struct stat st;
char *slash = next_component, slash_character;
while (*slash && !is_dir_sep(*slash))
slash++;
if (!*slash)
break;
next_component = slash + 1;
while (is_dir_sep(*next_component))
next_component++;
if (!*next_component)
break;
slash_character = *slash;
*slash = '\0';
if (!stat(path, &st)) {
/* path exists */
if (!S_ISDIR(st.st_mode)) {
errno = ENOTDIR;
ret = SCLD_EXISTS;
}
} else if (mkdir(path, 0777)) {
if (errno == EEXIST &&
!stat(path, &st) && S_ISDIR(st.st_mode))
; /* somebody created it since we checked */
else if (errno == ENOENT)
/*
* Either mkdir() failed because
* somebody just pruned the containing
* directory, or stat() failed because
* the file that was in our way was
* just removed. Either way, inform
* the caller that it might be worth
* trying again:
*/
ret = SCLD_VANISHED;
else
ret = SCLD_FAILED;
} else if (adjust_shared_perm(path)) {
ret = SCLD_PERMS;
}
*slash = slash_character;
}
return ret;
}
enum scld_error safe_create_leading_directories_const(const char *path)
{
int save_errno;
/* path points to cache entries, so xstrdup before messing with it */
char *buf = xstrdup(path);
enum scld_error result = safe_create_leading_directories(buf);
save_errno = errno;
free(buf);
errno = save_errno;
return result;
}
int raceproof_create_file(const char *path, create_file_fn fn, void *cb)
{
/*
* The number of times we will try to remove empty directories
* in the way of path. This is only 1 because if another
* process is racily creating directories that conflict with
* us, we don't want to fight against them.
*/
int remove_directories_remaining = 1;
/*
* The number of times that we will try to create the
* directories containing path. We are willing to attempt this
* more than once, because another process could be trying to
* clean up empty directories at the same time as we are
* trying to create them.
*/
int create_directories_remaining = 3;
/* A scratch copy of path, filled lazily if we need it: */
struct strbuf path_copy = STRBUF_INIT;
int ret, save_errno;
/* Sanity check: */
assert(*path);
retry_fn:
ret = fn(path, cb);
save_errno = errno;
if (!ret)
goto out;
if (errno == EISDIR && remove_directories_remaining-- > 0) {
/*
* A directory is in the way. Maybe it is empty; try
* to remove it:
*/
if (!path_copy.len)
strbuf_addstr(&path_copy, path);
if (!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY))
goto retry_fn;
} else if (errno == ENOENT && create_directories_remaining-- > 0) {
/*
* Maybe the containing directory didn't exist, or
* maybe it was just deleted by a process that is
* racing with us to clean up empty directories. Try
* to create it:
*/
enum scld_error scld_result;
if (!path_copy.len)
strbuf_addstr(&path_copy, path);
do {
scld_result = safe_create_leading_directories(path_copy.buf);
if (scld_result == SCLD_OK)
goto retry_fn;
} while (scld_result == SCLD_VANISHED && create_directories_remaining-- > 0);
}
out:
strbuf_release(&path_copy);
errno = save_errno;
return ret;
}
static void fill_sha1_path(struct strbuf *buf, const unsigned char *sha1)
{
int i;
for (i = 0; i < 20; i++) {
static char hex[] = "0123456789abcdef";
unsigned int val = sha1[i];
strbuf_addch(buf, hex[val >> 4]);
strbuf_addch(buf, hex[val & 0xf]);
if (!i)
strbuf_addch(buf, '/');
}
}
const char *sha1_file_name(const unsigned char *sha1)
{
static struct strbuf buf = STRBUF_INIT;
strbuf_reset(&buf);
strbuf_addf(&buf, "%s/", get_object_directory());
fill_sha1_path(&buf, sha1);
return buf.buf;
}
struct strbuf *alt_scratch_buf(struct alternate_object_database *alt)
{
strbuf_setlen(&alt->scratch, alt->base_len);
return &alt->scratch;
}
static const char *alt_sha1_path(struct alternate_object_database *alt,
const unsigned char *sha1)
{
struct strbuf *buf = alt_scratch_buf(alt);
fill_sha1_path(buf, sha1);
return buf->buf;
}
char *odb_pack_name(struct strbuf *buf,
const unsigned char *sha1,
const char *ext)
{
strbuf_reset(buf);
strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),
sha1_to_hex(sha1), ext);
return buf->buf;
}
char *sha1_pack_name(const unsigned char *sha1)
{
static struct strbuf buf = STRBUF_INIT;
return odb_pack_name(&buf, sha1, "pack");
}
char *sha1_pack_index_name(const unsigned char *sha1)
{
static struct strbuf buf = STRBUF_INIT;
return odb_pack_name(&buf, sha1, "idx");
}
struct alternate_object_database *alt_odb_list;
static struct alternate_object_database **alt_odb_tail;
/*
* Return non-zero iff the path is usable as an alternate object database.
*/
static int alt_odb_usable(struct strbuf *path, const char *normalized_objdir)
{
struct alternate_object_database *alt;
/* Detect cases where alternate disappeared */
if (!is_directory(path->buf)) {
error("object directory %s does not exist; "
"check .git/objects/info/alternates.",
path->buf);
return 0;
}
/*
* Prevent the common mistake of listing the same
* thing twice, or object directory itself.
*/
for (alt = alt_odb_list; alt; alt = alt->next) {
if (!fspathcmp(path->buf, alt->path))
return 0;
}
if (!fspathcmp(path->buf, normalized_objdir))
return 0;
return 1;
}
/*
* Prepare alternate object database registry.
*
* The variable alt_odb_list points at the list of struct
* alternate_object_database. The elements on this list come from
* non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT
* environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates,
* whose contents is similar to that environment variable but can be
* LF separated. Its base points at a statically allocated buffer that
* contains "/the/directory/corresponding/to/.git/objects/...", while
* its name points just after the slash at the end of ".git/objects/"
* in the example above, and has enough space to hold 40-byte hex
* SHA1, an extra slash for the first level indirection, and the
* terminating NUL.
*/
static void read_info_alternates(const char * relative_base, int depth);
static int link_alt_odb_entry(const char *entry, const char *relative_base,
int depth, const char *normalized_objdir)
{
struct alternate_object_database *ent;
struct strbuf pathbuf = STRBUF_INIT;
if (!is_absolute_path(entry) && relative_base) {
strbuf_realpath(&pathbuf, relative_base, 1);
strbuf_addch(&pathbuf, '/');
}
strbuf_addstr(&pathbuf, entry);
alternates: re-allow relative paths from environment Commit 670c359da (link_alt_odb_entry: handle normalize_path errors, 2016-10-03) regressed the handling of relative paths in the GIT_ALTERNATE_OBJECT_DIRECTORIES variable. It's not entirely clear this was ever meant to work, but it _has_ worked for several years, so this commit restores the original behavior. When we get a path in GIT_ALTERNATE_OBJECT_DIRECTORIES, we add it the path to the list of alternate object directories as if it were found in objects/info/alternates, but with one difference: we do not provide the link_alt_odb_entry() function with a base for relative paths. That function doesn't turn it into an absolute path, and we end up feeding the relative path to the strbuf_normalize_path() function. Most relative paths break out of the top-level directory (e.g., "../foo.git/objects"), and thus normalizing fails. Prior to 670c359da, we simply ignored the error, and due to the way normalize_path_copy() was implemented it happened to return the original path in this case. We then accessed the alternate objects using this relative path. By storing the relative path in the alt_odb list, the path is relative to wherever we happen to be at the time we do an object lookup. That means we look from $GIT_DIR in a bare repository, and from the top of the worktree in a non-bare repository. If this were being designed from scratch, it would make sense to pick a stable location (probably $GIT_DIR, or even the object directory) and use that as the relative base, turning the result into an absolute path. However, given the history, at this point the minimal fix is to match the pre-670c359da behavior. We can do this simply by ignoring the error when we have no relative base and using the original value (which we now reliably have, thanks to strbuf_normalize_path()). That still leaves us with a relative path that foils our duplicate detection, and may act strangely if we ever chdir() later in the process. We could solve that by storing an absolute path based on getcwd(). That may be a good future direction; for now we'll do just the minimum to fix the regression. The new t5615 script demonstrates the fix in its final three tests. Since we didn't have any tests of the alternates environment variable at all, it also adds some tests of absolute paths. Reported-by: Bryan Turner <bturner@atlassian.com> Signed-off-by: Jeff King <peff@peff.net>
2016-11-08 05:50:17 +01:00
if (strbuf_normalize_path(&pathbuf) < 0 && relative_base) {
link_alt_odb_entry: handle normalize_path errors When we add a new alternate to the list, we try to normalize out any redundant "..", etc. However, we do not look at the return value of normalize_path_copy(), and will happily continue with a path that could not be normalized. Worse, the normalizing process is done in-place, so we are left with whatever half-finished working state the normalizing function was in. Fortunately, this cannot cause us to read past the end of our buffer, as that working state will always leave the NUL from the original path in place. And we do tend to notice problems when we check is_directory() on the path. But you can see the nonsense that we feed to is_directory with an entry like: this/../../is/../../way/../../too/../../deep/../../to/../../resolve in your objects/info/alternates, which yields: error: object directory /to/e/deep/too/way//ects/this/../../is/../../way/../../too/../../deep/../../to/../../resolve does not exist; check .git/objects/info/alternates. We can easily fix this just by checking the return value. But that makes it hard to generate a good error message, since we're normalizing in-place and our input value has been overwritten by cruft. Instead, let's provide a strbuf helper that does an in-place normalize, but restores the original contents on error. This uses a second buffer under the hood, which is slightly less efficient, but this is not a performance-critical code path. The strbuf helper can also properly set the "len" parameter of the strbuf before returning. Just doing: normalize_path_copy(buf.buf, buf.buf); will shorten the string, but leave buf.len at the original length. That may be confusing to later code which uses the strbuf. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-03 22:34:17 +02:00
error("unable to normalize alternate object path: %s",
pathbuf.buf);
strbuf_release(&pathbuf);
return -1;
}
/*
* The trailing slash after the directory name is given by
* this function at the end. Remove duplicates.
*/
while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/')
strbuf_setlen(&pathbuf, pathbuf.len - 1);
if (!alt_odb_usable(&pathbuf, normalized_objdir)) {
strbuf_release(&pathbuf);
return -1;
}
ent = alloc_alt_odb(pathbuf.buf);
/* add the alternate entry */
*alt_odb_tail = ent;
alt_odb_tail = &(ent->next);
ent->next = NULL;
/* recursively add alternates */
read_info_alternates(pathbuf.buf, depth + 1);
strbuf_release(&pathbuf);
return 0;
}
alternates: accept double-quoted paths We read lists of alternates from objects/info/alternates files (delimited by newline), as well as from the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable (delimited by colon or semi-colon, depending on the platform). There's no mechanism for quoting the delimiters, so it's impossible to specify an alternate path that contains a colon in the environment, or one that contains a newline in a file. We've lived with that restriction for ages because both alternates and filenames with colons are relatively rare, and it's only a problem when the two meet. But since 722ff7f87 (receive-pack: quarantine objects until pre-receive accepts, 2016-10-03), which builds on the alternates system, every push causes the receiver to set GIT_ALTERNATE_OBJECT_DIRECTORIES internally. It would be convenient to have some way to quote the delimiter so that we can represent arbitrary paths. The simplest thing would be an escape character before a quoted delimiter (e.g., "\:" as a literal colon). But that creates a backwards compatibility problem: any path which uses that escape character is now broken, and we've just shifted the problem. We could choose an unlikely escape character (e.g., something from the non-printable ASCII range), but that's awkward to use. Instead, let's treat names as unquoted unless they begin with a double-quote, in which case they are interpreted via our usual C-stylke quoting rules. This also breaks backwards-compatibility, but in a smaller way: it only matters if your file has a double-quote as the very _first_ character in the path (whereas an escape character is a problem anywhere in the path). It's also consistent with many other parts of git, which accept either a bare pathname or a double-quoted one, and the sender can choose to quote or not as required. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-12 20:52:22 +01:00
static const char *parse_alt_odb_entry(const char *string,
int sep,
struct strbuf *out)
{
const char *end;
strbuf_reset(out);
if (*string == '#') {
/* comment; consume up to next separator */
end = strchrnul(string, sep);
} else if (*string == '"' && !unquote_c_style(out, string, &end)) {
/*
* quoted path; unquote_c_style has copied the
* data for us and set "end". Broken quoting (e.g.,
* an entry that doesn't end with a quote) falls
* back to the unquoted case below.
*/
} else {
/* normal, unquoted path */
end = strchrnul(string, sep);
strbuf_add(out, string, end - string);
}
if (*end)
end++;
return end;
}
read_info_alternates: read contents into strbuf This patch fixes a regression in v2.11.1 where we might read past the end of an mmap'd buffer. It was introduced in cf3c635210. The link_alt_odb_entries() function has always taken a ptr/len pair as input. Until cf3c635210 (alternates: accept double-quoted paths, 2016-12-12), we made a copy of those bytes in a string. But after that commit, we switched to parsing the input left-to-right, and we ignore "len" totally, instead reading until we hit a NUL. This has mostly gone unnoticed for a few reasons: 1. All but one caller passes a NUL-terminated string, with "len" pointing to the NUL. 2. The remaining caller, read_info_alternates(), passes in an mmap'd file. Unless the file is an exact multiple of the page size, it will generally be followed by NUL padding to the end of the page, which just works. The easiest way to demonstrate the problem is to build with: make SANITIZE=address NO_MMAP=Nope test Any test which involves $GIT_DIR/info/alternates will fail, as the mmap emulation (correctly) does not add an extra NUL, and ASAN complains about reading past the end of the buffer. One solution would be to teach link_alt_odb_entries() to respect "len". But it's actually a bit tricky, since we depend on unquote_c_style() under the hood, and it has no ptr/len variant. We could also just make a NUL-terminated copy of the input bytes and operate on that. But since all but one caller already is passing a string, instead let's just fix that caller to provide NUL-terminated input in the first place, by swapping out mmap for strbuf_read_file(). There's no advantage to using mmap on the alternates file. It's not expected to be large (and anyway, we're copying its contents into an in-memory linked list). Nor is using git_open() buying us anything here, since we don't keep the descriptor open for a long period of time. Let's also drop the "len" parameter entirely from link_alt_odb_entries(), since it's completely ignored. That will avoid any new callers re-introducing a similar bug. Reported-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-19 21:41:07 +02:00
static void link_alt_odb_entries(const char *alt, int sep,
const char *relative_base, int depth)
{
struct strbuf objdirbuf = STRBUF_INIT;
alternates: accept double-quoted paths We read lists of alternates from objects/info/alternates files (delimited by newline), as well as from the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable (delimited by colon or semi-colon, depending on the platform). There's no mechanism for quoting the delimiters, so it's impossible to specify an alternate path that contains a colon in the environment, or one that contains a newline in a file. We've lived with that restriction for ages because both alternates and filenames with colons are relatively rare, and it's only a problem when the two meet. But since 722ff7f87 (receive-pack: quarantine objects until pre-receive accepts, 2016-10-03), which builds on the alternates system, every push causes the receiver to set GIT_ALTERNATE_OBJECT_DIRECTORIES internally. It would be convenient to have some way to quote the delimiter so that we can represent arbitrary paths. The simplest thing would be an escape character before a quoted delimiter (e.g., "\:" as a literal colon). But that creates a backwards compatibility problem: any path which uses that escape character is now broken, and we've just shifted the problem. We could choose an unlikely escape character (e.g., something from the non-printable ASCII range), but that's awkward to use. Instead, let's treat names as unquoted unless they begin with a double-quote, in which case they are interpreted via our usual C-stylke quoting rules. This also breaks backwards-compatibility, but in a smaller way: it only matters if your file has a double-quote as the very _first_ character in the path (whereas an escape character is a problem anywhere in the path). It's also consistent with many other parts of git, which accept either a bare pathname or a double-quoted one, and the sender can choose to quote or not as required. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-12 20:52:22 +01:00
struct strbuf entry = STRBUF_INIT;
if (depth > 5) {
error("%s: ignoring alternate object stores, nesting too deep.",
relative_base);
return;
}
strbuf_add_absolute_path(&objdirbuf, get_object_directory());
link_alt_odb_entry: handle normalize_path errors When we add a new alternate to the list, we try to normalize out any redundant "..", etc. However, we do not look at the return value of normalize_path_copy(), and will happily continue with a path that could not be normalized. Worse, the normalizing process is done in-place, so we are left with whatever half-finished working state the normalizing function was in. Fortunately, this cannot cause us to read past the end of our buffer, as that working state will always leave the NUL from the original path in place. And we do tend to notice problems when we check is_directory() on the path. But you can see the nonsense that we feed to is_directory with an entry like: this/../../is/../../way/../../too/../../deep/../../to/../../resolve in your objects/info/alternates, which yields: error: object directory /to/e/deep/too/way//ects/this/../../is/../../way/../../too/../../deep/../../to/../../resolve does not exist; check .git/objects/info/alternates. We can easily fix this just by checking the return value. But that makes it hard to generate a good error message, since we're normalizing in-place and our input value has been overwritten by cruft. Instead, let's provide a strbuf helper that does an in-place normalize, but restores the original contents on error. This uses a second buffer under the hood, which is slightly less efficient, but this is not a performance-critical code path. The strbuf helper can also properly set the "len" parameter of the strbuf before returning. Just doing: normalize_path_copy(buf.buf, buf.buf); will shorten the string, but leave buf.len at the original length. That may be confusing to later code which uses the strbuf. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-03 22:34:17 +02:00
if (strbuf_normalize_path(&objdirbuf) < 0)
die("unable to normalize object directory: %s",
objdirbuf.buf);
alternates: accept double-quoted paths We read lists of alternates from objects/info/alternates files (delimited by newline), as well as from the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable (delimited by colon or semi-colon, depending on the platform). There's no mechanism for quoting the delimiters, so it's impossible to specify an alternate path that contains a colon in the environment, or one that contains a newline in a file. We've lived with that restriction for ages because both alternates and filenames with colons are relatively rare, and it's only a problem when the two meet. But since 722ff7f87 (receive-pack: quarantine objects until pre-receive accepts, 2016-10-03), which builds on the alternates system, every push causes the receiver to set GIT_ALTERNATE_OBJECT_DIRECTORIES internally. It would be convenient to have some way to quote the delimiter so that we can represent arbitrary paths. The simplest thing would be an escape character before a quoted delimiter (e.g., "\:" as a literal colon). But that creates a backwards compatibility problem: any path which uses that escape character is now broken, and we've just shifted the problem. We could choose an unlikely escape character (e.g., something from the non-printable ASCII range), but that's awkward to use. Instead, let's treat names as unquoted unless they begin with a double-quote, in which case they are interpreted via our usual C-stylke quoting rules. This also breaks backwards-compatibility, but in a smaller way: it only matters if your file has a double-quote as the very _first_ character in the path (whereas an escape character is a problem anywhere in the path). It's also consistent with many other parts of git, which accept either a bare pathname or a double-quoted one, and the sender can choose to quote or not as required. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-12 20:52:22 +01:00
while (*alt) {
alt = parse_alt_odb_entry(alt, sep, &entry);
if (!entry.len)
continue;
alternates: accept double-quoted paths We read lists of alternates from objects/info/alternates files (delimited by newline), as well as from the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable (delimited by colon or semi-colon, depending on the platform). There's no mechanism for quoting the delimiters, so it's impossible to specify an alternate path that contains a colon in the environment, or one that contains a newline in a file. We've lived with that restriction for ages because both alternates and filenames with colons are relatively rare, and it's only a problem when the two meet. But since 722ff7f87 (receive-pack: quarantine objects until pre-receive accepts, 2016-10-03), which builds on the alternates system, every push causes the receiver to set GIT_ALTERNATE_OBJECT_DIRECTORIES internally. It would be convenient to have some way to quote the delimiter so that we can represent arbitrary paths. The simplest thing would be an escape character before a quoted delimiter (e.g., "\:" as a literal colon). But that creates a backwards compatibility problem: any path which uses that escape character is now broken, and we've just shifted the problem. We could choose an unlikely escape character (e.g., something from the non-printable ASCII range), but that's awkward to use. Instead, let's treat names as unquoted unless they begin with a double-quote, in which case they are interpreted via our usual C-stylke quoting rules. This also breaks backwards-compatibility, but in a smaller way: it only matters if your file has a double-quote as the very _first_ character in the path (whereas an escape character is a problem anywhere in the path). It's also consistent with many other parts of git, which accept either a bare pathname or a double-quoted one, and the sender can choose to quote or not as required. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-12 20:52:22 +01:00
link_alt_odb_entry(entry.buf, relative_base, depth, objdirbuf.buf);
}
alternates: accept double-quoted paths We read lists of alternates from objects/info/alternates files (delimited by newline), as well as from the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable (delimited by colon or semi-colon, depending on the platform). There's no mechanism for quoting the delimiters, so it's impossible to specify an alternate path that contains a colon in the environment, or one that contains a newline in a file. We've lived with that restriction for ages because both alternates and filenames with colons are relatively rare, and it's only a problem when the two meet. But since 722ff7f87 (receive-pack: quarantine objects until pre-receive accepts, 2016-10-03), which builds on the alternates system, every push causes the receiver to set GIT_ALTERNATE_OBJECT_DIRECTORIES internally. It would be convenient to have some way to quote the delimiter so that we can represent arbitrary paths. The simplest thing would be an escape character before a quoted delimiter (e.g., "\:" as a literal colon). But that creates a backwards compatibility problem: any path which uses that escape character is now broken, and we've just shifted the problem. We could choose an unlikely escape character (e.g., something from the non-printable ASCII range), but that's awkward to use. Instead, let's treat names as unquoted unless they begin with a double-quote, in which case they are interpreted via our usual C-stylke quoting rules. This also breaks backwards-compatibility, but in a smaller way: it only matters if your file has a double-quote as the very _first_ character in the path (whereas an escape character is a problem anywhere in the path). It's also consistent with many other parts of git, which accept either a bare pathname or a double-quoted one, and the sender can choose to quote or not as required. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-12 20:52:22 +01:00
strbuf_release(&entry);
strbuf_release(&objdirbuf);
}
static void read_info_alternates(const char * relative_base, int depth)
{
char *path;
read_info_alternates: read contents into strbuf This patch fixes a regression in v2.11.1 where we might read past the end of an mmap'd buffer. It was introduced in cf3c635210. The link_alt_odb_entries() function has always taken a ptr/len pair as input. Until cf3c635210 (alternates: accept double-quoted paths, 2016-12-12), we made a copy of those bytes in a string. But after that commit, we switched to parsing the input left-to-right, and we ignore "len" totally, instead reading until we hit a NUL. This has mostly gone unnoticed for a few reasons: 1. All but one caller passes a NUL-terminated string, with "len" pointing to the NUL. 2. The remaining caller, read_info_alternates(), passes in an mmap'd file. Unless the file is an exact multiple of the page size, it will generally be followed by NUL padding to the end of the page, which just works. The easiest way to demonstrate the problem is to build with: make SANITIZE=address NO_MMAP=Nope test Any test which involves $GIT_DIR/info/alternates will fail, as the mmap emulation (correctly) does not add an extra NUL, and ASAN complains about reading past the end of the buffer. One solution would be to teach link_alt_odb_entries() to respect "len". But it's actually a bit tricky, since we depend on unquote_c_style() under the hood, and it has no ptr/len variant. We could also just make a NUL-terminated copy of the input bytes and operate on that. But since all but one caller already is passing a string, instead let's just fix that caller to provide NUL-terminated input in the first place, by swapping out mmap for strbuf_read_file(). There's no advantage to using mmap on the alternates file. It's not expected to be large (and anyway, we're copying its contents into an in-memory linked list). Nor is using git_open() buying us anything here, since we don't keep the descriptor open for a long period of time. Let's also drop the "len" parameter entirely from link_alt_odb_entries(), since it's completely ignored. That will avoid any new callers re-introducing a similar bug. Reported-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-19 21:41:07 +02:00
struct strbuf buf = STRBUF_INIT;
path = xstrfmt("%s/info/alternates", relative_base);
read_info_alternates: read contents into strbuf This patch fixes a regression in v2.11.1 where we might read past the end of an mmap'd buffer. It was introduced in cf3c635210. The link_alt_odb_entries() function has always taken a ptr/len pair as input. Until cf3c635210 (alternates: accept double-quoted paths, 2016-12-12), we made a copy of those bytes in a string. But after that commit, we switched to parsing the input left-to-right, and we ignore "len" totally, instead reading until we hit a NUL. This has mostly gone unnoticed for a few reasons: 1. All but one caller passes a NUL-terminated string, with "len" pointing to the NUL. 2. The remaining caller, read_info_alternates(), passes in an mmap'd file. Unless the file is an exact multiple of the page size, it will generally be followed by NUL padding to the end of the page, which just works. The easiest way to demonstrate the problem is to build with: make SANITIZE=address NO_MMAP=Nope test Any test which involves $GIT_DIR/info/alternates will fail, as the mmap emulation (correctly) does not add an extra NUL, and ASAN complains about reading past the end of the buffer. One solution would be to teach link_alt_odb_entries() to respect "len". But it's actually a bit tricky, since we depend on unquote_c_style() under the hood, and it has no ptr/len variant. We could also just make a NUL-terminated copy of the input bytes and operate on that. But since all but one caller already is passing a string, instead let's just fix that caller to provide NUL-terminated input in the first place, by swapping out mmap for strbuf_read_file(). There's no advantage to using mmap on the alternates file. It's not expected to be large (and anyway, we're copying its contents into an in-memory linked list). Nor is using git_open() buying us anything here, since we don't keep the descriptor open for a long period of time. Let's also drop the "len" parameter entirely from link_alt_odb_entries(), since it's completely ignored. That will avoid any new callers re-introducing a similar bug. Reported-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-19 21:41:07 +02:00
if (strbuf_read_file(&buf, path, 1024) < 0) {
warn_on_fopen_errors(path);
read_info_alternates: read contents into strbuf This patch fixes a regression in v2.11.1 where we might read past the end of an mmap'd buffer. It was introduced in cf3c635210. The link_alt_odb_entries() function has always taken a ptr/len pair as input. Until cf3c635210 (alternates: accept double-quoted paths, 2016-12-12), we made a copy of those bytes in a string. But after that commit, we switched to parsing the input left-to-right, and we ignore "len" totally, instead reading until we hit a NUL. This has mostly gone unnoticed for a few reasons: 1. All but one caller passes a NUL-terminated string, with "len" pointing to the NUL. 2. The remaining caller, read_info_alternates(), passes in an mmap'd file. Unless the file is an exact multiple of the page size, it will generally be followed by NUL padding to the end of the page, which just works. The easiest way to demonstrate the problem is to build with: make SANITIZE=address NO_MMAP=Nope test Any test which involves $GIT_DIR/info/alternates will fail, as the mmap emulation (correctly) does not add an extra NUL, and ASAN complains about reading past the end of the buffer. One solution would be to teach link_alt_odb_entries() to respect "len". But it's actually a bit tricky, since we depend on unquote_c_style() under the hood, and it has no ptr/len variant. We could also just make a NUL-terminated copy of the input bytes and operate on that. But since all but one caller already is passing a string, instead let's just fix that caller to provide NUL-terminated input in the first place, by swapping out mmap for strbuf_read_file(). There's no advantage to using mmap on the alternates file. It's not expected to be large (and anyway, we're copying its contents into an in-memory linked list). Nor is using git_open() buying us anything here, since we don't keep the descriptor open for a long period of time. Let's also drop the "len" parameter entirely from link_alt_odb_entries(), since it's completely ignored. That will avoid any new callers re-introducing a similar bug. Reported-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-19 21:41:07 +02:00
free(path);
return;
}
read_info_alternates: read contents into strbuf This patch fixes a regression in v2.11.1 where we might read past the end of an mmap'd buffer. It was introduced in cf3c635210. The link_alt_odb_entries() function has always taken a ptr/len pair as input. Until cf3c635210 (alternates: accept double-quoted paths, 2016-12-12), we made a copy of those bytes in a string. But after that commit, we switched to parsing the input left-to-right, and we ignore "len" totally, instead reading until we hit a NUL. This has mostly gone unnoticed for a few reasons: 1. All but one caller passes a NUL-terminated string, with "len" pointing to the NUL. 2. The remaining caller, read_info_alternates(), passes in an mmap'd file. Unless the file is an exact multiple of the page size, it will generally be followed by NUL padding to the end of the page, which just works. The easiest way to demonstrate the problem is to build with: make SANITIZE=address NO_MMAP=Nope test Any test which involves $GIT_DIR/info/alternates will fail, as the mmap emulation (correctly) does not add an extra NUL, and ASAN complains about reading past the end of the buffer. One solution would be to teach link_alt_odb_entries() to respect "len". But it's actually a bit tricky, since we depend on unquote_c_style() under the hood, and it has no ptr/len variant. We could also just make a NUL-terminated copy of the input bytes and operate on that. But since all but one caller already is passing a string, instead let's just fix that caller to provide NUL-terminated input in the first place, by swapping out mmap for strbuf_read_file(). There's no advantage to using mmap on the alternates file. It's not expected to be large (and anyway, we're copying its contents into an in-memory linked list). Nor is using git_open() buying us anything here, since we don't keep the descriptor open for a long period of time. Let's also drop the "len" parameter entirely from link_alt_odb_entries(), since it's completely ignored. That will avoid any new callers re-introducing a similar bug. Reported-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-19 21:41:07 +02:00
link_alt_odb_entries(buf.buf, '\n', relative_base, depth);
strbuf_release(&buf);
free(path);
}
struct alternate_object_database *alloc_alt_odb(const char *dir)
{
struct alternate_object_database *ent;
FLEX_ALLOC_STR(ent, path, dir);
strbuf_init(&ent->scratch, 0);
strbuf_addf(&ent->scratch, "%s/", dir);
ent->base_len = ent->scratch.len;
return ent;
}
void add_to_alternates_file(const char *reference)
{
struct lock_file *lock = xcalloc(1, sizeof(struct lock_file));
char *alts = git_pathdup("objects/info/alternates");
FILE *in, *out;
hold_lock_file_for_update(lock, alts, LOCK_DIE_ON_ERROR);
out = fdopen_lock_file(lock, "w");
if (!out)
die_errno("unable to fdopen alternates lockfile");
in = fopen(alts, "r");
if (in) {
struct strbuf line = STRBUF_INIT;
int found = 0;
while (strbuf_getline(&line, in) != EOF) {
if (!strcmp(reference, line.buf)) {
found = 1;
break;
}
fprintf_or_die(out, "%s\n", line.buf);
}
strbuf_release(&line);
fclose(in);
if (found) {
rollback_lock_file(lock);
lock = NULL;
}
}
else if (errno != ENOENT)
die_errno("unable to read alternates file");
if (lock) {
fprintf_or_die(out, "%s\n", reference);
if (commit_lock_file(lock))
die_errno("unable to move new alternates file into place");
if (alt_odb_tail)
read_info_alternates: read contents into strbuf This patch fixes a regression in v2.11.1 where we might read past the end of an mmap'd buffer. It was introduced in cf3c635210. The link_alt_odb_entries() function has always taken a ptr/len pair as input. Until cf3c635210 (alternates: accept double-quoted paths, 2016-12-12), we made a copy of those bytes in a string. But after that commit, we switched to parsing the input left-to-right, and we ignore "len" totally, instead reading until we hit a NUL. This has mostly gone unnoticed for a few reasons: 1. All but one caller passes a NUL-terminated string, with "len" pointing to the NUL. 2. The remaining caller, read_info_alternates(), passes in an mmap'd file. Unless the file is an exact multiple of the page size, it will generally be followed by NUL padding to the end of the page, which just works. The easiest way to demonstrate the problem is to build with: make SANITIZE=address NO_MMAP=Nope test Any test which involves $GIT_DIR/info/alternates will fail, as the mmap emulation (correctly) does not add an extra NUL, and ASAN complains about reading past the end of the buffer. One solution would be to teach link_alt_odb_entries() to respect "len". But it's actually a bit tricky, since we depend on unquote_c_style() under the hood, and it has no ptr/len variant. We could also just make a NUL-terminated copy of the input bytes and operate on that. But since all but one caller already is passing a string, instead let's just fix that caller to provide NUL-terminated input in the first place, by swapping out mmap for strbuf_read_file(). There's no advantage to using mmap on the alternates file. It's not expected to be large (and anyway, we're copying its contents into an in-memory linked list). Nor is using git_open() buying us anything here, since we don't keep the descriptor open for a long period of time. Let's also drop the "len" parameter entirely from link_alt_odb_entries(), since it's completely ignored. That will avoid any new callers re-introducing a similar bug. Reported-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-19 21:41:07 +02:00
link_alt_odb_entries(reference, '\n', NULL, 0);
}
free(alts);
}
void add_to_alternates_memory(const char *reference)
{
/*
* Make sure alternates are initialized, or else our entry may be
* overwritten when they are.
*/
prepare_alt_odb();
read_info_alternates: read contents into strbuf This patch fixes a regression in v2.11.1 where we might read past the end of an mmap'd buffer. It was introduced in cf3c635210. The link_alt_odb_entries() function has always taken a ptr/len pair as input. Until cf3c635210 (alternates: accept double-quoted paths, 2016-12-12), we made a copy of those bytes in a string. But after that commit, we switched to parsing the input left-to-right, and we ignore "len" totally, instead reading until we hit a NUL. This has mostly gone unnoticed for a few reasons: 1. All but one caller passes a NUL-terminated string, with "len" pointing to the NUL. 2. The remaining caller, read_info_alternates(), passes in an mmap'd file. Unless the file is an exact multiple of the page size, it will generally be followed by NUL padding to the end of the page, which just works. The easiest way to demonstrate the problem is to build with: make SANITIZE=address NO_MMAP=Nope test Any test which involves $GIT_DIR/info/alternates will fail, as the mmap emulation (correctly) does not add an extra NUL, and ASAN complains about reading past the end of the buffer. One solution would be to teach link_alt_odb_entries() to respect "len". But it's actually a bit tricky, since we depend on unquote_c_style() under the hood, and it has no ptr/len variant. We could also just make a NUL-terminated copy of the input bytes and operate on that. But since all but one caller already is passing a string, instead let's just fix that caller to provide NUL-terminated input in the first place, by swapping out mmap for strbuf_read_file(). There's no advantage to using mmap on the alternates file. It's not expected to be large (and anyway, we're copying its contents into an in-memory linked list). Nor is using git_open() buying us anything here, since we don't keep the descriptor open for a long period of time. Let's also drop the "len" parameter entirely from link_alt_odb_entries(), since it's completely ignored. That will avoid any new callers re-introducing a similar bug. Reported-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-19 21:41:07 +02:00
link_alt_odb_entries(reference, '\n', NULL, 0);
}
/*
* Compute the exact path an alternate is at and returns it. In case of
* error NULL is returned and the human readable error is added to `err`
* `path` may be relative and should point to $GITDIR.
* `err` must not be null.
*/
char *compute_alternate_path(const char *path, struct strbuf *err)
{
char *ref_git = NULL;
const char *repo, *ref_git_s;
int seen_error = 0;
ref_git_s = real_path_if_valid(path);
if (!ref_git_s) {
seen_error = 1;
strbuf_addf(err, _("path '%s' does not exist"), path);
goto out;
} else
/*
* Beware: read_gitfile(), real_path() and mkpath()
* return static buffer
*/
ref_git = xstrdup(ref_git_s);
repo = read_gitfile(ref_git);
if (!repo)
repo = read_gitfile(mkpath("%s/.git", ref_git));
if (repo) {
free(ref_git);
ref_git = xstrdup(repo);
}
if (!repo && is_directory(mkpath("%s/.git/objects", ref_git))) {
char *ref_git_git = mkpathdup("%s/.git", ref_git);
free(ref_git);
ref_git = ref_git_git;
} else if (!is_directory(mkpath("%s/objects", ref_git))) {
struct strbuf sb = STRBUF_INIT;
seen_error = 1;
if (get_common_dir(&sb, ref_git)) {
strbuf_addf(err,
_("reference repository '%s' as a linked "
"checkout is not supported yet."),
path);
goto out;
}
strbuf_addf(err, _("reference repository '%s' is not a "
"local repository."), path);
goto out;
}
if (!access(mkpath("%s/shallow", ref_git), F_OK)) {
strbuf_addf(err, _("reference repository '%s' is shallow"),
path);
seen_error = 1;
goto out;
}
if (!access(mkpath("%s/info/grafts", ref_git), F_OK)) {
strbuf_addf(err,
_("reference repository '%s' is grafted"),
path);
seen_error = 1;
goto out;
}
out:
if (seen_error) {
FREE_AND_NULL(ref_git);
}
return ref_git;
}
int foreach_alt_odb(alt_odb_fn fn, void *cb)
{
struct alternate_object_database *ent;
int r = 0;
prepare_alt_odb();
for (ent = alt_odb_list; ent; ent = ent->next) {
r = fn(ent, cb);
if (r)
break;
}
return r;
}
void prepare_alt_odb(void)
{
const char *alt;
if (alt_odb_tail)
return;
alt = getenv(ALTERNATE_DB_ENVIRONMENT);
if (!alt) alt = "";
alt_odb_tail = &alt_odb_list;
read_info_alternates: read contents into strbuf This patch fixes a regression in v2.11.1 where we might read past the end of an mmap'd buffer. It was introduced in cf3c635210. The link_alt_odb_entries() function has always taken a ptr/len pair as input. Until cf3c635210 (alternates: accept double-quoted paths, 2016-12-12), we made a copy of those bytes in a string. But after that commit, we switched to parsing the input left-to-right, and we ignore "len" totally, instead reading until we hit a NUL. This has mostly gone unnoticed for a few reasons: 1. All but one caller passes a NUL-terminated string, with "len" pointing to the NUL. 2. The remaining caller, read_info_alternates(), passes in an mmap'd file. Unless the file is an exact multiple of the page size, it will generally be followed by NUL padding to the end of the page, which just works. The easiest way to demonstrate the problem is to build with: make SANITIZE=address NO_MMAP=Nope test Any test which involves $GIT_DIR/info/alternates will fail, as the mmap emulation (correctly) does not add an extra NUL, and ASAN complains about reading past the end of the buffer. One solution would be to teach link_alt_odb_entries() to respect "len". But it's actually a bit tricky, since we depend on unquote_c_style() under the hood, and it has no ptr/len variant. We could also just make a NUL-terminated copy of the input bytes and operate on that. But since all but one caller already is passing a string, instead let's just fix that caller to provide NUL-terminated input in the first place, by swapping out mmap for strbuf_read_file(). There's no advantage to using mmap on the alternates file. It's not expected to be large (and anyway, we're copying its contents into an in-memory linked list). Nor is using git_open() buying us anything here, since we don't keep the descriptor open for a long period of time. Let's also drop the "len" parameter entirely from link_alt_odb_entries(), since it's completely ignored. That will avoid any new callers re-introducing a similar bug. Reported-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-19 21:41:07 +02:00
link_alt_odb_entries(alt, PATH_SEP, NULL, 0);
read_info_alternates(get_object_directory(), 0);
}
/* Returns 1 if we have successfully freshened the file, 0 otherwise. */
static int freshen_file(const char *fn)
{
struct utimbuf t;
t.actime = t.modtime = time(NULL);
return !utime(fn, &t);
}
/*
* All of the check_and_freshen functions return 1 if the file exists and was
* freshened (if freshening was requested), 0 otherwise. If they return
* 0, you should not assume that it is safe to skip a write of the object (it
* either does not exist on disk, or has a stale mtime and may be subject to
* pruning).
*/
int check_and_freshen_file(const char *fn, int freshen)
{
if (access(fn, F_OK))
return 0;
if (freshen && !freshen_file(fn))
return 0;
return 1;
}
static int check_and_freshen_local(const unsigned char *sha1, int freshen)
{
return check_and_freshen_file(sha1_file_name(sha1), freshen);
}
static int check_and_freshen_nonlocal(const unsigned char *sha1, int freshen)
{
struct alternate_object_database *alt;
prepare_alt_odb();
for (alt = alt_odb_list; alt; alt = alt->next) {
const char *path = alt_sha1_path(alt, sha1);
if (check_and_freshen_file(path, freshen))
return 1;
}
return 0;
}
static int check_and_freshen(const unsigned char *sha1, int freshen)
{
return check_and_freshen_local(sha1, freshen) ||
check_and_freshen_nonlocal(sha1, freshen);
}
int has_loose_object_nonlocal(const unsigned char *sha1)
{
return check_and_freshen_nonlocal(sha1, 0);
}
static int has_loose_object(const unsigned char *sha1)
{
return check_and_freshen(sha1, 0);
}
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
static unsigned int pack_used_ctr;
static unsigned int pack_mmap_calls;
static unsigned int peak_pack_open_windows;
static unsigned int pack_open_windows;
static unsigned int pack_open_fds;
static unsigned int pack_max_fds;
static size_t peak_pack_mapped;
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
static size_t pack_mapped;
struct packed_git *packed_git;
find_pack_entry: replace last_found_pack with MRU cache Each pack has an index for looking up entries in O(log n) time, but if we have multiple packs, we have to scan through them linearly. This can produce a measurable overhead for some operations. We dealt with this long ago in f7c22cc (always start looking up objects in the last used pack first, 2007-05-30), which keeps what is essentially a 1-element most-recently-used cache. In theory, we should be able to do better by keeping a similar but longer cache, that is the same length as the pack-list itself. Since we now have a convenient generic MRU structure, we can plug it in and measure. Here are the numbers for running p5303 against linux.git: Test HEAD^ HEAD ------------------------------------------------------------------------ 5303.3: rev-list (1) 31.56(31.28+0.27) 31.30(31.08+0.20) -0.8% 5303.4: repack (1) 40.62(39.35+2.36) 40.60(39.27+2.44) -0.0% 5303.6: rev-list (50) 31.31(31.06+0.23) 31.23(31.00+0.22) -0.3% 5303.7: repack (50) 58.65(69.12+1.94) 58.27(68.64+2.05) -0.6% 5303.9: rev-list (1000) 38.74(38.40+0.33) 31.87(31.62+0.24) -17.7% 5303.10: repack (1000) 367.20(441.80+4.62) 342.00(414.04+3.72) -6.9% The main numbers of interest here are the rev-list ones (since that is exercising the normal object lookup code path). The single-pack case shouldn't improve at all; the 260ms speedup there is just part of the run-to-run noise (but it's important to note that we didn't make anything worse with the overhead of maintaining our cache). In the 50-pack case, we see similar results. There may be a slight improvement, but it's mostly within the noise. The 1000-pack case does show a big improvement, though. That carries over to the repack case, as well. Even though we haven't touched its pack-search loop yet, it does still do a lot of normal object lookups (e.g., for the internal revision walk), and so improves. As a point of reference, I also ran the 1000-pack test against a version of HEAD^ with the last_found_pack optimization disabled. It takes ~60s, so that gives an indication of how much even the single-element cache is helping. For comparison, here's a smaller repository, git.git: Test HEAD^ HEAD --------------------------------------------------------------------- 5303.3: rev-list (1) 1.56(1.54+0.01) 1.54(1.51+0.02) -1.3% 5303.4: repack (1) 1.84(1.80+0.10) 1.82(1.80+0.09) -1.1% 5303.6: rev-list (50) 1.58(1.55+0.02) 1.59(1.57+0.01) +0.6% 5303.7: repack (50) 2.50(3.18+0.04) 2.50(3.14+0.04) +0.0% 5303.9: rev-list (1000) 2.76(2.71+0.04) 2.24(2.21+0.02) -18.8% 5303.10: repack (1000) 13.21(19.56+0.25) 11.66(18.01+0.21) -11.7% You can see that the percentage improvement is similar. That's because the lookup we are optimizing is roughly O(nr_objects * nr_packs). Since the number of packs is constant in both tests, we'd expect the improvement to be linear in the number of objects. But the whole process is also linear in the number of objects, so the improvement is a constant factor. The exact improvement does also depend on the contents of the packs. In p5303, the extra packs all have 5 first-parent commits in them, which is a reasonable simulation of a pushed-to repository. But it also means that only 250 first-parent commits are in those packs (compared to almost 50,000 total in linux.git), and the rest are in the huge "base" pack. So once we start looking at history in taht big pack, that's where we'll find most everything, and even the 1-element cache gets close to 100% cache hits. You could almost certainly show better numbers with a more pathological case (e.g., distributing the objects more evenly across the packs). But that's simply not that realistic a scenario, so it makes more sense to focus on these numbers. The implementation itself is a straightforward application of the MRU code. We provide an MRU-ordered list of packs that shadows the packed_git list. This is easy to do because we only create and revise the pack list in one place. The "reprepare" code path actually drops the whole MRU and replaces it for simplicity. It would be more efficient to just add new entries, but there's not much point in optimizing here; repreparing happens rarely, and only after doing a lot of other expensive work. The key things to keep optimized are traversal (which is just a normal linked list, albeit with one extra level of indirection over the regular packed_git list), and marking (which is a constant number of pointer assignments, though slightly more than the old last_found_pack was; it doesn't seem to create a measurable slowdown, though). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-29 06:09:46 +02:00
static struct mru packed_git_mru_storage;
struct mru *packed_git_mru = &packed_git_mru_storage;
void pack_report(void)
{
fprintf(stderr,
"pack_report: getpagesize() = %10" SZ_FMT "\n"
"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
"pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
sz_fmt(getpagesize()),
sz_fmt(packed_git_window_size),
sz_fmt(packed_git_limit));
fprintf(stderr,
"pack_report: pack_used_ctr = %10u\n"
"pack_report: pack_mmap_calls = %10u\n"
"pack_report: pack_open_windows = %10u / %10u\n"
"pack_report: pack_mapped = "
"%10" SZ_FMT " / %10" SZ_FMT "\n",
pack_used_ctr,
pack_mmap_calls,
pack_open_windows, peak_pack_open_windows,
sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
}
/*
* Open and mmap the index file at path, perform a couple of
* consistency checks, then record its information to p. Return 0 on
* success.
*/
static int check_packed_git_idx(const char *path, struct packed_git *p)
{
void *idx_map;
struct pack_idx_header *hdr;
size_t idx_size;
uint32_t version, nr, i, *index;
int fd = git_open(path);
struct stat st;
if (fd < 0)
return -1;
if (fstat(fd, &st)) {
close(fd);
return -1;
}
idx_size = xsize_t(st.st_size);
if (idx_size < 4 * 256 + 20 + 20) {
close(fd);
return error("index file %s is too small", path);
}
idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
hdr = idx_map;
if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
version = ntohl(hdr->idx_version);
if (version < 2 || version > 2) {
munmap(idx_map, idx_size);
return error("index file %s is version %"PRIu32
" and is not supported by this binary"
" (try upgrading GIT to a newer version)",
path, version);
}
} else
version = 1;
nr = 0;
index = idx_map;
if (version > 1)
index += 2; /* skip index header */
for (i = 0; i < 256; i++) {
uint32_t n = ntohl(index[i]);
if (n < nr) {
munmap(idx_map, idx_size);
return error("non-monotonic index %s", path);
}
nr = n;
}
if (version == 1) {
/*
* Total size:
* - 256 index entries 4 bytes each
* - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
* - 20-byte SHA1 of the packfile
* - 20-byte SHA1 file checksum
*/
if (idx_size != 4*256 + nr * 24 + 20 + 20) {
munmap(idx_map, idx_size);
return error("wrong index v1 file size in %s", path);
}
} else if (version == 2) {
/*
* Minimum size:
* - 8 bytes of header
* - 256 index entries 4 bytes each
* - 20-byte sha1 entry * nr
* - 4-byte crc entry * nr
* - 4-byte offset entry * nr
* - 20-byte SHA1 of the packfile
* - 20-byte SHA1 file checksum
* And after the 4-byte offset table might be a
* variable sized table containing 8-byte entries
* for offsets larger than 2^31.
*/
unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
unsigned long max_size = min_size;
if (nr)
max_size += (nr - 1)*8;
if (idx_size < min_size || idx_size > max_size) {
munmap(idx_map, idx_size);
return error("wrong index v2 file size in %s", path);
}
if (idx_size != min_size &&
/*
* make sure we can deal with large pack offsets.
* 31-bit signed offset won't be enough, neither
* 32-bit unsigned one will be.
*/
(sizeof(off_t) <= 4)) {
munmap(idx_map, idx_size);
return error("pack too large for current definition of off_t in %s", path);
}
}
p->index_version = version;
p->index_data = idx_map;
p->index_size = idx_size;
p->num_objects = nr;
return 0;
}
int open_pack_index(struct packed_git *p)
{
char *idx_name;
size_t len;
int ret;
if (p->index_data)
return 0;
if (!strip_suffix(p->pack_name, ".pack", &len))
die("BUG: pack_name does not end in .pack");
idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
ret = check_packed_git_idx(idx_name, p);
free(idx_name);
return ret;
}
static void scan_windows(struct packed_git *p,
struct packed_git **lru_p,
struct pack_window **lru_w,
struct pack_window **lru_l)
{
struct pack_window *w, *w_l;
for (w_l = NULL, w = p->windows; w; w = w->next) {
if (!w->inuse_cnt) {
if (!*lru_w || w->last_used < (*lru_w)->last_used) {
*lru_p = p;
*lru_w = w;
*lru_l = w_l;
}
}
w_l = w;
}
}
static int unuse_one_window(struct packed_git *current)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *lru_w = NULL, *lru_l = NULL;
if (current)
scan_windows(current, &lru_p, &lru_w, &lru_l);
for (p = packed_git; p; p = p->next)
scan_windows(p, &lru_p, &lru_w, &lru_l);
if (lru_p) {
munmap(lru_w->base, lru_w->len);
pack_mapped -= lru_w->len;
if (lru_l)
lru_l->next = lru_w->next;
else
lru_p->windows = lru_w->next;
free(lru_w);
pack_open_windows--;
return 1;
}
return 0;
}
void release_pack_memory(size_t need)
{
size_t cur = pack_mapped;
while (need >= (cur - pack_mapped) && unuse_one_window(NULL))
; /* nothing */
}
static void mmap_limit_check(size_t length)
{
static size_t limit = 0;
if (!limit) {
limit = git_env_ulong("GIT_MMAP_LIMIT", 0);
if (!limit)
limit = SIZE_MAX;
}
if (length > limit)
die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX,
(uintmax_t)length, (uintmax_t)limit);
}
void *xmmap_gently(void *start, size_t length,
int prot, int flags, int fd, off_t offset)
{
void *ret;
mmap_limit_check(length);
ret = mmap(start, length, prot, flags, fd, offset);
if (ret == MAP_FAILED) {
if (!length)
return NULL;
release_pack_memory(length);
ret = mmap(start, length, prot, flags, fd, offset);
}
return ret;
}
void *xmmap(void *start, size_t length,
int prot, int flags, int fd, off_t offset)
{
void *ret = xmmap_gently(start, length, prot, flags, fd, offset);
if (ret == MAP_FAILED)
die_errno("mmap failed");
return ret;
}
Fix random fast-import errors when compiled with NO_MMAP fast-import was relying on the fact that on most systems mmap() and write() are synchronized by the filesystem's buffer cache. We were relying on the ability to mmap() 20 bytes beyond the current end of the file, then later fill in those bytes with a future write() call, then read them through the previously obtained mmap() address. This isn't always true with some implementations of NFS, but it is especially not true with our NO_MMAP=YesPlease build time option used on some platforms. If fast-import was built with NO_MMAP=YesPlease we used the malloc()+pread() emulation and the subsequent write() call does not update the trailing 20 bytes of a previously obtained "mmap()" (aka malloc'd) address. Under NO_MMAP that behavior causes unpack_entry() in sha1_file.c to be unable to read an object header (or data) that has been unlucky enough to be written to the packfile at a location such that it is in the trailing 20 bytes of a window previously opened on that same packfile. This bug has gone unnoticed for a very long time as it is highly data dependent. Not only does the object have to be placed at the right position, but it also needs to be positioned behind some other object that has been accessed due to a branch cache invalidation. In other words the stars had to align just right, and if you did run into this bug you probably should also have purchased a lottery ticket. Fortunately the workaround is a lot easier than the bug explanation. Before we allow unpack_entry() to read data from a pack window that has also (possibly) been modified through write() we force all existing windows on that packfile to be closed. By closing the windows we ensure that any new access via the emulated mmap() will reread the packfile, updating to the current file content. This comes at a slight performance degredation as we cannot reuse previously cached windows when we update the packfile. But it is a fairly minor difference as the window closes happen at only two points: - When the packfile is finalized and its .idx is generated: At this stage we are getting ready to update the refs and any data access into the packfile is going to be random, and is going after only the branch tips (to ensure they are valid). Our existing windows (if any) are not likely to be positioned at useful locations to access those final tip commits so we probably were closing them before anyway. - When the branch cache missed and we need to reload: At this point fast-import is getting change commands for the next commit and it needs to go re-read a tree object it previously had written out to the packfile. What windows we had (if any) are not likely to cover the tree in question so we probably were closing them before anyway. We do try to avoid unnecessarily closing windows in the second case by checking to see if the packfile size has increased since the last time we called unpack_entry() on that packfile. If the size has not changed then we have not written additional data, and any existing window is still vaild. This nicely handles the cases where fast-import is going through a branch cache reload and needs to read many trees at once. During such an event we are not likely to be updating the packfile so we do not cycle the windows between reads. With this change in place t9301-fast-export.sh (which was broken by c3b0dec509fe136c5417422f31898b5a4e2d5e02) finally works again. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-01-18 04:57:00 +01:00
void close_pack_windows(struct packed_git *p)
{
while (p->windows) {
struct pack_window *w = p->windows;
if (w->inuse_cnt)
die("pack '%s' still has open windows to it",
p->pack_name);
munmap(w->base, w->len);
pack_mapped -= w->len;
pack_open_windows--;
p->windows = w->next;
free(w);
}
}
static int close_pack_fd(struct packed_git *p)
{
if (p->pack_fd < 0)
return 0;
close(p->pack_fd);
pack_open_fds--;
p->pack_fd = -1;
return 1;
}
static void close_pack(struct packed_git *p)
{
close_pack_windows(p);
close_pack_fd(p);
close_pack_index(p);
}
void close_all_packs(void)
{
struct packed_git *p;
for (p = packed_git; p; p = p->next)
if (p->do_not_close)
die("BUG: want to close pack marked 'do-not-close'");
else
close_pack(p);
}
sha1_file: introduce close_one_pack() to close packs on fd pressure When the number of open packs exceeds pack_max_fds, unuse_one_window() is called repeatedly to attempt to release the least-recently-used pack windows, which, as a side-effect, will also close a pack file after closing its last open window. If a pack file has been opened, but no windows have been allocated into it, it will never be selected by unuse_one_window() and hence its file descriptor will not be closed. When this happens, git may exceed the number of file descriptors permitted by the system. This latter situation can occur in show-ref or receive-pack during ref advertisement. During ref advertisement, receive-pack will iterate over every ref in the repository and advertise it to the client after ensuring that the ref exists in the local repository. If the ref is located inside a pack, then the pack is opened to ensure that it exists, but since the object is not actually read from the pack, no mmap windows are allocated. When the number of open packs exceeds pack_max_fds, unuse_one_window() will not be able to find any windows to free and will not be able to close any packs. Once the per-process file descriptor limit is exceeded, receive-pack will produce a warning, not an error, for each pack it cannot open, and will then most likely fail with an error to spawn rev-list or index-pack like: error: cannot create standard input pipe for rev-list: Too many open files error: Could not run 'git rev-list' This may also occur during upload-pack when refs are packed (in the packed-refs file) and the number of packs that must be opened to verify that these packed refs exist exceeds the file descriptor limit. If the refs are loose, then upload-pack will read each ref from the object database (if the object is in a pack, allocating one or more mmap windows for it) in order to peel tags and advertise the underlying object. But when the refs are packed and peeled, upload-pack will use the peeled sha1 in the packed-refs file and will not need to read from the pack files, so no mmap windows will be allocated and just like with receive-pack, unuse_one_window() will never select these opened packs to close. When we have file descriptor pressure, we just need to find an open pack to close. We can leave the existing mmap windows open. If additional windows need to be mapped into the pack file, it will be reopened when necessary. If the pack file has been rewritten in the mean time, open_packed_git_1() should notice when it compares the file size or the pack's sha1 checksum to what was previously read from the pack index, and reject it. Let's introduce a new function close_one_pack() designed specifically for this purpose to search for and close the least-recently-used pack, where LRU is defined as (in order of preference): * pack with oldest mtime and no allocated mmap windows * pack with the least-recently-used windows, i.e. the pack with the oldest most-recently-used window, where none of the windows are in use * pack with the least-recently-used windows Signed-off-by: Brandon Casey <drafnel@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-02 07:36:33 +02:00
/*
* The LRU pack is the one with the oldest MRU window, preferring packs
* with no used windows, or the oldest mtime if it has no windows allocated.
*/
static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)
{
struct pack_window *w, *this_mru_w;
int has_windows_inuse = 0;
/*
* Reject this pack if it has windows and the previously selected
* one does not. If this pack does not have windows, reject
* it if the pack file is newer than the previously selected one.
*/
if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))
return;
for (w = this_mru_w = p->windows; w; w = w->next) {
/*
* Reject this pack if any of its windows are in use,
* but the previously selected pack did not have any
* inuse windows. Otherwise, record that this pack
* has windows in use.
*/
if (w->inuse_cnt) {
if (*accept_windows_inuse)
has_windows_inuse = 1;
else
return;
}
if (w->last_used > this_mru_w->last_used)
this_mru_w = w;
/*
* Reject this pack if it has windows that have been
* used more recently than the previously selected pack.
* If the previously selected pack had windows inuse and
* we have not encountered a window in this pack that is
* inuse, skip this check since we prefer a pack with no
* inuse windows to one that has inuse windows.
*/
if (*mru_w && *accept_windows_inuse == has_windows_inuse &&
this_mru_w->last_used > (*mru_w)->last_used)
return;
}
/*
* Select this pack.
*/
*mru_w = this_mru_w;
*lru_p = p;
*accept_windows_inuse = has_windows_inuse;
}
static int close_one_pack(void)
{
struct packed_git *p, *lru_p = NULL;
struct pack_window *mru_w = NULL;
int accept_windows_inuse = 1;
for (p = packed_git; p; p = p->next) {
if (p->pack_fd == -1)
continue;
find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
}
if (lru_p)
return close_pack_fd(lru_p);
sha1_file: introduce close_one_pack() to close packs on fd pressure When the number of open packs exceeds pack_max_fds, unuse_one_window() is called repeatedly to attempt to release the least-recently-used pack windows, which, as a side-effect, will also close a pack file after closing its last open window. If a pack file has been opened, but no windows have been allocated into it, it will never be selected by unuse_one_window() and hence its file descriptor will not be closed. When this happens, git may exceed the number of file descriptors permitted by the system. This latter situation can occur in show-ref or receive-pack during ref advertisement. During ref advertisement, receive-pack will iterate over every ref in the repository and advertise it to the client after ensuring that the ref exists in the local repository. If the ref is located inside a pack, then the pack is opened to ensure that it exists, but since the object is not actually read from the pack, no mmap windows are allocated. When the number of open packs exceeds pack_max_fds, unuse_one_window() will not be able to find any windows to free and will not be able to close any packs. Once the per-process file descriptor limit is exceeded, receive-pack will produce a warning, not an error, for each pack it cannot open, and will then most likely fail with an error to spawn rev-list or index-pack like: error: cannot create standard input pipe for rev-list: Too many open files error: Could not run 'git rev-list' This may also occur during upload-pack when refs are packed (in the packed-refs file) and the number of packs that must be opened to verify that these packed refs exist exceeds the file descriptor limit. If the refs are loose, then upload-pack will read each ref from the object database (if the object is in a pack, allocating one or more mmap windows for it) in order to peel tags and advertise the underlying object. But when the refs are packed and peeled, upload-pack will use the peeled sha1 in the packed-refs file and will not need to read from the pack files, so no mmap windows will be allocated and just like with receive-pack, unuse_one_window() will never select these opened packs to close. When we have file descriptor pressure, we just need to find an open pack to close. We can leave the existing mmap windows open. If additional windows need to be mapped into the pack file, it will be reopened when necessary. If the pack file has been rewritten in the mean time, open_packed_git_1() should notice when it compares the file size or the pack's sha1 checksum to what was previously read from the pack index, and reject it. Let's introduce a new function close_one_pack() designed specifically for this purpose to search for and close the least-recently-used pack, where LRU is defined as (in order of preference): * pack with oldest mtime and no allocated mmap windows * pack with the least-recently-used windows, i.e. the pack with the oldest most-recently-used window, where none of the windows are in use * pack with the least-recently-used windows Signed-off-by: Brandon Casey <drafnel@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-02 07:36:33 +02:00
return 0;
}
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:08 +01:00
void unuse_pack(struct pack_window **w_cursor)
{
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:08 +01:00
struct pack_window *w = *w_cursor;
if (w) {
w->inuse_cnt--;
*w_cursor = NULL;
}
}
void close_pack_index(struct packed_git *p)
{
if (p->index_data) {
munmap((void *)p->index_data, p->index_size);
p->index_data = NULL;
}
}
static unsigned int get_max_fd_limit(void)
{
#ifdef RLIMIT_NOFILE
{
struct rlimit lim;
if (!getrlimit(RLIMIT_NOFILE, &lim))
return lim.rlim_cur;
}
#endif
#ifdef _SC_OPEN_MAX
{
long open_max = sysconf(_SC_OPEN_MAX);
if (0 < open_max)
return open_max;
/*
* Otherwise, we got -1 for one of the two
* reasons:
*
* (1) sysconf() did not understand _SC_OPEN_MAX
* and signaled an error with -1; or
* (2) sysconf() said there is no limit.
*
* We _could_ clear errno before calling sysconf() to
* tell these two cases apart and return a huge number
* in the latter case to let the caller cap it to a
* value that is not so selfish, but letting the
* fallback OPEN_MAX codepath take care of these cases
* is a lot simpler.
*/
}
#endif
#ifdef OPEN_MAX
return OPEN_MAX;
#else
return 1; /* see the caller ;-) */
#endif
}
/*
* Do not call this directly as this leaks p->pack_fd on error return;
* call open_packed_git() instead.
*/
static int open_packed_git_1(struct packed_git *p)
{
struct stat st;
struct pack_header hdr;
unsigned char sha1[20];
unsigned char *idx_sha1;
long fd_flag;
if (!p->index_data && open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
if (!pack_max_fds) {
unsigned int max_fds = get_max_fd_limit();
/* Save 3 for stdin/stdout/stderr, 22 for work */
if (25 < max_fds)
pack_max_fds = max_fds - 25;
else
pack_max_fds = 1;
}
sha1_file: introduce close_one_pack() to close packs on fd pressure When the number of open packs exceeds pack_max_fds, unuse_one_window() is called repeatedly to attempt to release the least-recently-used pack windows, which, as a side-effect, will also close a pack file after closing its last open window. If a pack file has been opened, but no windows have been allocated into it, it will never be selected by unuse_one_window() and hence its file descriptor will not be closed. When this happens, git may exceed the number of file descriptors permitted by the system. This latter situation can occur in show-ref or receive-pack during ref advertisement. During ref advertisement, receive-pack will iterate over every ref in the repository and advertise it to the client after ensuring that the ref exists in the local repository. If the ref is located inside a pack, then the pack is opened to ensure that it exists, but since the object is not actually read from the pack, no mmap windows are allocated. When the number of open packs exceeds pack_max_fds, unuse_one_window() will not be able to find any windows to free and will not be able to close any packs. Once the per-process file descriptor limit is exceeded, receive-pack will produce a warning, not an error, for each pack it cannot open, and will then most likely fail with an error to spawn rev-list or index-pack like: error: cannot create standard input pipe for rev-list: Too many open files error: Could not run 'git rev-list' This may also occur during upload-pack when refs are packed (in the packed-refs file) and the number of packs that must be opened to verify that these packed refs exist exceeds the file descriptor limit. If the refs are loose, then upload-pack will read each ref from the object database (if the object is in a pack, allocating one or more mmap windows for it) in order to peel tags and advertise the underlying object. But when the refs are packed and peeled, upload-pack will use the peeled sha1 in the packed-refs file and will not need to read from the pack files, so no mmap windows will be allocated and just like with receive-pack, unuse_one_window() will never select these opened packs to close. When we have file descriptor pressure, we just need to find an open pack to close. We can leave the existing mmap windows open. If additional windows need to be mapped into the pack file, it will be reopened when necessary. If the pack file has been rewritten in the mean time, open_packed_git_1() should notice when it compares the file size or the pack's sha1 checksum to what was previously read from the pack index, and reject it. Let's introduce a new function close_one_pack() designed specifically for this purpose to search for and close the least-recently-used pack, where LRU is defined as (in order of preference): * pack with oldest mtime and no allocated mmap windows * pack with the least-recently-used windows, i.e. the pack with the oldest most-recently-used window, where none of the windows are in use * pack with the least-recently-used windows Signed-off-by: Brandon Casey <drafnel@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-02 07:36:33 +02:00
while (pack_max_fds <= pack_open_fds && close_one_pack())
; /* nothing */
p->pack_fd = git_open(p->pack_name);
if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
return -1;
pack_open_fds++;
/* If we created the struct before we had the pack we lack size. */
if (!p->pack_size) {
if (!S_ISREG(st.st_mode))
return error("packfile %s not a regular file", p->pack_name);
p->pack_size = st.st_size;
} else if (p->pack_size != st.st_size)
return error("packfile %s size changed", p->pack_name);
/* We leave these file descriptors open with sliding mmap;
* there is no point keeping them open across exec(), though.
*/
fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
if (fd_flag < 0)
return error("cannot determine file descriptor flags");
fd_flag |= FD_CLOEXEC;
if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
return error("cannot set FD_CLOEXEC");
/* Verify we recognize this pack file format. */
if (read_in_full(p->pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
return error("file %s is far too short to be a packfile", p->pack_name);
if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
return error("file %s is not a GIT packfile", p->pack_name);
if (!pack_version_ok(hdr.hdr_version))
return error("packfile %s is version %"PRIu32" and not"
" supported (try upgrading GIT to a newer version)",
p->pack_name, ntohl(hdr.hdr_version));
/* Verify the pack matches its index. */
if (p->num_objects != ntohl(hdr.hdr_entries))
return error("packfile %s claims to have %"PRIu32" objects"
" while index indicates %"PRIu32" objects",
p->pack_name, ntohl(hdr.hdr_entries),
p->num_objects);
if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
return error("end of packfile %s is unavailable", p->pack_name);
if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1))
return error("packfile %s signature is unavailable", p->pack_name);
idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40;
if (hashcmp(sha1, idx_sha1))
return error("packfile %s does not match index", p->pack_name);
return 0;
}
static int open_packed_git(struct packed_git *p)
{
if (!open_packed_git_1(p))
return 0;
close_pack_fd(p);
return -1;
}
static int in_window(struct pack_window *win, off_t offset)
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
{
/* We must promise at least 20 bytes (one hash) after the
* offset is available from this window, otherwise the offset
* is not actually in this window and a different window (which
* has that one hash excess) must be used. This is to support
* the object header and delta base parsing routines below.
*/
off_t win_off = win->offset;
return win_off <= offset
&& (offset + 20) <= (win_off + win->len);
}
unsigned char *use_pack(struct packed_git *p,
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:08 +01:00
struct pack_window **w_cursor,
off_t offset,
2011-06-10 20:52:15 +02:00
unsigned long *left)
{
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
struct pack_window *win = *w_cursor;
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:08 +01:00
/* Since packfiles end in a hash of their content and it's
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
* pointless to ask for an offset into the middle of that
* hash, and the in_window function above wouldn't match
* don't allow an offset too close to the end of the file.
*/
if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
if (offset > (p->pack_size - 20))
die("offset beyond end of packfile (truncated pack?)");
if (offset < 0)
die(_("offset before end of packfile (broken .idx?)"));
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
if (!win || !in_window(win, offset)) {
if (win)
win->inuse_cnt--;
for (win = p->windows; win; win = win->next) {
if (in_window(win, offset))
break;
}
if (!win) {
size_t window_align = packed_git_window_size / 2;
off_t len;
if (p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
win = xcalloc(1, sizeof(*win));
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
if (len > packed_git_window_size)
len = packed_git_window_size;
win->len = (size_t)len;
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
pack_mapped += win->len;
while (packed_git_limit < pack_mapped
&& unuse_one_window(p))
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
; /* nothing */
win->base = xmmap(NULL, win->len,
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
PROT_READ, MAP_PRIVATE,
p->pack_fd, win->offset);
if (win->base == MAP_FAILED)
die_errno("packfile %s cannot be mapped",
p->pack_name);
if (!win->offset && win->len == p->pack_size
&& !p->do_not_close)
close_pack_fd(p);
pack_mmap_calls++;
pack_open_windows++;
if (pack_mapped > peak_pack_mapped)
peak_pack_mapped = pack_mapped;
if (pack_open_windows > peak_pack_open_windows)
peak_pack_open_windows = pack_open_windows;
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
win->next = p->windows;
p->windows = win;
}
}
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:08 +01:00
if (win != *w_cursor) {
win->last_used = pack_used_ctr++;
win->inuse_cnt++;
*w_cursor = win;
}
Fully activate the sliding window pack access. This finally turns on the sliding window behavior for packfile data access by mapping limited size windows and chaining them under the packed_git->windows list. We consider a given byte offset to be within the window only if there would be at least 20 bytes (one hash worth of data) accessible after the requested offset. This range selection relates to the contract that use_pack() makes with its callers, allowing them to access one hash or one object header without needing to call use_pack() for every byte of data obtained. In the worst case scenario we will map the same page of data twice into memory: once at the end of one window and once again at the start of the next window. This duplicate page mapping will happen only when an object header or a delta base reference is spanned over the end of a window and is always limited to just one page of duplication, as no sane operating system will ever have a page size smaller than a hash. I am assuming that the possible wasted page of virtual address space is going to perform faster than the alternatives, which would be to copy the object header or ref delta into a temporary buffer prior to parsing, or to check the window range on every byte during header parsing. We may decide to revisit this decision in the future since this is just a gut instinct decision and has not actually been proven out by experimental testing. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:28 +01:00
offset -= win->offset;
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:08 +01:00
if (left)
*left = win->len - xsize_t(offset);
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:08 +01:00
return win->base + offset;
}
static struct packed_git *alloc_packed_git(int extra)
{
struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
memset(p, 0, sizeof(*p));
p->pack_fd = -1;
return p;
}
static void try_to_free_pack_memory(size_t size)
{
release_pack_memory(size);
}
struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
{
static int have_set_try_to_free_routine;
struct stat st;
size_t alloc;
struct packed_git *p;
if (!have_set_try_to_free_routine) {
have_set_try_to_free_routine = 1;
set_try_to_free_routine(try_to_free_pack_memory);
}
/*
* Make sure a corresponding .pack file exists and that
* the index looks sane.
*/
if (!strip_suffix_mem(path, &path_len, ".idx"))
return NULL;
/*
* ".pack" is long enough to hold any suffix we're adding (and
* the use xsnprintf double-checks that)
*/
alloc = st_add3(path_len, strlen(".pack"), 1);
p = alloc_packed_git(alloc);
memcpy(p->pack_name, path, path_len);
xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
if (!access(p->pack_name, F_OK))
p->pack_keep = 1;
xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
free(p);
return NULL;
}
/* ok, it looks sane as far as we can check without
* actually mapping the pack file.
*/
p->pack_size = st.st_size;
p->pack_local = local;
p->mtime = st.st_mtime;
if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
hashclr(p->sha1);
return p;
}
struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
{
const char *path = sha1_pack_name(sha1);
size_t alloc = st_add(strlen(path), 1);
struct packed_git *p = alloc_packed_git(alloc);
memcpy(p->pack_name, path, alloc); /* includes NUL */
hashcpy(p->sha1, sha1);
if (check_packed_git_idx(idx_path, p)) {
free(p);
return NULL;
}
return p;
}
void install_packed_git(struct packed_git *pack)
{
if (pack->pack_fd != -1)
pack_open_fds++;
pack->next = packed_git;
packed_git = pack;
}
void (*report_garbage)(unsigned seen_bits, const char *path);
static void report_helper(const struct string_list *list,
int seen_bits, int first, int last)
{
if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
return;
for (; first < last; first++)
report_garbage(seen_bits, list->items[first].string);
}
static void report_pack_garbage(struct string_list *list)
{
int i, baselen = -1, first = 0, seen_bits = 0;
if (!report_garbage)
return;
string_list_sort(list);
for (i = 0; i < list->nr; i++) {
const char *path = list->items[i].string;
if (baselen != -1 &&
strncmp(path, list->items[first].string, baselen)) {
report_helper(list, seen_bits, first, i);
baselen = -1;
seen_bits = 0;
}
if (baselen == -1) {
const char *dot = strrchr(path, '.');
if (!dot) {
report_garbage(PACKDIR_FILE_GARBAGE, path);
continue;
}
baselen = dot - path + 1;
first = i;
}
if (!strcmp(path + baselen, "pack"))
seen_bits |= 1;
else if (!strcmp(path + baselen, "idx"))
seen_bits |= 2;
}
report_helper(list, seen_bits, first, list->nr);
}
static void prepare_packed_git_one(char *objdir, int local)
{
struct strbuf path = STRBUF_INIT;
size_t dirnamelen;
DIR *dir;
struct dirent *de;
struct string_list garbage = STRING_LIST_INIT_DUP;
strbuf_addstr(&path, objdir);
strbuf_addstr(&path, "/pack");
dir = opendir(path.buf);
if (!dir) {
if (errno != ENOENT)
error_errno("unable to open object pack directory: %s",
path.buf);
strbuf_release(&path);
return;
}
strbuf_addch(&path, '/');
dirnamelen = path.len;
while ((de = readdir(dir)) != NULL) {
struct packed_git *p;
size_t base_len;
if (is_dot_or_dotdot(de->d_name))
continue;
strbuf_setlen(&path, dirnamelen);
strbuf_addstr(&path, de->d_name);
base_len = path.len;
if (strip_suffix_mem(path.buf, &base_len, ".idx")) {
/* Don't reopen a pack we already have. */
for (p = packed_git; p; p = p->next) {
size_t len;
if (strip_suffix(p->pack_name, ".pack", &len) &&
len == base_len &&
!memcmp(p->pack_name, path.buf, len))
break;
}
if (p == NULL &&
/*
* See if it really is a valid .idx file with
* corresponding .pack file that we can map.
*/
(p = add_packed_git(path.buf, path.len, local)) != NULL)
install_packed_git(p);
}
if (!report_garbage)
continue;
if (ends_with(de->d_name, ".idx") ||
ends_with(de->d_name, ".pack") ||
ends_with(de->d_name, ".bitmap") ||
ends_with(de->d_name, ".keep"))
string_list_append(&garbage, path.buf);
else
report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
}
closedir(dir);
report_pack_garbage(&garbage);
string_list_clear(&garbage, 0);
strbuf_release(&path);
}
static int approximate_object_count_valid;
/*
* Give a fast, rough count of the number of objects in the repository. This
* ignores loose objects completely. If you have a lot of them, then either
* you should repack because your performance will be awful, or they are
* all unreachable objects about to be pruned, in which case they're not really
* interesting as a measure of repo size in the first place.
*/
unsigned long approximate_object_count(void)
{
static unsigned long count;
if (!approximate_object_count_valid) {
struct packed_git *p;
prepare_packed_git();
count = 0;
for (p = packed_git; p; p = p->next) {
if (open_pack_index(p))
continue;
count += p->num_objects;
}
}
return count;
}
static void *get_next_packed_git(const void *p)
{
return ((const struct packed_git *)p)->next;
}
static void set_next_packed_git(void *p, void *next)
{
((struct packed_git *)p)->next = next;
}
static int sort_pack(const void *a_, const void *b_)
{
const struct packed_git *a = a_;
const struct packed_git *b = b_;
int st;
/*
* Local packs tend to contain objects specific to our
* variant of the project than remote ones. In addition,
* remote ones could be on a network mounted filesystem.
* Favor local ones for these reasons.
*/
st = a->pack_local - b->pack_local;
if (st)
return -st;
/*
* Younger packs tend to contain more recent objects,
* and more recent objects tend to get accessed more
* often.
*/
if (a->mtime < b->mtime)
return 1;
else if (a->mtime == b->mtime)
return 0;
return -1;
}
static void rearrange_packed_git(void)
{
packed_git = llist_mergesort(packed_git, get_next_packed_git,
set_next_packed_git, sort_pack);
}
find_pack_entry: replace last_found_pack with MRU cache Each pack has an index for looking up entries in O(log n) time, but if we have multiple packs, we have to scan through them linearly. This can produce a measurable overhead for some operations. We dealt with this long ago in f7c22cc (always start looking up objects in the last used pack first, 2007-05-30), which keeps what is essentially a 1-element most-recently-used cache. In theory, we should be able to do better by keeping a similar but longer cache, that is the same length as the pack-list itself. Since we now have a convenient generic MRU structure, we can plug it in and measure. Here are the numbers for running p5303 against linux.git: Test HEAD^ HEAD ------------------------------------------------------------------------ 5303.3: rev-list (1) 31.56(31.28+0.27) 31.30(31.08+0.20) -0.8% 5303.4: repack (1) 40.62(39.35+2.36) 40.60(39.27+2.44) -0.0% 5303.6: rev-list (50) 31.31(31.06+0.23) 31.23(31.00+0.22) -0.3% 5303.7: repack (50) 58.65(69.12+1.94) 58.27(68.64+2.05) -0.6% 5303.9: rev-list (1000) 38.74(38.40+0.33) 31.87(31.62+0.24) -17.7% 5303.10: repack (1000) 367.20(441.80+4.62) 342.00(414.04+3.72) -6.9% The main numbers of interest here are the rev-list ones (since that is exercising the normal object lookup code path). The single-pack case shouldn't improve at all; the 260ms speedup there is just part of the run-to-run noise (but it's important to note that we didn't make anything worse with the overhead of maintaining our cache). In the 50-pack case, we see similar results. There may be a slight improvement, but it's mostly within the noise. The 1000-pack case does show a big improvement, though. That carries over to the repack case, as well. Even though we haven't touched its pack-search loop yet, it does still do a lot of normal object lookups (e.g., for the internal revision walk), and so improves. As a point of reference, I also ran the 1000-pack test against a version of HEAD^ with the last_found_pack optimization disabled. It takes ~60s, so that gives an indication of how much even the single-element cache is helping. For comparison, here's a smaller repository, git.git: Test HEAD^ HEAD --------------------------------------------------------------------- 5303.3: rev-list (1) 1.56(1.54+0.01) 1.54(1.51+0.02) -1.3% 5303.4: repack (1) 1.84(1.80+0.10) 1.82(1.80+0.09) -1.1% 5303.6: rev-list (50) 1.58(1.55+0.02) 1.59(1.57+0.01) +0.6% 5303.7: repack (50) 2.50(3.18+0.04) 2.50(3.14+0.04) +0.0% 5303.9: rev-list (1000) 2.76(2.71+0.04) 2.24(2.21+0.02) -18.8% 5303.10: repack (1000) 13.21(19.56+0.25) 11.66(18.01+0.21) -11.7% You can see that the percentage improvement is similar. That's because the lookup we are optimizing is roughly O(nr_objects * nr_packs). Since the number of packs is constant in both tests, we'd expect the improvement to be linear in the number of objects. But the whole process is also linear in the number of objects, so the improvement is a constant factor. The exact improvement does also depend on the contents of the packs. In p5303, the extra packs all have 5 first-parent commits in them, which is a reasonable simulation of a pushed-to repository. But it also means that only 250 first-parent commits are in those packs (compared to almost 50,000 total in linux.git), and the rest are in the huge "base" pack. So once we start looking at history in taht big pack, that's where we'll find most everything, and even the 1-element cache gets close to 100% cache hits. You could almost certainly show better numbers with a more pathological case (e.g., distributing the objects more evenly across the packs). But that's simply not that realistic a scenario, so it makes more sense to focus on these numbers. The implementation itself is a straightforward application of the MRU code. We provide an MRU-ordered list of packs that shadows the packed_git list. This is easy to do because we only create and revise the pack list in one place. The "reprepare" code path actually drops the whole MRU and replaces it for simplicity. It would be more efficient to just add new entries, but there's not much point in optimizing here; repreparing happens rarely, and only after doing a lot of other expensive work. The key things to keep optimized are traversal (which is just a normal linked list, albeit with one extra level of indirection over the regular packed_git list), and marking (which is a constant number of pointer assignments, though slightly more than the old last_found_pack was; it doesn't seem to create a measurable slowdown, though). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-29 06:09:46 +02:00
static void prepare_packed_git_mru(void)
{
struct packed_git *p;
mru_clear(packed_git_mru);
for (p = packed_git; p; p = p->next)
mru_append(packed_git_mru, p);
}
static int prepare_packed_git_run_once = 0;
void prepare_packed_git(void)
{
struct alternate_object_database *alt;
if (prepare_packed_git_run_once)
return;
prepare_packed_git_one(get_object_directory(), 1);
prepare_alt_odb();
for (alt = alt_odb_list; alt; alt = alt->next)
prepare_packed_git_one(alt->path, 0);
rearrange_packed_git();
find_pack_entry: replace last_found_pack with MRU cache Each pack has an index for looking up entries in O(log n) time, but if we have multiple packs, we have to scan through them linearly. This can produce a measurable overhead for some operations. We dealt with this long ago in f7c22cc (always start looking up objects in the last used pack first, 2007-05-30), which keeps what is essentially a 1-element most-recently-used cache. In theory, we should be able to do better by keeping a similar but longer cache, that is the same length as the pack-list itself. Since we now have a convenient generic MRU structure, we can plug it in and measure. Here are the numbers for running p5303 against linux.git: Test HEAD^ HEAD ------------------------------------------------------------------------ 5303.3: rev-list (1) 31.56(31.28+0.27) 31.30(31.08+0.20) -0.8% 5303.4: repack (1) 40.62(39.35+2.36) 40.60(39.27+2.44) -0.0% 5303.6: rev-list (50) 31.31(31.06+0.23) 31.23(31.00+0.22) -0.3% 5303.7: repack (50) 58.65(69.12+1.94) 58.27(68.64+2.05) -0.6% 5303.9: rev-list (1000) 38.74(38.40+0.33) 31.87(31.62+0.24) -17.7% 5303.10: repack (1000) 367.20(441.80+4.62) 342.00(414.04+3.72) -6.9% The main numbers of interest here are the rev-list ones (since that is exercising the normal object lookup code path). The single-pack case shouldn't improve at all; the 260ms speedup there is just part of the run-to-run noise (but it's important to note that we didn't make anything worse with the overhead of maintaining our cache). In the 50-pack case, we see similar results. There may be a slight improvement, but it's mostly within the noise. The 1000-pack case does show a big improvement, though. That carries over to the repack case, as well. Even though we haven't touched its pack-search loop yet, it does still do a lot of normal object lookups (e.g., for the internal revision walk), and so improves. As a point of reference, I also ran the 1000-pack test against a version of HEAD^ with the last_found_pack optimization disabled. It takes ~60s, so that gives an indication of how much even the single-element cache is helping. For comparison, here's a smaller repository, git.git: Test HEAD^ HEAD --------------------------------------------------------------------- 5303.3: rev-list (1) 1.56(1.54+0.01) 1.54(1.51+0.02) -1.3% 5303.4: repack (1) 1.84(1.80+0.10) 1.82(1.80+0.09) -1.1% 5303.6: rev-list (50) 1.58(1.55+0.02) 1.59(1.57+0.01) +0.6% 5303.7: repack (50) 2.50(3.18+0.04) 2.50(3.14+0.04) +0.0% 5303.9: rev-list (1000) 2.76(2.71+0.04) 2.24(2.21+0.02) -18.8% 5303.10: repack (1000) 13.21(19.56+0.25) 11.66(18.01+0.21) -11.7% You can see that the percentage improvement is similar. That's because the lookup we are optimizing is roughly O(nr_objects * nr_packs). Since the number of packs is constant in both tests, we'd expect the improvement to be linear in the number of objects. But the whole process is also linear in the number of objects, so the improvement is a constant factor. The exact improvement does also depend on the contents of the packs. In p5303, the extra packs all have 5 first-parent commits in them, which is a reasonable simulation of a pushed-to repository. But it also means that only 250 first-parent commits are in those packs (compared to almost 50,000 total in linux.git), and the rest are in the huge "base" pack. So once we start looking at history in taht big pack, that's where we'll find most everything, and even the 1-element cache gets close to 100% cache hits. You could almost certainly show better numbers with a more pathological case (e.g., distributing the objects more evenly across the packs). But that's simply not that realistic a scenario, so it makes more sense to focus on these numbers. The implementation itself is a straightforward application of the MRU code. We provide an MRU-ordered list of packs that shadows the packed_git list. This is easy to do because we only create and revise the pack list in one place. The "reprepare" code path actually drops the whole MRU and replaces it for simplicity. It would be more efficient to just add new entries, but there's not much point in optimizing here; repreparing happens rarely, and only after doing a lot of other expensive work. The key things to keep optimized are traversal (which is just a normal linked list, albeit with one extra level of indirection over the regular packed_git list), and marking (which is a constant number of pointer assignments, though slightly more than the old last_found_pack was; it doesn't seem to create a measurable slowdown, though). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-29 06:09:46 +02:00
prepare_packed_git_mru();
prepare_packed_git_run_once = 1;
}
void reprepare_packed_git(void)
{
approximate_object_count_valid = 0;
prepare_packed_git_run_once = 0;
prepare_packed_git();
}
static void mark_bad_packed_object(struct packed_git *p,
const unsigned char *sha1)
{
unsigned i;
for (i = 0; i < p->num_bad_objects; i++)
if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
return;
p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
st_mult(GIT_MAX_RAWSZ,
st_add(p->num_bad_objects, 1)));
hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
p->num_bad_objects++;
}
static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
{
struct packed_git *p;
unsigned i;
for (p = packed_git; p; p = p->next)
for (i = 0; i < p->num_bad_objects; i++)
if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
return p;
return NULL;
}
/*
* With an in-core object data in "map", rehash it to make sure the
* object name actually matches "sha1" to detect object corruption.
* With "map" == NULL, try reading the object named with "sha1" using
* the streaming interface and rehash it to do the same.
*/
int check_sha1_signature(const unsigned char *sha1, void *map,
unsigned long size, const char *type)
{
unsigned char real_sha1[20];
enum object_type obj_type;
struct git_istream *st;
git_SHA_CTX c;
char hdr[32];
int hdrlen;
if (map) {
hash_sha1_file(map, size, type, real_sha1);
return hashcmp(sha1, real_sha1) ? -1 : 0;
}
st = open_istream(sha1, &obj_type, &size, NULL);
if (!st)
return -1;
/* Generate the header */
hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(obj_type), size) + 1;
/* Sha1.. */
git_SHA1_Init(&c);
git_SHA1_Update(&c, hdr, hdrlen);
for (;;) {
char buf[1024 * 16];
ssize_t readlen = read_istream(st, buf, sizeof(buf));
if (readlen < 0) {
close_istream(st);
return -1;
}
if (!readlen)
break;
git_SHA1_Update(&c, buf, readlen);
}
git_SHA1_Final(real_sha1, &c);
close_istream(st);
return hashcmp(sha1, real_sha1) ? -1 : 0;
}
int git_open_cloexec(const char *name, int flags)
{
int fd;
static int o_cloexec = O_CLOEXEC;
fd = open(name, flags | o_cloexec);
if ((o_cloexec & O_CLOEXEC) && fd < 0 && errno == EINVAL) {
/* Try again w/o O_CLOEXEC: the kernel might not support it */
o_cloexec &= ~O_CLOEXEC;
fd = open(name, flags | o_cloexec);
}
#if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC)
{
static int fd_cloexec = FD_CLOEXEC;
if (!o_cloexec && 0 <= fd && fd_cloexec) {
/* Opened w/o O_CLOEXEC? try with fcntl(2) to add it */
int flags = fcntl(fd, F_GETFD);
if (fcntl(fd, F_SETFD, flags | fd_cloexec))
fd_cloexec = 0;
}
}
#endif
return fd;
}
/*
* Find "sha1" as a loose object in the local repository or in an alternate.
* Returns 0 on success, negative on failure.
*
* The "path" out-parameter will give the path of the object we found (if any).
* Note that it may point to static storage and is only valid until another
* call to sha1_file_name(), etc.
*/
static int stat_sha1_file(const unsigned char *sha1, struct stat *st,
const char **path)
{
struct alternate_object_database *alt;
*path = sha1_file_name(sha1);
if (!lstat(*path, st))
return 0;
prepare_alt_odb();
errno = ENOENT;
for (alt = alt_odb_list; alt; alt = alt->next) {
*path = alt_sha1_path(alt, sha1);
if (!lstat(*path, st))
return 0;
}
return -1;
}
/*
* Like stat_sha1_file(), but actually open the object and return the
* descriptor. See the caveats on the "path" parameter above.
*/
static int open_sha1_file(const unsigned char *sha1, const char **path)
{
int fd;
struct alternate_object_database *alt;
int most_interesting_errno;
*path = sha1_file_name(sha1);
fd = git_open(*path);
if (fd >= 0)
return fd;
most_interesting_errno = errno;
prepare_alt_odb();
for (alt = alt_odb_list; alt; alt = alt->next) {
*path = alt_sha1_path(alt, sha1);
fd = git_open(*path);
if (fd >= 0)
return fd;
if (most_interesting_errno == ENOENT)
most_interesting_errno = errno;
}
errno = most_interesting_errno;
return -1;
}
/*
* Map the loose object at "path" if it is not NULL, or the path found by
* searching for a loose object named "sha1".
*/
static void *map_sha1_file_1(const char *path,
const unsigned char *sha1,
unsigned long *size)
{
void *map;
int fd;
if (path)
fd = git_open(path);
else
fd = open_sha1_file(sha1, &path);
map = NULL;
if (fd >= 0) {
struct stat st;
if (!fstat(fd, &st)) {
*size = xsize_t(st.st_size);
if (!*size) {
/* mmap() is forbidden on empty files */
error("object file %s is empty", path);
return NULL;
}
map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
}
close(fd);
}
return map;
}
void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
{
return map_sha1_file_1(NULL, sha1, size);
}
unsigned long unpack_object_header_buffer(const unsigned char *buf,
unsigned long len, enum object_type *type, unsigned long *sizep)
{
unsigned shift;
Fix big left-shifts of unsigned char Shifting 'unsigned char' or 'unsigned short' left can result in sign extension errors, since the C integer promotion rules means that the unsigned char/short will get implicitly promoted to a signed 'int' due to the shift (or due to other operations). This normally doesn't matter, but if you shift things up sufficiently, it will now set the sign bit in 'int', and a subsequent cast to a bigger type (eg 'long' or 'unsigned long') will now sign-extend the value despite the original expression being unsigned. One example of this would be something like unsigned long size; unsigned char c; size += c << 24; where despite all the variables being unsigned, 'c << 24' ends up being a signed entity, and will get sign-extended when then doing the addition in an 'unsigned long' type. Since git uses 'unsigned char' pointers extensively, we actually have this bug in a couple of places. I may have missed some, but this is the result of looking at git grep '[^0-9 ][ ]*<<[ ][a-z]' -- '*.c' '*.h' git grep '<<[ ]*24' which catches at least the common byte cases (shifting variables by a variable amount, and shifting by 24 bits). I also grepped for just 'unsigned char' variables in general, and converted the ones that most obviously ended up getting implicitly cast immediately anyway (eg hash_name(), encode_85()). In addition to just avoiding 'unsigned char', this patch also tries to use a common idiom for the delta header size thing. We had three different variations on it: "& 0x7fUL" in one place (getting the sign extension right), and "& ~0x80" and "& 0x7f" in two other places (not getting it right). Apart from making them all just avoid using "unsigned char" at all, I also unified them to then use a simple "& 0x7f". I considered making a sparse extension which warns about doing implicit casts from unsigned types to signed types, but it gets rather complex very quickly, so this is just a hack. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-06-18 02:22:27 +02:00
unsigned long size, c;
unsigned long used = 0;
c = buf[used++];
*type = (c >> 4) & 7;
size = c & 15;
shift = 4;
while (c & 0x80) {
if (len <= used || bitsizeof(long) <= shift) {
error("bad object header");
size = used = 0;
break;
}
c = buf[used++];
size += (c & 0x7f) << shift;
shift += 7;
}
*sizep = size;
return used;
}
unpack_sha1_header(): detect malformed object header When opening a loose object file, we often do this sequence: - prepare a short buffer for the object header (on stack) - call unpack_sha1_header() and have early part of the object data inflated, enough to fill the buffer - parse that data in the short buffer, assuming that the first part of the object is <typename> SP <length> NUL Because the parsing function parse_sha1_header_extended() is not given the number of bytes inflated into the header buffer, it you craft a file whose early part inflates a garbage sequence without SP or NUL, and replace a loose object with it, it will end up reading past the end of the inflated data. To correct this, do the following four things: - rename unpack_sha1_header() to unpack_sha1_short_header() and have unpack_sha1_header_to_strbuf() keep calling that as its helper function. This will detect and report zlib errors, but is not aware of the format of a loose object (as before). - introduce unpack_sha1_header() that calls the same helper function, and when zlib reports it inflated OK into the buffer, check if the inflated data has NUL. This would ensure that parsing function will terminate within the buffer that holds the inflated header. - update unpack_sha1_header_to_strbuf() to check if the resulting buffer has NUL for the same effect. - update parse_sha1_header_extended() to make sure that its loop to find the SP that terminates the <typename> stops at NUL. Essentially, this makes unpack_*() functions that are asked to unpack a loose object header to be a bit more strict and detect an input that cannot possibly be a valid object header, even before the parsing function kicks in. Reported-by: Gustavo Grieco <gustavo.grieco@imag.fr> Helped-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 06:29:04 +02:00
static int unpack_sha1_short_header(git_zstream *stream,
unsigned char *map, unsigned long mapsize,
void *buffer, unsigned long bufsiz)
{
/* Get the data stream */
memset(stream, 0, sizeof(*stream));
stream->next_in = map;
stream->avail_in = mapsize;
stream->next_out = buffer;
stream->avail_out = bufsiz;
git_inflate_init(stream);
return git_inflate(stream, 0);
}
unpack_sha1_header(): detect malformed object header When opening a loose object file, we often do this sequence: - prepare a short buffer for the object header (on stack) - call unpack_sha1_header() and have early part of the object data inflated, enough to fill the buffer - parse that data in the short buffer, assuming that the first part of the object is <typename> SP <length> NUL Because the parsing function parse_sha1_header_extended() is not given the number of bytes inflated into the header buffer, it you craft a file whose early part inflates a garbage sequence without SP or NUL, and replace a loose object with it, it will end up reading past the end of the inflated data. To correct this, do the following four things: - rename unpack_sha1_header() to unpack_sha1_short_header() and have unpack_sha1_header_to_strbuf() keep calling that as its helper function. This will detect and report zlib errors, but is not aware of the format of a loose object (as before). - introduce unpack_sha1_header() that calls the same helper function, and when zlib reports it inflated OK into the buffer, check if the inflated data has NUL. This would ensure that parsing function will terminate within the buffer that holds the inflated header. - update unpack_sha1_header_to_strbuf() to check if the resulting buffer has NUL for the same effect. - update parse_sha1_header_extended() to make sure that its loop to find the SP that terminates the <typename> stops at NUL. Essentially, this makes unpack_*() functions that are asked to unpack a loose object header to be a bit more strict and detect an input that cannot possibly be a valid object header, even before the parsing function kicks in. Reported-by: Gustavo Grieco <gustavo.grieco@imag.fr> Helped-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 06:29:04 +02:00
int unpack_sha1_header(git_zstream *stream,
unsigned char *map, unsigned long mapsize,
void *buffer, unsigned long bufsiz)
{
int status = unpack_sha1_short_header(stream, map, mapsize,
buffer, bufsiz);
if (status < Z_OK)
return status;
/* Make sure we have the terminating NUL */
if (!memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
return -1;
return 0;
}
static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
unsigned long mapsize, void *buffer,
unsigned long bufsiz, struct strbuf *header)
{
int status;
unpack_sha1_header(): detect malformed object header When opening a loose object file, we often do this sequence: - prepare a short buffer for the object header (on stack) - call unpack_sha1_header() and have early part of the object data inflated, enough to fill the buffer - parse that data in the short buffer, assuming that the first part of the object is <typename> SP <length> NUL Because the parsing function parse_sha1_header_extended() is not given the number of bytes inflated into the header buffer, it you craft a file whose early part inflates a garbage sequence without SP or NUL, and replace a loose object with it, it will end up reading past the end of the inflated data. To correct this, do the following four things: - rename unpack_sha1_header() to unpack_sha1_short_header() and have unpack_sha1_header_to_strbuf() keep calling that as its helper function. This will detect and report zlib errors, but is not aware of the format of a loose object (as before). - introduce unpack_sha1_header() that calls the same helper function, and when zlib reports it inflated OK into the buffer, check if the inflated data has NUL. This would ensure that parsing function will terminate within the buffer that holds the inflated header. - update unpack_sha1_header_to_strbuf() to check if the resulting buffer has NUL for the same effect. - update parse_sha1_header_extended() to make sure that its loop to find the SP that terminates the <typename> stops at NUL. Essentially, this makes unpack_*() functions that are asked to unpack a loose object header to be a bit more strict and detect an input that cannot possibly be a valid object header, even before the parsing function kicks in. Reported-by: Gustavo Grieco <gustavo.grieco@imag.fr> Helped-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 06:29:04 +02:00
status = unpack_sha1_short_header(stream, map, mapsize, buffer, bufsiz);
if (status < Z_OK)
return -1;
/*
* Check if entire header is unpacked in the first iteration.
*/
if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
return 0;
/*
* buffer[0..bufsiz] was not large enough. Copy the partial
* result out to header, and then append the result of further
* reading the stream.
*/
strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
stream->next_out = buffer;
stream->avail_out = bufsiz;
do {
status = git_inflate(stream, 0);
strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
return 0;
stream->next_out = buffer;
stream->avail_out = bufsiz;
} while (status != Z_STREAM_END);
return -1;
}
2011-06-10 20:52:15 +02:00
static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
{
int bytes = strlen(buffer) + 1;
unsigned char *buf = xmallocz(size);
unsigned long n;
int status = Z_OK;
n = stream->total_out - bytes;
if (n > size)
n = size;
memcpy(buf, (char *) buffer + bytes, n);
bytes = n;
if (bytes <= size) {
/*
* The above condition must be (bytes <= size), not
* (bytes < size). In other words, even though we
* expect no more output and set avail_out to zero,
* the input zlib stream may have bytes that express
* "this concludes the stream", and we *do* want to
* eat that input.
*
* Otherwise we would not be able to test that we
* consumed all the input to reach the expected size;
* we also want to check that zlib tells us that all
* went well with status == Z_STREAM_END at the end.
*/
stream->next_out = buf + bytes;
stream->avail_out = size - bytes;
while (status == Z_OK)
status = git_inflate(stream, Z_FINISH);
}
if (status == Z_STREAM_END && !stream->avail_in) {
git_inflate_end(stream);
return buf;
}
if (status < 0)
error("corrupt loose object '%s'", sha1_to_hex(sha1));
else if (stream->avail_in)
error("garbage at end of loose object '%s'",
sha1_to_hex(sha1));
free(buf);
return NULL;
}
/*
* We used to just use "sscanf()", but that's actually way
* too permissive for what we want to check. So do an anal
* object header parse by hand.
*/
static int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
unsigned int flags)
{
const char *type_buf = hdr;
unsigned long size;
int type, type_len = 0;
/*
* The type can be of any size but is followed by
* a space.
*/
for (;;) {
char c = *hdr++;
unpack_sha1_header(): detect malformed object header When opening a loose object file, we often do this sequence: - prepare a short buffer for the object header (on stack) - call unpack_sha1_header() and have early part of the object data inflated, enough to fill the buffer - parse that data in the short buffer, assuming that the first part of the object is <typename> SP <length> NUL Because the parsing function parse_sha1_header_extended() is not given the number of bytes inflated into the header buffer, it you craft a file whose early part inflates a garbage sequence without SP or NUL, and replace a loose object with it, it will end up reading past the end of the inflated data. To correct this, do the following four things: - rename unpack_sha1_header() to unpack_sha1_short_header() and have unpack_sha1_header_to_strbuf() keep calling that as its helper function. This will detect and report zlib errors, but is not aware of the format of a loose object (as before). - introduce unpack_sha1_header() that calls the same helper function, and when zlib reports it inflated OK into the buffer, check if the inflated data has NUL. This would ensure that parsing function will terminate within the buffer that holds the inflated header. - update unpack_sha1_header_to_strbuf() to check if the resulting buffer has NUL for the same effect. - update parse_sha1_header_extended() to make sure that its loop to find the SP that terminates the <typename> stops at NUL. Essentially, this makes unpack_*() functions that are asked to unpack a loose object header to be a bit more strict and detect an input that cannot possibly be a valid object header, even before the parsing function kicks in. Reported-by: Gustavo Grieco <gustavo.grieco@imag.fr> Helped-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-09-26 06:29:04 +02:00
if (!c)
return -1;
if (c == ' ')
break;
type_len++;
}
type = type_from_string_gently(type_buf, type_len, 1);
if (oi->typename)
strbuf_add(oi->typename, type_buf, type_len);
/*
* Set type to 0 if its an unknown object and
* we're obtaining the type using '--allow-unknown-type'
* option.
*/
if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE) && (type < 0))
type = 0;
else if (type < 0)
die("invalid object type");
if (oi->typep)
*oi->typep = type;
/*
* The length must follow immediately, and be in canonical
* decimal format (ie "010" is not valid).
*/
size = *hdr++ - '0';
if (size > 9)
return -1;
if (size) {
for (;;) {
unsigned long c = *hdr - '0';
if (c > 9)
break;
hdr++;
size = size * 10 + c;
}
}
if (oi->sizep)
*oi->sizep = size;
/*
* The length must be followed by a zero byte
*/
return *hdr ? -1 : type;
}
int parse_sha1_header(const char *hdr, unsigned long *sizep)
{
provide an initializer for "struct object_info" An all-zero initializer is fine for this struct, but because the first element is a pointer, call sites need to know to use "NULL" instead of "0". Otherwise some static checkers like "sparse" will complain; see d099b71 (Fix some sparse warnings, 2013-07-18) for example. So let's provide an initializer to make this easier to get right. But let's also comment that memset() to zero is explicitly OK[1]. One of the callers embeds object_info in another struct which is initialized via memset (expand_data in builtin/cat-file.c). Since our subset of C doesn't allow assignment from a compound literal, handling this in any other way is awkward, so we'd like to keep the ability to initialize by memset(). By documenting this property, it should make anybody who wants to change the initializer think twice before doing so. There's one other caller of interest. In parse_sha1_header(), we did not initialize the struct fully in the first place. This turned out not to be a bug because the sub-function it calls does not look at any other fields except the ones we did initialize. But that assumption might not hold in the future, so it's a dangerous construct. This patch switches it to initializing the whole struct, which protects us against unexpected reads of the other fields. [1] Obviously using memset() to initialize a pointer violates the C standard, but we long ago decided that it was an acceptable tradeoff in the real world. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-11 11:24:35 +02:00
struct object_info oi = OBJECT_INFO_INIT;
oi.sizep = sizep;
return parse_sha1_header_extended(hdr, &oi, 0);
}
unsigned long get_size_from_delta(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos)
{
const unsigned char *data;
unsigned char delta_head[20], *in;
2011-06-10 20:52:15 +02:00
git_zstream stream;
int st;
memset(&stream, 0, sizeof(stream));
stream.next_out = delta_head;
stream.avail_out = sizeof(delta_head);
git_inflate_init(&stream);
do {
in = use_pack(p, w_curs, curpos, &stream.avail_in);
stream.next_in = in;
st = git_inflate(&stream, Z_FINISH);
curpos += stream.next_in - in;
} while ((st == Z_OK || st == Z_BUF_ERROR) &&
stream.total_out < sizeof(delta_head));
git_inflate_end(&stream);
if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
error("delta data unpack-initial failed");
return 0;
}
/* Examine the initial part of the delta to figure out
* the result size.
*/
data = delta_head;
/* ignore base size */
get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
/* Read the result size */
return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
}
static off_t get_delta_base(struct packed_git *p,
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:08 +01:00
struct pack_window **w_curs,
off_t *curpos,
enum object_type type,
off_t delta_obj_offset)
{
unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
off_t base_offset;
/* use_pack() assured us we have [base_info, base_info + 20)
* as a range that we can look at without walking off the
* end of the mapped window. Its actually the hash size
* that is assured. An OFS_DELTA longer than the hash size
* is stupid, as then a REF_DELTA would be smaller to store.
*/
if (type == OBJ_OFS_DELTA) {
unsigned used = 0;
unsigned char c = base_info[used++];
base_offset = c & 127;
while (c & 128) {
base_offset += 1;
if (!base_offset || MSB(base_offset, 7))
return 0; /* overflow */
c = base_info[used++];
base_offset = (base_offset << 7) + (c & 127);
}
base_offset = delta_obj_offset - base_offset;
if (base_offset <= 0 || base_offset >= delta_obj_offset)
return 0; /* out of bound */
*curpos += used;
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
base_offset = find_pack_entry_one(base_info, p);
*curpos += 20;
} else
die("I am totally screwed");
return base_offset;
}
/*
* Like get_delta_base above, but we return the sha1 instead of the pack
* offset. This means it is cheaper for REF deltas (we do not have to do
* the final object lookup), but more expensive for OFS deltas (we
* have to load the revidx to convert the offset back into a sha1).
*/
static const unsigned char *get_delta_base_sha1(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos,
enum object_type type,
off_t delta_obj_offset)
{
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
return base;
} else if (type == OBJ_OFS_DELTA) {
struct revindex_entry *revidx;
off_t base_offset = get_delta_base(p, w_curs, &curpos,
type, delta_obj_offset);
if (!base_offset)
return NULL;
revidx = find_pack_revindex(p, base_offset);
if (!revidx)
return NULL;
return nth_packed_object_sha1(p, revidx->nr);
} else
return NULL;
}
int unpack_object_header(struct packed_git *p,
struct pack_window **w_curs,
off_t *curpos,
unsigned long *sizep)
{
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:08 +01:00
unsigned char *base;
2011-06-10 20:52:15 +02:00
unsigned long left;
unsigned long used;
enum object_type type;
/* use_pack() assures us we have [base, base + 20) available
* as a range that we can look at. (Its actually the hash
* size that is assured.) With our object header encoding
* the maximum deflated object size is 2^137, which is just
* insane, so we know won't exceed what we have been given.
*/
base = use_pack(p, w_curs, *curpos, &left);
used = unpack_object_header_buffer(base, left, &type, sizep);
if (!used) {
type = OBJ_BAD;
} else
*curpos += used;
return type;
}
sha1_file: remove recursion in packed_object_info packed_object_info() and packed_delta_info() were mutually recursive. The former would handle ordinary types and defer deltas to the latter; the latter would use the former to resolve the delta base. This arrangement, however, leads to trouble with threaded index-pack and long delta chains on platforms where thread stacks are small, as happened on OS X (512kB thread stacks by default) with the chromium repo. The task of the two functions is not all that hard to describe without any recursion, however. It proceeds in three steps: - determine the representation type and size, based on the outermost object (delta or not) - follow through the delta chain, if any - determine the object type from what is found at the end of the delta chain The only complication stems from the error recovery. If parsing fails at any step, we want to mark that object (within the pack) as bad and try getting the corresponding SHA1 from elsewhere. If that also fails, we want to repeat this process back up the delta chain until we find a reasonable solution or conclude that there is no way to reconstruct the object. (This is conveniently checked by t5303.) To achieve that within the pack, we keep track of the entire delta chain in a stack. When things go sour, we process that stack from the top, marking entries as bad and attempting to re-resolve by sha1. To avoid excessive malloc(), the stack starts out with a small stack-allocated array. The choice of 64 is based on the default of pack.depth, which is 50, in the hope that it covers "most" delta chains without any need for malloc(). It's much harder to make the actual re-resolving by sha1 nonrecursive, so we skip that. If you can't afford *that* recursion, your corruption problems are more serious than your stack size problems. Reported-by: Stefan Zager <szager@google.com> Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-25 19:07:39 +01:00
static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
{
int type;
struct revindex_entry *revidx;
const unsigned char *sha1;
revidx = find_pack_revindex(p, obj_offset);
if (!revidx)
return OBJ_BAD;
sha1 = nth_packed_object_sha1(p, revidx->nr);
mark_bad_packed_object(p, sha1);
type = sha1_object_info(sha1, NULL);
if (type <= OBJ_NONE)
return OBJ_BAD;
return type;
}
#define POI_STACK_PREALLOC 64
static enum object_type packed_to_object_type(struct packed_git *p,
off_t obj_offset,
enum object_type type,
struct pack_window **w_curs,
off_t curpos)
{
sha1_file: remove recursion in packed_object_info packed_object_info() and packed_delta_info() were mutually recursive. The former would handle ordinary types and defer deltas to the latter; the latter would use the former to resolve the delta base. This arrangement, however, leads to trouble with threaded index-pack and long delta chains on platforms where thread stacks are small, as happened on OS X (512kB thread stacks by default) with the chromium repo. The task of the two functions is not all that hard to describe without any recursion, however. It proceeds in three steps: - determine the representation type and size, based on the outermost object (delta or not) - follow through the delta chain, if any - determine the object type from what is found at the end of the delta chain The only complication stems from the error recovery. If parsing fails at any step, we want to mark that object (within the pack) as bad and try getting the corresponding SHA1 from elsewhere. If that also fails, we want to repeat this process back up the delta chain until we find a reasonable solution or conclude that there is no way to reconstruct the object. (This is conveniently checked by t5303.) To achieve that within the pack, we keep track of the entire delta chain in a stack. When things go sour, we process that stack from the top, marking entries as bad and attempting to re-resolve by sha1. To avoid excessive malloc(), the stack starts out with a small stack-allocated array. The choice of 64 is based on the default of pack.depth, which is 50, in the hope that it covers "most" delta chains without any need for malloc(). It's much harder to make the actual re-resolving by sha1 nonrecursive, so we skip that. If you can't afford *that* recursion, your corruption problems are more serious than your stack size problems. Reported-by: Stefan Zager <szager@google.com> Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-25 19:07:39 +01:00
off_t small_poi_stack[POI_STACK_PREALLOC];
off_t *poi_stack = small_poi_stack;
int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
sha1_file: remove recursion in packed_object_info packed_object_info() and packed_delta_info() were mutually recursive. The former would handle ordinary types and defer deltas to the latter; the latter would use the former to resolve the delta base. This arrangement, however, leads to trouble with threaded index-pack and long delta chains on platforms where thread stacks are small, as happened on OS X (512kB thread stacks by default) with the chromium repo. The task of the two functions is not all that hard to describe without any recursion, however. It proceeds in three steps: - determine the representation type and size, based on the outermost object (delta or not) - follow through the delta chain, if any - determine the object type from what is found at the end of the delta chain The only complication stems from the error recovery. If parsing fails at any step, we want to mark that object (within the pack) as bad and try getting the corresponding SHA1 from elsewhere. If that also fails, we want to repeat this process back up the delta chain until we find a reasonable solution or conclude that there is no way to reconstruct the object. (This is conveniently checked by t5303.) To achieve that within the pack, we keep track of the entire delta chain in a stack. When things go sour, we process that stack from the top, marking entries as bad and attempting to re-resolve by sha1. To avoid excessive malloc(), the stack starts out with a small stack-allocated array. The choice of 64 is based on the default of pack.depth, which is 50, in the hope that it covers "most" delta chains without any need for malloc(). It's much harder to make the actual re-resolving by sha1 nonrecursive, so we skip that. If you can't afford *that* recursion, your corruption problems are more serious than your stack size problems. Reported-by: Stefan Zager <szager@google.com> Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-25 19:07:39 +01:00
while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
off_t base_offset;
unsigned long size;
sha1_file: remove recursion in packed_object_info packed_object_info() and packed_delta_info() were mutually recursive. The former would handle ordinary types and defer deltas to the latter; the latter would use the former to resolve the delta base. This arrangement, however, leads to trouble with threaded index-pack and long delta chains on platforms where thread stacks are small, as happened on OS X (512kB thread stacks by default) with the chromium repo. The task of the two functions is not all that hard to describe without any recursion, however. It proceeds in three steps: - determine the representation type and size, based on the outermost object (delta or not) - follow through the delta chain, if any - determine the object type from what is found at the end of the delta chain The only complication stems from the error recovery. If parsing fails at any step, we want to mark that object (within the pack) as bad and try getting the corresponding SHA1 from elsewhere. If that also fails, we want to repeat this process back up the delta chain until we find a reasonable solution or conclude that there is no way to reconstruct the object. (This is conveniently checked by t5303.) To achieve that within the pack, we keep track of the entire delta chain in a stack. When things go sour, we process that stack from the top, marking entries as bad and attempting to re-resolve by sha1. To avoid excessive malloc(), the stack starts out with a small stack-allocated array. The choice of 64 is based on the default of pack.depth, which is 50, in the hope that it covers "most" delta chains without any need for malloc(). It's much harder to make the actual re-resolving by sha1 nonrecursive, so we skip that. If you can't afford *that* recursion, your corruption problems are more serious than your stack size problems. Reported-by: Stefan Zager <szager@google.com> Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-25 19:07:39 +01:00
/* Push the object we're going to leave behind */
if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
poi_stack_alloc = alloc_nr(poi_stack_nr);
ALLOC_ARRAY(poi_stack, poi_stack_alloc);
sha1_file: remove recursion in packed_object_info packed_object_info() and packed_delta_info() were mutually recursive. The former would handle ordinary types and defer deltas to the latter; the latter would use the former to resolve the delta base. This arrangement, however, leads to trouble with threaded index-pack and long delta chains on platforms where thread stacks are small, as happened on OS X (512kB thread stacks by default) with the chromium repo. The task of the two functions is not all that hard to describe without any recursion, however. It proceeds in three steps: - determine the representation type and size, based on the outermost object (delta or not) - follow through the delta chain, if any - determine the object type from what is found at the end of the delta chain The only complication stems from the error recovery. If parsing fails at any step, we want to mark that object (within the pack) as bad and try getting the corresponding SHA1 from elsewhere. If that also fails, we want to repeat this process back up the delta chain until we find a reasonable solution or conclude that there is no way to reconstruct the object. (This is conveniently checked by t5303.) To achieve that within the pack, we keep track of the entire delta chain in a stack. When things go sour, we process that stack from the top, marking entries as bad and attempting to re-resolve by sha1. To avoid excessive malloc(), the stack starts out with a small stack-allocated array. The choice of 64 is based on the default of pack.depth, which is 50, in the hope that it covers "most" delta chains without any need for malloc(). It's much harder to make the actual re-resolving by sha1 nonrecursive, so we skip that. If you can't afford *that* recursion, your corruption problems are more serious than your stack size problems. Reported-by: Stefan Zager <szager@google.com> Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-25 19:07:39 +01:00
memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
} else {
ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
}
poi_stack[poi_stack_nr++] = obj_offset;
/* If parsing the base offset fails, just unwind */
base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
sha1_file: remove recursion in packed_object_info packed_object_info() and packed_delta_info() were mutually recursive. The former would handle ordinary types and defer deltas to the latter; the latter would use the former to resolve the delta base. This arrangement, however, leads to trouble with threaded index-pack and long delta chains on platforms where thread stacks are small, as happened on OS X (512kB thread stacks by default) with the chromium repo. The task of the two functions is not all that hard to describe without any recursion, however. It proceeds in three steps: - determine the representation type and size, based on the outermost object (delta or not) - follow through the delta chain, if any - determine the object type from what is found at the end of the delta chain The only complication stems from the error recovery. If parsing fails at any step, we want to mark that object (within the pack) as bad and try getting the corresponding SHA1 from elsewhere. If that also fails, we want to repeat this process back up the delta chain until we find a reasonable solution or conclude that there is no way to reconstruct the object. (This is conveniently checked by t5303.) To achieve that within the pack, we keep track of the entire delta chain in a stack. When things go sour, we process that stack from the top, marking entries as bad and attempting to re-resolve by sha1. To avoid excessive malloc(), the stack starts out with a small stack-allocated array. The choice of 64 is based on the default of pack.depth, which is 50, in the hope that it covers "most" delta chains without any need for malloc(). It's much harder to make the actual re-resolving by sha1 nonrecursive, so we skip that. If you can't afford *that* recursion, your corruption problems are more serious than your stack size problems. Reported-by: Stefan Zager <szager@google.com> Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-25 19:07:39 +01:00
if (!base_offset)
goto unwind;
curpos = obj_offset = base_offset;
type = unpack_object_header(p, w_curs, &curpos, &size);
sha1_file: remove recursion in packed_object_info packed_object_info() and packed_delta_info() were mutually recursive. The former would handle ordinary types and defer deltas to the latter; the latter would use the former to resolve the delta base. This arrangement, however, leads to trouble with threaded index-pack and long delta chains on platforms where thread stacks are small, as happened on OS X (512kB thread stacks by default) with the chromium repo. The task of the two functions is not all that hard to describe without any recursion, however. It proceeds in three steps: - determine the representation type and size, based on the outermost object (delta or not) - follow through the delta chain, if any - determine the object type from what is found at the end of the delta chain The only complication stems from the error recovery. If parsing fails at any step, we want to mark that object (within the pack) as bad and try getting the corresponding SHA1 from elsewhere. If that also fails, we want to repeat this process back up the delta chain until we find a reasonable solution or conclude that there is no way to reconstruct the object. (This is conveniently checked by t5303.) To achieve that within the pack, we keep track of the entire delta chain in a stack. When things go sour, we process that stack from the top, marking entries as bad and attempting to re-resolve by sha1. To avoid excessive malloc(), the stack starts out with a small stack-allocated array. The choice of 64 is based on the default of pack.depth, which is 50, in the hope that it covers "most" delta chains without any need for malloc(). It's much harder to make the actual re-resolving by sha1 nonrecursive, so we skip that. If you can't afford *that* recursion, your corruption problems are more serious than your stack size problems. Reported-by: Stefan Zager <szager@google.com> Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-25 19:07:39 +01:00
if (type <= OBJ_NONE) {
/* If getting the base itself fails, we first
* retry the base, otherwise unwind */
type = retry_bad_packed_offset(p, base_offset);
if (type > OBJ_NONE)
goto out;
goto unwind;
}
}
switch (type) {
sha1_file: remove recursion in packed_object_info packed_object_info() and packed_delta_info() were mutually recursive. The former would handle ordinary types and defer deltas to the latter; the latter would use the former to resolve the delta base. This arrangement, however, leads to trouble with threaded index-pack and long delta chains on platforms where thread stacks are small, as happened on OS X (512kB thread stacks by default) with the chromium repo. The task of the two functions is not all that hard to describe without any recursion, however. It proceeds in three steps: - determine the representation type and size, based on the outermost object (delta or not) - follow through the delta chain, if any - determine the object type from what is found at the end of the delta chain The only complication stems from the error recovery. If parsing fails at any step, we want to mark that object (within the pack) as bad and try getting the corresponding SHA1 from elsewhere. If that also fails, we want to repeat this process back up the delta chain until we find a reasonable solution or conclude that there is no way to reconstruct the object. (This is conveniently checked by t5303.) To achieve that within the pack, we keep track of the entire delta chain in a stack. When things go sour, we process that stack from the top, marking entries as bad and attempting to re-resolve by sha1. To avoid excessive malloc(), the stack starts out with a small stack-allocated array. The choice of 64 is based on the default of pack.depth, which is 50, in the hope that it covers "most" delta chains without any need for malloc(). It's much harder to make the actual re-resolving by sha1 nonrecursive, so we skip that. If you can't afford *that* recursion, your corruption problems are more serious than your stack size problems. Reported-by: Stefan Zager <szager@google.com> Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-25 19:07:39 +01:00
case OBJ_BAD:
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
break;
default:
error("unknown object type %i at offset %"PRIuMAX" in %s",
type, (uintmax_t)obj_offset, p->pack_name);
type = OBJ_BAD;
}
sha1_file: remove recursion in packed_object_info packed_object_info() and packed_delta_info() were mutually recursive. The former would handle ordinary types and defer deltas to the latter; the latter would use the former to resolve the delta base. This arrangement, however, leads to trouble with threaded index-pack and long delta chains on platforms where thread stacks are small, as happened on OS X (512kB thread stacks by default) with the chromium repo. The task of the two functions is not all that hard to describe without any recursion, however. It proceeds in three steps: - determine the representation type and size, based on the outermost object (delta or not) - follow through the delta chain, if any - determine the object type from what is found at the end of the delta chain The only complication stems from the error recovery. If parsing fails at any step, we want to mark that object (within the pack) as bad and try getting the corresponding SHA1 from elsewhere. If that also fails, we want to repeat this process back up the delta chain until we find a reasonable solution or conclude that there is no way to reconstruct the object. (This is conveniently checked by t5303.) To achieve that within the pack, we keep track of the entire delta chain in a stack. When things go sour, we process that stack from the top, marking entries as bad and attempting to re-resolve by sha1. To avoid excessive malloc(), the stack starts out with a small stack-allocated array. The choice of 64 is based on the default of pack.depth, which is 50, in the hope that it covers "most" delta chains without any need for malloc(). It's much harder to make the actual re-resolving by sha1 nonrecursive, so we skip that. If you can't afford *that* recursion, your corruption problems are more serious than your stack size problems. Reported-by: Stefan Zager <szager@google.com> Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-25 19:07:39 +01:00
out:
if (poi_stack != small_poi_stack)
free(poi_stack);
return type;
sha1_file: remove recursion in packed_object_info packed_object_info() and packed_delta_info() were mutually recursive. The former would handle ordinary types and defer deltas to the latter; the latter would use the former to resolve the delta base. This arrangement, however, leads to trouble with threaded index-pack and long delta chains on platforms where thread stacks are small, as happened on OS X (512kB thread stacks by default) with the chromium repo. The task of the two functions is not all that hard to describe without any recursion, however. It proceeds in three steps: - determine the representation type and size, based on the outermost object (delta or not) - follow through the delta chain, if any - determine the object type from what is found at the end of the delta chain The only complication stems from the error recovery. If parsing fails at any step, we want to mark that object (within the pack) as bad and try getting the corresponding SHA1 from elsewhere. If that also fails, we want to repeat this process back up the delta chain until we find a reasonable solution or conclude that there is no way to reconstruct the object. (This is conveniently checked by t5303.) To achieve that within the pack, we keep track of the entire delta chain in a stack. When things go sour, we process that stack from the top, marking entries as bad and attempting to re-resolve by sha1. To avoid excessive malloc(), the stack starts out with a small stack-allocated array. The choice of 64 is based on the default of pack.depth, which is 50, in the hope that it covers "most" delta chains without any need for malloc(). It's much harder to make the actual re-resolving by sha1 nonrecursive, so we skip that. If you can't afford *that* recursion, your corruption problems are more serious than your stack size problems. Reported-by: Stefan Zager <szager@google.com> Signed-off-by: Thomas Rast <trast@student.ethz.ch> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-25 19:07:39 +01:00
unwind:
while (poi_stack_nr) {
obj_offset = poi_stack[--poi_stack_nr];
type = retry_bad_packed_offset(p, obj_offset);
if (type > OBJ_NONE)
goto out;
}
type = OBJ_BAD;
goto out;
}
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
static struct hashmap delta_base_cache;
static size_t delta_base_cached;
static LIST_HEAD(delta_base_cache_lru);
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
struct delta_base_cache_key {
struct packed_git *p;
off_t base_offset;
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
};
struct delta_base_cache_entry {
struct hashmap hash;
struct delta_base_cache_key key;
struct list_head lru;
void *data;
unsigned long size;
enum object_type type;
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
};
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
{
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
unsigned int hash;
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset;
hash += (hash >> 8) + (hash >> 16);
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
return hash;
}
static struct delta_base_cache_entry *
get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
{
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
struct hashmap_entry entry;
struct delta_base_cache_key key;
if (!delta_base_cache.cmpfn)
return NULL;
hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
key.p = p;
key.base_offset = base_offset;
return hashmap_get(&delta_base_cache, &entry, &key);
}
static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
const struct delta_base_cache_key *b)
{
return a->p == b->p && a->base_offset == b->base_offset;
}
static int delta_base_cache_hash_cmp(const void *unused_cmp_data,
const void *va, const void *vb,
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
const void *vkey)
{
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
const struct delta_base_cache_entry *a = va, *b = vb;
const struct delta_base_cache_key *key = vkey;
if (key)
return !delta_base_cache_key_eq(&a->key, key);
else
return !delta_base_cache_key_eq(&a->key, &b->key);
}
static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
{
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
return !!get_delta_base_cache_entry(p, base_offset);
}
/*
* Remove the entry from the cache, but do _not_ free the associated
* entry data. The caller takes ownership of the "data" buffer, and
* should copy out any fields it wants before detaching.
*/
static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
{
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
hashmap_remove(&delta_base_cache, ent, &ent->key);
list_del(&ent->lru);
delta_base_cached -= ent->size;
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
free(ent);
}
static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
unsigned long *base_size, enum object_type *type)
{
struct delta_base_cache_entry *ent;
ent = get_delta_base_cache_entry(p, base_offset);
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
if (!ent)
return unpack_entry(p, base_offset, type, base_size);
if (type)
*type = ent->type;
if (base_size)
*base_size = ent->size;
return xmemdupz(ent->data, ent->size);
}
static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
{
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
free(ent->data);
detach_delta_base_cache_entry(ent);
}
void clear_delta_base_cache(void)
{
clear_delta_base_cache(): don't modify hashmap while iterating On Thu, Jan 19, 2017 at 03:03:46PM +0100, Ulrich Spörlein wrote: > > I suspect the patch below may fix things for you. It works around it by > > walking over the lru list (either is fine, as they both contain all > > entries, and since we're clearing everything, we don't care about the > > order). > > Confirmed. With the patch applied, I can import the whole 55G in one go > without any crashes or aborts. Thanks much! Thanks. Here it is rolled up with a commit message. -- >8 -- Subject: clear_delta_base_cache(): don't modify hashmap while iterating Removing entries while iterating causes fast-import to access an already-freed `struct packed_git`, leading to various confusing errors. What happens is that clear_delta_base_cache() drops the whole contents of the cache by iterating over the hashmap, calling release_delta_base_cache() on each entry. That function removes the item from the hashmap. The hashmap code may then shrink the table, but the hashmap_iter struct retains an offset from the old table. As a result, the next call to hashmap_iter_next() may claim that the iteration is done, even though some items haven't been visited. The only caller of clear_delta_base_cache() is fast-import, which wants to clear the cache because it is discarding the packed_git struct for its temporary pack. So by failing to remove all of the entries, we still have references to the freed packed_git. To make things even more confusing, this doesn't seem to trigger with the test suite, because it depends on complexities like the size of the hash table, which entries got cleared, whether we try to access them before they're evicted from the cache, etc. So I've been able to identify the problem with large imports like freebsd's svn import, or a fast-export of linux.git. But nothing that would be reasonable to run as part of the normal test suite. We can fix this easily by iterating over the lru linked list instead of the hashmap. They both contain the same entries, and we can use the "safe" variant of the list iterator, which exists for exactly this case. Let's also add a warning to the hashmap API documentation to reduce the chances of getting bit by this again. Reported-by: Ulrich Spörlein <uqs@freebsd.org> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-01-19 17:33:50 +01:00
struct list_head *lru, *tmp;
list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
struct delta_base_cache_entry *entry =
list_entry(lru, struct delta_base_cache_entry, lru);
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
release_delta_base_cache(entry);
}
}
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
void *base, unsigned long base_size, enum object_type type)
{
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
struct list_head *lru, *tmp;
delta_base_cached += base_size;
list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
struct delta_base_cache_entry *f =
list_entry(lru, struct delta_base_cache_entry, lru);
if (delta_base_cached <= delta_base_cache_limit)
break;
release_delta_base_cache(f);
}
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
ent->key.p = p;
ent->key.base_offset = base_offset;
ent->type = type;
ent->data = base;
ent->size = base_size;
list_add_tail(&ent->lru, &delta_base_cache_lru);
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
if (!delta_base_cache.cmpfn)
hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
hashmap_add(&delta_base_cache, ent);
}
int packed_object_info(struct packed_git *p, off_t obj_offset,
struct object_info *oi)
{
struct pack_window *w_curs = NULL;
unsigned long size;
off_t curpos = obj_offset;
enum object_type type;
/*
* We always get the representation type, but only convert it to
* a "real" type later if the caller is interested.
*/
if (oi->contentp) {
*oi->contentp = cache_or_unpack_entry(p, obj_offset, oi->sizep,
&type);
if (!*oi->contentp)
type = OBJ_BAD;
} else {
type = unpack_object_header(p, &w_curs, &curpos, &size);
}
if (!oi->contentp && oi->sizep) {
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
off_t tmp_pos = curpos;
off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
type, obj_offset);
if (!base_offset) {
type = OBJ_BAD;
goto out;
}
*oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
if (*oi->sizep == 0) {
type = OBJ_BAD;
goto out;
}
} else {
*oi->sizep = size;
}
}
if (oi->disk_sizep) {
struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
*oi->disk_sizep = revidx[1].offset - obj_offset;
}
if (oi->typep || oi->typename) {
enum object_type ptot;
ptot = packed_to_object_type(p, obj_offset, type, &w_curs,
curpos);
if (oi->typep)
*oi->typep = ptot;
if (oi->typename) {
const char *tn = typename(ptot);
if (tn)
strbuf_addstr(oi->typename, tn);
}
if (ptot < 0) {
type = OBJ_BAD;
goto out;
}
}
if (oi->delta_base_sha1) {
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
const unsigned char *base;
base = get_delta_base_sha1(p, &w_curs, curpos,
type, obj_offset);
if (!base) {
type = OBJ_BAD;
goto out;
}
hashcpy(oi->delta_base_sha1, base);
} else
hashclr(oi->delta_base_sha1);
}
out:
unuse_pack(&w_curs);
return type;
}
static void *unpack_compressed_entry(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos,
unsigned long size)
{
int st;
git_zstream stream;
unsigned char *buffer, *in;
buffer = xmallocz_gently(size);
if (!buffer)
return NULL;
memset(&stream, 0, sizeof(stream));
stream.next_out = buffer;
stream.avail_out = size + 1;
git_inflate_init(&stream);
do {
in = use_pack(p, w_curs, curpos, &stream.avail_in);
stream.next_in = in;
st = git_inflate(&stream, Z_FINISH);
if (!stream.avail_out)
break; /* the payload is larger than it should be */
curpos += stream.next_in - in;
} while (st == Z_OK || st == Z_BUF_ERROR);
git_inflate_end(&stream);
if ((st != Z_STREAM_END) || stream.total_out != size) {
free(buffer);
return NULL;
}
return buffer;
}
static void *read_object(const unsigned char *sha1, enum object_type *type,
unsigned long *size);
static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
{
static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
p->pack_name, (uintmax_t)obj_offset);
}
close another possibility for propagating pack corruption Abstract -------- With index v2 we have a per object CRC to allow quick and safe reuse of pack data when repacking. This, however, doesn't currently prevent a stealth corruption from being propagated into a new pack when _not_ reusing pack data as demonstrated by the modification to t5302 included here. The Context ----------- The Git database is all checksummed with SHA1 hashes. Any kind of corruption can be confirmed by verifying this per object hash against corresponding data. However this can be costly to perform systematically and therefore this check is often not performed at run time when accessing the object database. First, the loose object format is entirely compressed with zlib which already provide a CRC verification of its own when inflating data. Any disk corruption would be caught already in this case. Then, packed objects are also compressed with zlib but only for their actual payload. The object headers and delta base references are not deflated for obvious performance reasons, however this leave them vulnerable to potentially undetected disk corruptions. Object types are often validated against the expected type when they're requested, and deflated size must always match the size recorded in the object header, so those cases are pretty much covered as well. Where corruptions could go unnoticed is in the delta base reference. Of course, in the OBJ_REF_DELTA case, the odds for a SHA1 reference to get corrupted so it actually matches the SHA1 of another object with the same size (the delta header stores the expected size of the base object to apply against) are virtually zero. In the OBJ_OFS_DELTA case, the reference is a pack offset which would have to match the start boundary of a different base object but still with the same size, and although this is relatively much more "probable" than in the OBJ_REF_DELTA case, the probability is also about zero in absolute terms. Still, the possibility exists as demonstrated in t5302 and is certainly greater than a SHA1 collision, especially in the OBJ_OFS_DELTA case which is now the default when repacking. Again, repacking by reusing existing pack data is OK since the per object CRC provided by index v2 guards against any such corruptions. What t5302 failed to test is a full repack in such case. The Solution ------------ As unlikely as this kind of stealth corruption can be in practice, it certainly isn't acceptable to propagate it into a freshly created pack. But, because this is so unlikely, we don't want to pay the run time cost associated with extra validation checks all the time either. Furthermore, consequences of such corruption in anything but repacking should be rather visible, and even if it could be quite unpleasant, it still has far less severe consequences than actively creating bad packs. So the best compromize is to check packed object CRC when unpacking objects, and only during the compression/writing phase of a repack, and only when not streaming the result. The cost of this is minimal (less than 1% CPU time), and visible only with a full repack. Someone with a stats background could provide an objective evaluation of this, but I suspect that it's bad RAM that has more potential for data corruptions at this point, even in those cases where this extra check is not performed. Still, it is best to prevent a known hole for corruption when recreating object data into a new pack. What about the streamed pack case? Well, any client receiving a pack must always consider that pack as untrusty and perform full validation anyway, hence no such stealth corruption could be propagated to remote repositoryes already. It is therefore worthless doing local validation in that case. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-10-31 16:31:08 +01:00
int do_check_packed_object_crc;
#define UNPACK_ENTRY_STACK_PREALLOC 64
struct unpack_entry_stack_ent {
off_t obj_offset;
off_t curpos;
unsigned long size;
};
void *unpack_entry(struct packed_git *p, off_t obj_offset,
enum object_type *final_type, unsigned long *final_size)
{
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:08 +01:00
struct pack_window *w_curs = NULL;
off_t curpos = obj_offset;
void *data = NULL;
unsigned long size;
enum object_type type;
struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
int base_from_cache = 0;
write_pack_access_log(p, obj_offset);
/* PHASE 1: drill down to the innermost base object */
for (;;) {
off_t base_offset;
int i;
struct delta_base_cache_entry *ent;
ent = get_delta_base_cache_entry(p, curpos);
delta_base_cache: use hashmap.h The fundamental data structure of the delta base cache is a hash table mapping pairs of "(packfile, offset)" into structs containing the actual object data. The hash table implementation dates back to e5e0161 (Implement a simple delta_base cache, 2007-03-17), and uses a fixed-size table. The current size is a hard-coded 256 entries. Because we need to be able to remove objects from the hash table, entry lookup does not do any kind of probing to handle collisions. Colliding items simply replace whatever is in their slot. As a result, we have fewer usable slots than even the 256 we allocate. At half full, each new item has a 50% chance of displacing another one. Or another way to think about it: every item has a 1/256 chance of being ejected due to hash collision, without regard to our LRU strategy. So it would be interesting to see the effect of increasing the cache size on the runtime for some common operations. As with the previous patch, we'll measure "git log --raw" for tree-only operations, and "git log -Sfoo --raw" for operations that touch trees and blobs. All times are wall-clock best-of-3, done against fully packed repos with --depth=50, and the default core.deltaBaseCacheLimit of 96MB. Here are timings for various values of MAX_DELTA_CACHE against git.git (the asterisk marks the minimum time for each operation): MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m02.227s 0m12.821s 512 0m02.143s 0m10.602s 1024 0m02.127s 0m08.642s 2048 0m02.148s 0m07.123s 4096 0m02.194s 0m06.448s* 8192 0m02.239s 0m06.504s 16384 0m02.144s* 0m06.502s 32768 0m02.202s 0m06.622s 65536 0m02.230s 0m06.677s The log-raw case isn't changed much at all here (probably because our trees just aren't that big in the first place, or possibly because we have so _few_ trees in git.git that the 256-entry cache is enough). But once we start putting blobs in the cache, too, we see a big improvement (almost 50%). The curve levels off around 4096, which means that we can hold about that many entries before hitting the 96MB memory limit (or possibly that the workload is small enough that there is simply no more work to be optimized out by caching more). (As a side note, I initially timed my existing git.git pack, which was a base of --aggressive combined with some pulls on top. So it had quite a few deeper delta chains. The 256-cache case was more like 15s, and it still dropped to ~6.5s in the same way). Here are the timings for linux.git: MAX_DELTA_CACHE log-raw log-S --------------- --------- --------- 256 0m41.661s 5m12.410s 512 0m39.547s 5m07.920s 1024 0m37.054s 4m54.666s 2048 0m35.871s 4m41.194s* 4096 0m34.646s 4m51.648s 8192 0m33.881s 4m55.342s 16384 0m35.190s 5m00.122s 32768 0m35.060s 4m58.851s 65536 0m33.311s* 4m51.420s As we grow we see a nice 20% speedup in the tree traversal, and more modest 10% in the log-S. This is probably an indication that we are bound less by the number of entries, and more by the memory limit (more on that below). What is interesting is that the numbers bounce around a bit; increasing the number of entries isn't always a strict improvement. Partially this is due to noise in the measurement. But it may also be an indication that our LRU ejection scheme is not optimal. The smaller cache sizes introduce some randomness into the ejection (due to collisions), which may sometimes work in our favor (and sometimes not!). So what is the optimal setting of MAX_DELTA_CACHE? The "bouncing" in the linux.git log-S numbers notwithstanding, it mostly seems like bigger is better. And even if we were to try to find a "sweet spot", these are just two repositories, that are not necessarily representative. The shape of history, the size of trees and blobs, the memory limit configuration, etc, all will affect the outcome. Rather than trying to find the "right" number, another strategy is to just switch to a hash table that can actually store collisions: namely our hashmap.h implementation. Here are numbers for that compared to the "best" we saw from adjusting MAX_DELTA_CACHE: | log-raw | log-S | best hashmap | best hashmap | --------- --------- | --------- --------- git | 0m02.144s 0m02.144s | 0m06.448s 0m06.688s linux | 0m33.311s 0m33.092s | 4m41.194s 4m57.172s We can see the results are similar in most cases, which is what we'd expect. We're not ejecting due to collisions at all, so this is purely representing the LRU. So really, we'd expect this to model most closely the larger values of the static MAX_DELTA_CACHE limit. And that does seem to be what's happening, including the "bounce" in the linux log-S case. So while the value for that case _isn't_ as good as the optimal one measured above (which was 2048 entries), given the bouncing I'm hesitant to suggest that 2048 is any kind of optimum (not even for linux.git, let alone as a general rule). The generic hashmap has the appeal that it drops the number of tweakable numbers by one, which means we can focus on tuning other elements, like the LRU strategy or the core.deltaBaseCacheLimit setting. And indeed, if we bump the cache limit to 1G (which is probably silly for general use, but maybe something people with big workstations would want to do), the linux.git log-S time drops to 3m32s. That's something you really _can't_ do easily with the static hash table, because the number of entries needs to grow in proportion to the memory limit (so 2048 is almost certainly not going to be the right value there). This patch takes that direction, and drops the static hash table entirely in favor of using the hashmap.h API. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-23 00:00:07 +02:00
if (ent) {
type = ent->type;
data = ent->data;
size = ent->size;
detach_delta_base_cache_entry(ent);
base_from_cache = 1;
break;
}
if (do_check_packed_object_crc && p->index_version > 1) {
struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
off_t len = revidx[1].offset - obj_offset;
if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
const unsigned char *sha1 =
nth_packed_object_sha1(p, revidx->nr);
error("bad packed object CRC for %s",
sha1_to_hex(sha1));
mark_bad_packed_object(p, sha1);
data = NULL;
goto out;
}
}
type = unpack_object_header(p, &w_curs, &curpos, &size);
if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
break;
base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
if (!base_offset) {
error("failed to validate delta base reference "
"at offset %"PRIuMAX" from %s",
(uintmax_t)curpos, p->pack_name);
/* bail to phase 2, in hopes of recovery */
data = NULL;
break;
}
/* push object, proceed to base */
if (delta_stack_nr >= delta_stack_alloc
&& delta_stack == small_delta_stack) {
delta_stack_alloc = alloc_nr(delta_stack_nr);
ALLOC_ARRAY(delta_stack, delta_stack_alloc);
memcpy(delta_stack, small_delta_stack,
sizeof(*delta_stack)*delta_stack_nr);
} else {
ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
}
i = delta_stack_nr++;
delta_stack[i].obj_offset = obj_offset;
delta_stack[i].curpos = curpos;
delta_stack[i].size = size;
curpos = obj_offset = base_offset;
close another possibility for propagating pack corruption Abstract -------- With index v2 we have a per object CRC to allow quick and safe reuse of pack data when repacking. This, however, doesn't currently prevent a stealth corruption from being propagated into a new pack when _not_ reusing pack data as demonstrated by the modification to t5302 included here. The Context ----------- The Git database is all checksummed with SHA1 hashes. Any kind of corruption can be confirmed by verifying this per object hash against corresponding data. However this can be costly to perform systematically and therefore this check is often not performed at run time when accessing the object database. First, the loose object format is entirely compressed with zlib which already provide a CRC verification of its own when inflating data. Any disk corruption would be caught already in this case. Then, packed objects are also compressed with zlib but only for their actual payload. The object headers and delta base references are not deflated for obvious performance reasons, however this leave them vulnerable to potentially undetected disk corruptions. Object types are often validated against the expected type when they're requested, and deflated size must always match the size recorded in the object header, so those cases are pretty much covered as well. Where corruptions could go unnoticed is in the delta base reference. Of course, in the OBJ_REF_DELTA case, the odds for a SHA1 reference to get corrupted so it actually matches the SHA1 of another object with the same size (the delta header stores the expected size of the base object to apply against) are virtually zero. In the OBJ_OFS_DELTA case, the reference is a pack offset which would have to match the start boundary of a different base object but still with the same size, and although this is relatively much more "probable" than in the OBJ_REF_DELTA case, the probability is also about zero in absolute terms. Still, the possibility exists as demonstrated in t5302 and is certainly greater than a SHA1 collision, especially in the OBJ_OFS_DELTA case which is now the default when repacking. Again, repacking by reusing existing pack data is OK since the per object CRC provided by index v2 guards against any such corruptions. What t5302 failed to test is a full repack in such case. The Solution ------------ As unlikely as this kind of stealth corruption can be in practice, it certainly isn't acceptable to propagate it into a freshly created pack. But, because this is so unlikely, we don't want to pay the run time cost associated with extra validation checks all the time either. Furthermore, consequences of such corruption in anything but repacking should be rather visible, and even if it could be quite unpleasant, it still has far less severe consequences than actively creating bad packs. So the best compromize is to check packed object CRC when unpacking objects, and only during the compression/writing phase of a repack, and only when not streaming the result. The cost of this is minimal (less than 1% CPU time), and visible only with a full repack. Someone with a stats background could provide an objective evaluation of this, but I suspect that it's bad RAM that has more potential for data corruptions at this point, even in those cases where this extra check is not performed. Still, it is best to prevent a known hole for corruption when recreating object data into a new pack. What about the streamed pack case? Well, any client receiving a pack must always consider that pack as untrusty and perform full validation anyway, hence no such stealth corruption could be propagated to remote repositoryes already. It is therefore worthless doing local validation in that case. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-10-31 16:31:08 +01:00
}
/* PHASE 2: handle the base */
switch (type) {
case OBJ_OFS_DELTA:
case OBJ_REF_DELTA:
if (data)
die("BUG: unpack_entry: left loop at a valid delta");
break;
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
if (!base_from_cache)
data = unpack_compressed_entry(p, &w_curs, curpos, size);
break;
default:
data = NULL;
error("unknown object type %i at offset %"PRIuMAX" in %s",
type, (uintmax_t)obj_offset, p->pack_name);
}
/* PHASE 3: apply deltas in order */
/* invariants:
* 'data' holds the base data, or NULL if there was corruption
*/
while (delta_stack_nr) {
void *delta_data;
void *base = data;
void *external_base = NULL;
unsigned long delta_size, base_size = size;
int i;
data = NULL;
if (base)
add_delta_base_cache(p, obj_offset, base, base_size, type);
if (!base) {
/*
* We're probably in deep shit, but let's try to fetch
* the required base anyway from another pack or loose.
* This is costly but should happen only in the presence
* of a corrupted pack, and is better than failing outright.
*/
struct revindex_entry *revidx;
const unsigned char *base_sha1;
revidx = find_pack_revindex(p, obj_offset);
if (revidx) {
base_sha1 = nth_packed_object_sha1(p, revidx->nr);
error("failed to read delta base object %s"
" at offset %"PRIuMAX" from %s",
sha1_to_hex(base_sha1), (uintmax_t)obj_offset,
p->pack_name);
mark_bad_packed_object(p, base_sha1);
base = read_object(base_sha1, &type, &base_size);
external_base = base;
}
}
i = --delta_stack_nr;
obj_offset = delta_stack[i].obj_offset;
curpos = delta_stack[i].curpos;
delta_size = delta_stack[i].size;
if (!base)
continue;
delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
if (!delta_data) {
error("failed to unpack compressed delta "
"at offset %"PRIuMAX" from %s",
(uintmax_t)curpos, p->pack_name);
data = NULL;
free(external_base);
continue;
}
data = patch_delta(base, base_size,
delta_data, delta_size,
&size);
unpack_entry: do not die when we fail to apply a delta When we try to load an object from disk and fail, our general strategy is to see if we can get it from somewhere else (e.g., a loose object). That lets users fix corruption problems by copying known-good versions of objects into the object database. We already handle the case where we were not able to read the delta from disk. However, when we find that the delta we read does not apply, we simply die. This case is harder to trigger, as corruption in the delta data itself would trigger a crc error from zlib. However, a corruption that pointed us at the wrong delta base might cause it. We can do the same "fail and try to find the object elsewhere" trick instead of dying. This not only gives us a chance to recover, but also puts us on code paths that will alert the user to the problem (with the current message, they do not even know which sha1 caused the problem). Note that unlike some other pack corruptions, we do not recover automatically from this case when doing a repack. There is nothing apparently wrong with the delta, as it points to a valid, accessible object, and we realize the error only when the resulting size does not match up. And in theory, one could even have a case where the corrupted size is the same, and the problem would only be noticed by recomputing the sha1. We can get around this by recomputing the deltas with --no-reuse-delta, which our test does (and this is probably good advice for anyone recovering from pack corruption). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-14 23:53:34 +02:00
/*
* We could not apply the delta; warn the user, but keep going.
* Our failure will be noticed either in the next iteration of
* the loop, or if this is the final delta, in the caller when
* we return NULL. Those code paths will take care of making
* a more explicit warning and retrying with another copy of
* the object.
*/
if (!data)
unpack_entry: do not die when we fail to apply a delta When we try to load an object from disk and fail, our general strategy is to see if we can get it from somewhere else (e.g., a loose object). That lets users fix corruption problems by copying known-good versions of objects into the object database. We already handle the case where we were not able to read the delta from disk. However, when we find that the delta we read does not apply, we simply die. This case is harder to trigger, as corruption in the delta data itself would trigger a crc error from zlib. However, a corruption that pointed us at the wrong delta base might cause it. We can do the same "fail and try to find the object elsewhere" trick instead of dying. This not only gives us a chance to recover, but also puts us on code paths that will alert the user to the problem (with the current message, they do not even know which sha1 caused the problem). Note that unlike some other pack corruptions, we do not recover automatically from this case when doing a repack. There is nothing apparently wrong with the delta, as it points to a valid, accessible object, and we realize the error only when the resulting size does not match up. And in theory, one could even have a case where the corrupted size is the same, and the problem would only be noticed by recomputing the sha1. We can get around this by recomputing the deltas with --no-reuse-delta, which our test does (and this is probably good advice for anyone recovering from pack corruption). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-06-14 23:53:34 +02:00
error("failed to apply delta");
free(delta_data);
free(external_base);
}
if (final_type)
*final_type = type;
if (final_size)
*final_size = size;
out:
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 08:34:08 +01:00
unuse_pack(&w_curs);
if (delta_stack != small_delta_stack)
free(delta_stack);
return data;
}
const unsigned char *nth_packed_object_sha1(struct packed_git *p,
uint32_t n)
{
const unsigned char *index = p->index_data;
if (!index) {
if (open_pack_index(p))
return NULL;
index = p->index_data;
}
if (n >= p->num_objects)
return NULL;
index += 4 * 256;
if (p->index_version == 1) {
return index + 24 * n + 4;
} else {
index += 8;
return index + 20 * n;
}
}
const struct object_id *nth_packed_object_oid(struct object_id *oid,
struct packed_git *p,
uint32_t n)
{
const unsigned char *hash = nth_packed_object_sha1(p, n);
if (!hash)
return NULL;
hashcpy(oid->hash, hash);
return oid;
}
void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
{
const unsigned char *ptr = vptr;
const unsigned char *start = p->index_data;
const unsigned char *end = start + p->index_size;
if (ptr < start)
die(_("offset before start of pack index for %s (corrupt index?)"),
p->pack_name);
/* No need to check for underflow; .idx files must be at least 8 bytes */
if (ptr >= end - 8)
die(_("offset beyond end of pack index for %s (truncated index?)"),
p->pack_name);
}
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
{
const unsigned char *index = p->index_data;
index += 4 * 256;
if (p->index_version == 1) {
return ntohl(*((uint32_t *)(index + 24 * n)));
} else {
uint32_t off;
index += 8 + p->num_objects * (20 + 4);
off = ntohl(*((uint32_t *)(index + 4 * n)));
if (!(off & 0x80000000))
return off;
index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
check_pack_index_ptr(p, index);
return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
ntohl(*((uint32_t *)(index + 4)));
}
}
off_t find_pack_entry_one(const unsigned char *sha1,
struct packed_git *p)
{
const uint32_t *level1_ofs = p->index_data;
const unsigned char *index = p->index_data;
sha1-lookup: more memory efficient search in sorted list of SHA-1 Currently, when looking for a packed object from the pack idx, a simple binary search is used. A conventional binary search loop looks like this: unsigned lo, hi; do { unsigned mi = (lo + hi) / 2; int cmp = "entry pointed at by mi" minus "target"; if (!cmp) return mi; "mi is the wanted one" if (cmp > 0) hi = mi; "mi is larger than target" else lo = mi+1; "mi is smaller than target" } while (lo < hi); "did not find what we wanted" The invariants are: - When entering the loop, 'lo' points at a slot that is never above the target (it could be at the target), 'hi' points at a slot that is guaranteed to be above the target (it can never be at the target). - We find a point 'mi' between 'lo' and 'hi' ('mi' could be the same as 'lo', but never can be as high as 'hi'), and check if 'mi' hits the target. There are three cases: - if it is a hit, we have found what we are looking for; - if it is strictly higher than the target, we set it to 'hi', and repeat the search. - if it is strictly lower than the target, we update 'lo' to one slot after it, because we allow 'lo' to be at the target and 'mi' is known to be below the target. If the loop exits, there is no matching entry. When choosing 'mi', we do not have to take the "middle" but anywhere in between 'lo' and 'hi', as long as lo <= mi < hi is satisfied. When we somehow know that the distance between the target and 'lo' is much shorter than the target and 'hi', we could pick 'mi' that is much closer to 'lo' than (hi+lo)/2, which a conventional binary search would pick. This patch takes advantage of the fact that the SHA-1 is a good hash function, and as long as there are enough entries in the table, we can expect uniform distribution. An entry that begins with for example "deadbeef..." is much likely to appear much later than in the midway of a reasonably populated table. In fact, it can be expected to be near 87% (222/256) from the top of the table. This is a work-in-progress and has switches to allow easier experiments and debugging. Exporting GIT_USE_LOOKUP environment variable enables this code. On my admittedly memory starved machine, with a partial KDE repository (3.0G pack with 95M idx): $ GIT_USE_LOOKUP=t git log -800 --stat HEAD >/dev/null 3.93user 0.16system 0:04.09elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+55588minor)pagefaults 0swaps Without the patch, the numbers are: $ git log -800 --stat HEAD >/dev/null 4.00user 0.15system 0:04.17elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+60258minor)pagefaults 0swaps In the same repository: $ GIT_USE_LOOKUP=t git log -2000 HEAD >/dev/null 0.12user 0.00system 0:00.12elapsed 97%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+4241minor)pagefaults 0swaps Without the patch, the numbers are: $ git log -2000 HEAD >/dev/null 0.05user 0.01system 0:00.07elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+8506minor)pagefaults 0swaps There isn't much time difference, but the number of minor faults seems to show that we are touching much smaller number of pages, which is expected. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-12-29 11:05:47 +01:00
unsigned hi, lo, stride;
static int debug_lookup = -1;
if (debug_lookup < 0)
debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");
if (!index) {
if (open_pack_index(p))
return 0;
level1_ofs = p->index_data;
index = p->index_data;
}
if (p->index_version > 1) {
level1_ofs += 2;
index += 8;
}
index += 4 * 256;
hi = ntohl(level1_ofs[*sha1]);
lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
sha1-lookup: more memory efficient search in sorted list of SHA-1 Currently, when looking for a packed object from the pack idx, a simple binary search is used. A conventional binary search loop looks like this: unsigned lo, hi; do { unsigned mi = (lo + hi) / 2; int cmp = "entry pointed at by mi" minus "target"; if (!cmp) return mi; "mi is the wanted one" if (cmp > 0) hi = mi; "mi is larger than target" else lo = mi+1; "mi is smaller than target" } while (lo < hi); "did not find what we wanted" The invariants are: - When entering the loop, 'lo' points at a slot that is never above the target (it could be at the target), 'hi' points at a slot that is guaranteed to be above the target (it can never be at the target). - We find a point 'mi' between 'lo' and 'hi' ('mi' could be the same as 'lo', but never can be as high as 'hi'), and check if 'mi' hits the target. There are three cases: - if it is a hit, we have found what we are looking for; - if it is strictly higher than the target, we set it to 'hi', and repeat the search. - if it is strictly lower than the target, we update 'lo' to one slot after it, because we allow 'lo' to be at the target and 'mi' is known to be below the target. If the loop exits, there is no matching entry. When choosing 'mi', we do not have to take the "middle" but anywhere in between 'lo' and 'hi', as long as lo <= mi < hi is satisfied. When we somehow know that the distance between the target and 'lo' is much shorter than the target and 'hi', we could pick 'mi' that is much closer to 'lo' than (hi+lo)/2, which a conventional binary search would pick. This patch takes advantage of the fact that the SHA-1 is a good hash function, and as long as there are enough entries in the table, we can expect uniform distribution. An entry that begins with for example "deadbeef..." is much likely to appear much later than in the midway of a reasonably populated table. In fact, it can be expected to be near 87% (222/256) from the top of the table. This is a work-in-progress and has switches to allow easier experiments and debugging. Exporting GIT_USE_LOOKUP environment variable enables this code. On my admittedly memory starved machine, with a partial KDE repository (3.0G pack with 95M idx): $ GIT_USE_LOOKUP=t git log -800 --stat HEAD >/dev/null 3.93user 0.16system 0:04.09elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+55588minor)pagefaults 0swaps Without the patch, the numbers are: $ git log -800 --stat HEAD >/dev/null 4.00user 0.15system 0:04.17elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+60258minor)pagefaults 0swaps In the same repository: $ GIT_USE_LOOKUP=t git log -2000 HEAD >/dev/null 0.12user 0.00system 0:00.12elapsed 97%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+4241minor)pagefaults 0swaps Without the patch, the numbers are: $ git log -2000 HEAD >/dev/null 0.05user 0.01system 0:00.07elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+8506minor)pagefaults 0swaps There isn't much time difference, but the number of minor faults seems to show that we are touching much smaller number of pages, which is expected. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-12-29 11:05:47 +01:00
if (p->index_version > 1) {
stride = 20;
} else {
stride = 24;
index += 4;
}
if (debug_lookup)
printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n",
sha1-lookup: more memory efficient search in sorted list of SHA-1 Currently, when looking for a packed object from the pack idx, a simple binary search is used. A conventional binary search loop looks like this: unsigned lo, hi; do { unsigned mi = (lo + hi) / 2; int cmp = "entry pointed at by mi" minus "target"; if (!cmp) return mi; "mi is the wanted one" if (cmp > 0) hi = mi; "mi is larger than target" else lo = mi+1; "mi is smaller than target" } while (lo < hi); "did not find what we wanted" The invariants are: - When entering the loop, 'lo' points at a slot that is never above the target (it could be at the target), 'hi' points at a slot that is guaranteed to be above the target (it can never be at the target). - We find a point 'mi' between 'lo' and 'hi' ('mi' could be the same as 'lo', but never can be as high as 'hi'), and check if 'mi' hits the target. There are three cases: - if it is a hit, we have found what we are looking for; - if it is strictly higher than the target, we set it to 'hi', and repeat the search. - if it is strictly lower than the target, we update 'lo' to one slot after it, because we allow 'lo' to be at the target and 'mi' is known to be below the target. If the loop exits, there is no matching entry. When choosing 'mi', we do not have to take the "middle" but anywhere in between 'lo' and 'hi', as long as lo <= mi < hi is satisfied. When we somehow know that the distance between the target and 'lo' is much shorter than the target and 'hi', we could pick 'mi' that is much closer to 'lo' than (hi+lo)/2, which a conventional binary search would pick. This patch takes advantage of the fact that the SHA-1 is a good hash function, and as long as there are enough entries in the table, we can expect uniform distribution. An entry that begins with for example "deadbeef..." is much likely to appear much later than in the midway of a reasonably populated table. In fact, it can be expected to be near 87% (222/256) from the top of the table. This is a work-in-progress and has switches to allow easier experiments and debugging. Exporting GIT_USE_LOOKUP environment variable enables this code. On my admittedly memory starved machine, with a partial KDE repository (3.0G pack with 95M idx): $ GIT_USE_LOOKUP=t git log -800 --stat HEAD >/dev/null 3.93user 0.16system 0:04.09elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+55588minor)pagefaults 0swaps Without the patch, the numbers are: $ git log -800 --stat HEAD >/dev/null 4.00user 0.15system 0:04.17elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+60258minor)pagefaults 0swaps In the same repository: $ GIT_USE_LOOKUP=t git log -2000 HEAD >/dev/null 0.12user 0.00system 0:00.12elapsed 97%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+4241minor)pagefaults 0swaps Without the patch, the numbers are: $ git log -2000 HEAD >/dev/null 0.05user 0.01system 0:00.07elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+8506minor)pagefaults 0swaps There isn't much time difference, but the number of minor faults seems to show that we are touching much smaller number of pages, which is expected. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-12-29 11:05:47 +01:00
sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);
while (lo < hi) {
unsigned mi = (lo + hi) / 2;
sha1-lookup: more memory efficient search in sorted list of SHA-1 Currently, when looking for a packed object from the pack idx, a simple binary search is used. A conventional binary search loop looks like this: unsigned lo, hi; do { unsigned mi = (lo + hi) / 2; int cmp = "entry pointed at by mi" minus "target"; if (!cmp) return mi; "mi is the wanted one" if (cmp > 0) hi = mi; "mi is larger than target" else lo = mi+1; "mi is smaller than target" } while (lo < hi); "did not find what we wanted" The invariants are: - When entering the loop, 'lo' points at a slot that is never above the target (it could be at the target), 'hi' points at a slot that is guaranteed to be above the target (it can never be at the target). - We find a point 'mi' between 'lo' and 'hi' ('mi' could be the same as 'lo', but never can be as high as 'hi'), and check if 'mi' hits the target. There are three cases: - if it is a hit, we have found what we are looking for; - if it is strictly higher than the target, we set it to 'hi', and repeat the search. - if it is strictly lower than the target, we update 'lo' to one slot after it, because we allow 'lo' to be at the target and 'mi' is known to be below the target. If the loop exits, there is no matching entry. When choosing 'mi', we do not have to take the "middle" but anywhere in between 'lo' and 'hi', as long as lo <= mi < hi is satisfied. When we somehow know that the distance between the target and 'lo' is much shorter than the target and 'hi', we could pick 'mi' that is much closer to 'lo' than (hi+lo)/2, which a conventional binary search would pick. This patch takes advantage of the fact that the SHA-1 is a good hash function, and as long as there are enough entries in the table, we can expect uniform distribution. An entry that begins with for example "deadbeef..." is much likely to appear much later than in the midway of a reasonably populated table. In fact, it can be expected to be near 87% (222/256) from the top of the table. This is a work-in-progress and has switches to allow easier experiments and debugging. Exporting GIT_USE_LOOKUP environment variable enables this code. On my admittedly memory starved machine, with a partial KDE repository (3.0G pack with 95M idx): $ GIT_USE_LOOKUP=t git log -800 --stat HEAD >/dev/null 3.93user 0.16system 0:04.09elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+55588minor)pagefaults 0swaps Without the patch, the numbers are: $ git log -800 --stat HEAD >/dev/null 4.00user 0.15system 0:04.17elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+60258minor)pagefaults 0swaps In the same repository: $ GIT_USE_LOOKUP=t git log -2000 HEAD >/dev/null 0.12user 0.00system 0:00.12elapsed 97%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+4241minor)pagefaults 0swaps Without the patch, the numbers are: $ git log -2000 HEAD >/dev/null 0.05user 0.01system 0:00.07elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+8506minor)pagefaults 0swaps There isn't much time difference, but the number of minor faults seems to show that we are touching much smaller number of pages, which is expected. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-12-29 11:05:47 +01:00
int cmp = hashcmp(index + mi * stride, sha1);
if (debug_lookup)
printf("lo %u hi %u rg %u mi %u\n",
lo, hi, hi - lo, mi);
if (!cmp)
return nth_packed_object_offset(p, mi);
if (cmp > 0)
hi = mi;
else
lo = mi+1;
}
return 0;
}
pack-objects: protect against disappearing packs It's possible that while pack-objects is running, a simultaneously running prune process might delete a pack that we are interested in. Because we load the pack indices early on, we know that the pack contains our item, but by the time we try to open and map it, it is gone. Since c715f78, we already protect against this in the normal object access code path, but pack-objects accesses the packs at a lower level. In the normal access path, we call find_pack_entry, which will call find_pack_entry_one on each pack index, which does the actual lookup. If it gets a hit, we will actually open and verify the validity of the matching packfile (using c715f78's is_pack_valid). If we can't open it, we'll issue a warning and pretend that we didn't find it, causing us to go on to the next pack (or on to loose objects). Furthermore, we will cache the descriptor to the opened packfile. Which means that later, when we actually try to access the object, we are likely to still have that packfile opened, and won't care if it has been unlinked from the filesystem. Notice the "likely" above. If there is another pack access in the interim, and we run out of descriptors, we could close the pack. And then a later attempt to access the closed pack could fail (we'll try to re-open it, of course, but it may have been deleted). In practice, this doesn't happen because we tend to look up items and then access them immediately. Pack-objects does not follow this code path. Instead, it accesses the packs at a much lower level, using find_pack_entry_one directly. This means we skip the is_pack_valid check, and may end up with the name of a packfile, but no open descriptor. We can add the same is_pack_valid check here. Unfortunately, the access patterns of pack-objects are not quite as nice for keeping lookup and object access together. We look up each object as we find out about it, and the only later when writing the packfile do we necessarily access it. Which means that the opened packfile may be closed in the interim. In practice, however, adding this check still has value, for three reasons. 1. If you have a reasonable number of packs and/or a reasonable file descriptor limit, you can keep all of your packs open simultaneously. If this is the case, then the race is impossible to trigger. 2. Even if you can't keep all packs open at once, you may end up keeping the deleted one open (i.e., you may get lucky). 3. The race window is shortened. You may notice early that the pack is gone, and not try to access it. Triggering the problem without this check means deleting the pack any time after we read the list of index files, but before we access the looked-up objects. Triggering it with this check means deleting the pack means deleting the pack after we do a lookup (and successfully access the packfile), but before we access the object. Which is a smaller window. Acked-by: Nicolas Pitre <nico@fluxnic.net> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-10-14 20:03:48 +02:00
int is_pack_valid(struct packed_git *p)
{
/* An already open pack is known to be valid. */
if (p->pack_fd != -1)
return 1;
/* If the pack has one window completely covering the
* file size, the pack is known to be valid even if
* the descriptor is not currently open.
*/
if (p->windows) {
struct pack_window *w = p->windows;
if (!w->offset && w->len == p->pack_size)
return 1;
}
/* Force the pack to open to prove its valid. */
return !open_packed_git(p);
}
static int fill_pack_entry(const unsigned char *sha1,
struct pack_entry *e,
struct packed_git *p)
{
off_t offset;
if (p->num_bad_objects) {
unsigned i;
for (i = 0; i < p->num_bad_objects; i++)
if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
return 0;
}
offset = find_pack_entry_one(sha1, p);
if (!offset)
return 0;
/*
* We are about to tell the caller where they can locate the
* requested object. We better make sure the packfile is
* still here and can be accessed before supplying that
* answer, as it may have been deleted since the index was
* loaded!
*/
sha1_file: squelch "packfile cannot be accessed" warnings When we find an object in a packfile index, we make sure we can still open the packfile itself (or that it is already open), as it might have been deleted by a simultaneous repack. If we can't access the packfile, we print a warning for the user and tell the caller that we don't have the object (we can then look in other packfiles, or find a loose version, before giving up). The warning we print to the user isn't really accomplishing anything, and it is potentially confusing to users. In the normal case, it is complete noise; we find the object elsewhere, and the user does not have to care that we racily saw a packfile index that became stale. It didn't affect the operation at all. A possibly more interesting case is when we later can't find the object, and report failure to the user. In this case the warning could be considered a clue toward that ultimate failure. But it's not really a useful clue in practice. We wouldn't even print it consistently (since we are racing with another process, we might not even see the .idx file, or we might win the race and open the packfile, completing the operation). This patch drops the warning entirely (not only from the fill_pack_entry site, but also from an identical use in pack-objects). If we did find the warning interesting in the error case, we could stuff it away and reveal it to the user when we later die() due to the broken object. But that complexity just isn't worth it. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-31 02:47:38 +02:00
if (!is_pack_valid(p))
return 0;
e->offset = offset;
e->p = p;
hashcpy(e->sha1, sha1);
return 1;
}
/*
* Iff a pack file contains the object named by sha1, return true and
* store its location to e.
*/
static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
{
find_pack_entry: replace last_found_pack with MRU cache Each pack has an index for looking up entries in O(log n) time, but if we have multiple packs, we have to scan through them linearly. This can produce a measurable overhead for some operations. We dealt with this long ago in f7c22cc (always start looking up objects in the last used pack first, 2007-05-30), which keeps what is essentially a 1-element most-recently-used cache. In theory, we should be able to do better by keeping a similar but longer cache, that is the same length as the pack-list itself. Since we now have a convenient generic MRU structure, we can plug it in and measure. Here are the numbers for running p5303 against linux.git: Test HEAD^ HEAD ------------------------------------------------------------------------ 5303.3: rev-list (1) 31.56(31.28+0.27) 31.30(31.08+0.20) -0.8% 5303.4: repack (1) 40.62(39.35+2.36) 40.60(39.27+2.44) -0.0% 5303.6: rev-list (50) 31.31(31.06+0.23) 31.23(31.00+0.22) -0.3% 5303.7: repack (50) 58.65(69.12+1.94) 58.27(68.64+2.05) -0.6% 5303.9: rev-list (1000) 38.74(38.40+0.33) 31.87(31.62+0.24) -17.7% 5303.10: repack (1000) 367.20(441.80+4.62) 342.00(414.04+3.72) -6.9% The main numbers of interest here are the rev-list ones (since that is exercising the normal object lookup code path). The single-pack case shouldn't improve at all; the 260ms speedup there is just part of the run-to-run noise (but it's important to note that we didn't make anything worse with the overhead of maintaining our cache). In the 50-pack case, we see similar results. There may be a slight improvement, but it's mostly within the noise. The 1000-pack case does show a big improvement, though. That carries over to the repack case, as well. Even though we haven't touched its pack-search loop yet, it does still do a lot of normal object lookups (e.g., for the internal revision walk), and so improves. As a point of reference, I also ran the 1000-pack test against a version of HEAD^ with the last_found_pack optimization disabled. It takes ~60s, so that gives an indication of how much even the single-element cache is helping. For comparison, here's a smaller repository, git.git: Test HEAD^ HEAD --------------------------------------------------------------------- 5303.3: rev-list (1) 1.56(1.54+0.01) 1.54(1.51+0.02) -1.3% 5303.4: repack (1) 1.84(1.80+0.10) 1.82(1.80+0.09) -1.1% 5303.6: rev-list (50) 1.58(1.55+0.02) 1.59(1.57+0.01) +0.6% 5303.7: repack (50) 2.50(3.18+0.04) 2.50(3.14+0.04) +0.0% 5303.9: rev-list (1000) 2.76(2.71+0.04) 2.24(2.21+0.02) -18.8% 5303.10: repack (1000) 13.21(19.56+0.25) 11.66(18.01+0.21) -11.7% You can see that the percentage improvement is similar. That's because the lookup we are optimizing is roughly O(nr_objects * nr_packs). Since the number of packs is constant in both tests, we'd expect the improvement to be linear in the number of objects. But the whole process is also linear in the number of objects, so the improvement is a constant factor. The exact improvement does also depend on the contents of the packs. In p5303, the extra packs all have 5 first-parent commits in them, which is a reasonable simulation of a pushed-to repository. But it also means that only 250 first-parent commits are in those packs (compared to almost 50,000 total in linux.git), and the rest are in the huge "base" pack. So once we start looking at history in taht big pack, that's where we'll find most everything, and even the 1-element cache gets close to 100% cache hits. You could almost certainly show better numbers with a more pathological case (e.g., distributing the objects more evenly across the packs). But that's simply not that realistic a scenario, so it makes more sense to focus on these numbers. The implementation itself is a straightforward application of the MRU code. We provide an MRU-ordered list of packs that shadows the packed_git list. This is easy to do because we only create and revise the pack list in one place. The "reprepare" code path actually drops the whole MRU and replaces it for simplicity. It would be more efficient to just add new entries, but there's not much point in optimizing here; repreparing happens rarely, and only after doing a lot of other expensive work. The key things to keep optimized are traversal (which is just a normal linked list, albeit with one extra level of indirection over the regular packed_git list), and marking (which is a constant number of pointer assignments, though slightly more than the old last_found_pack was; it doesn't seem to create a measurable slowdown, though). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-29 06:09:46 +02:00
struct mru_entry *p;
prepare_packed_git();
if (!packed_git)
return 0;
find_pack_entry: replace last_found_pack with MRU cache Each pack has an index for looking up entries in O(log n) time, but if we have multiple packs, we have to scan through them linearly. This can produce a measurable overhead for some operations. We dealt with this long ago in f7c22cc (always start looking up objects in the last used pack first, 2007-05-30), which keeps what is essentially a 1-element most-recently-used cache. In theory, we should be able to do better by keeping a similar but longer cache, that is the same length as the pack-list itself. Since we now have a convenient generic MRU structure, we can plug it in and measure. Here are the numbers for running p5303 against linux.git: Test HEAD^ HEAD ------------------------------------------------------------------------ 5303.3: rev-list (1) 31.56(31.28+0.27) 31.30(31.08+0.20) -0.8% 5303.4: repack (1) 40.62(39.35+2.36) 40.60(39.27+2.44) -0.0% 5303.6: rev-list (50) 31.31(31.06+0.23) 31.23(31.00+0.22) -0.3% 5303.7: repack (50) 58.65(69.12+1.94) 58.27(68.64+2.05) -0.6% 5303.9: rev-list (1000) 38.74(38.40+0.33) 31.87(31.62+0.24) -17.7% 5303.10: repack (1000) 367.20(441.80+4.62) 342.00(414.04+3.72) -6.9% The main numbers of interest here are the rev-list ones (since that is exercising the normal object lookup code path). The single-pack case shouldn't improve at all; the 260ms speedup there is just part of the run-to-run noise (but it's important to note that we didn't make anything worse with the overhead of maintaining our cache). In the 50-pack case, we see similar results. There may be a slight improvement, but it's mostly within the noise. The 1000-pack case does show a big improvement, though. That carries over to the repack case, as well. Even though we haven't touched its pack-search loop yet, it does still do a lot of normal object lookups (e.g., for the internal revision walk), and so improves. As a point of reference, I also ran the 1000-pack test against a version of HEAD^ with the last_found_pack optimization disabled. It takes ~60s, so that gives an indication of how much even the single-element cache is helping. For comparison, here's a smaller repository, git.git: Test HEAD^ HEAD --------------------------------------------------------------------- 5303.3: rev-list (1) 1.56(1.54+0.01) 1.54(1.51+0.02) -1.3% 5303.4: repack (1) 1.84(1.80+0.10) 1.82(1.80+0.09) -1.1% 5303.6: rev-list (50) 1.58(1.55+0.02) 1.59(1.57+0.01) +0.6% 5303.7: repack (50) 2.50(3.18+0.04) 2.50(3.14+0.04) +0.0% 5303.9: rev-list (1000) 2.76(2.71+0.04) 2.24(2.21+0.02) -18.8% 5303.10: repack (1000) 13.21(19.56+0.25) 11.66(18.01+0.21) -11.7% You can see that the percentage improvement is similar. That's because the lookup we are optimizing is roughly O(nr_objects * nr_packs). Since the number of packs is constant in both tests, we'd expect the improvement to be linear in the number of objects. But the whole process is also linear in the number of objects, so the improvement is a constant factor. The exact improvement does also depend on the contents of the packs. In p5303, the extra packs all have 5 first-parent commits in them, which is a reasonable simulation of a pushed-to repository. But it also means that only 250 first-parent commits are in those packs (compared to almost 50,000 total in linux.git), and the rest are in the huge "base" pack. So once we start looking at history in taht big pack, that's where we'll find most everything, and even the 1-element cache gets close to 100% cache hits. You could almost certainly show better numbers with a more pathological case (e.g., distributing the objects more evenly across the packs). But that's simply not that realistic a scenario, so it makes more sense to focus on these numbers. The implementation itself is a straightforward application of the MRU code. We provide an MRU-ordered list of packs that shadows the packed_git list. This is easy to do because we only create and revise the pack list in one place. The "reprepare" code path actually drops the whole MRU and replaces it for simplicity. It would be more efficient to just add new entries, but there's not much point in optimizing here; repreparing happens rarely, and only after doing a lot of other expensive work. The key things to keep optimized are traversal (which is just a normal linked list, albeit with one extra level of indirection over the regular packed_git list), and marking (which is a constant number of pointer assignments, though slightly more than the old last_found_pack was; it doesn't seem to create a measurable slowdown, though). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-29 06:09:46 +02:00
for (p = packed_git_mru->head; p; p = p->next) {
if (fill_pack_entry(sha1, e, p->item)) {
mru_mark(packed_git_mru, p);
return 1;
}
}
return 0;
}
struct packed_git *find_sha1_pack(const unsigned char *sha1,
struct packed_git *packs)
{
struct packed_git *p;
for (p = packs; p; p = p->next) {
if (find_pack_entry_one(sha1, p))
return p;
}
return NULL;
}
static int sha1_loose_object_info(const unsigned char *sha1,
struct object_info *oi,
int flags)
{
int status = 0;
unsigned long mapsize;
void *map;
2011-06-10 20:52:15 +02:00
git_zstream stream;
char hdr[32];
struct strbuf hdrbuf = STRBUF_INIT;
unsigned long size_scratch;
if (oi->delta_base_sha1)
hashclr(oi->delta_base_sha1);
/*
* If we don't care about type or size, then we don't
* need to look inside the object at all. Note that we
* do not optimize out the stat call, even if the
* caller doesn't care about the disk-size, since our
* return value implicitly indicates whether the
* object even exists.
*/
if (!oi->typep && !oi->typename && !oi->sizep && !oi->contentp) {
const char *path;
struct stat st;
if (stat_sha1_file(sha1, &st, &path) < 0)
return -1;
if (oi->disk_sizep)
*oi->disk_sizep = st.st_size;
return 0;
}
map = map_sha1_file(sha1, &mapsize);
if (!map)
return -1;
if (!oi->sizep)
oi->sizep = &size_scratch;
if (oi->disk_sizep)
*oi->disk_sizep = mapsize;
if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE)) {
if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
status = error("unable to unpack %s header with --allow-unknown-type",
sha1_to_hex(sha1));
} else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
status = error("unable to unpack %s header",
sha1_to_hex(sha1));
if (status < 0)
; /* Do nothing */
else if (hdrbuf.len) {
if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
status = error("unable to parse %s header with --allow-unknown-type",
sha1_to_hex(sha1));
} else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
status = error("unable to parse %s header", sha1_to_hex(sha1));
sha1_loose_object_info: handle errors from unpack_sha1_rest When a caller of sha1_object_info_extended() sets the "contentp" field in object_info, we call unpack_sha1_rest() but do not check whether it signaled an error. This causes two problems: 1. We pass back NULL to the caller via the contentp field, but the function returns "0" for success. A caller might reasonably expect after a successful return that it can access contentp without a NULL check and segfault. As it happens, this is impossible to trigger in the current code. There is exactly one caller which uses contentp, read_object(). And the only thing it does after a successful call is to return the content pointer to its caller, using NULL as a sentinel for errors. So in effect it converts the success code from sha1_object_info_extended() back into an error! But this is still worth addressing avoid problems for future users of "contentp". 2. Callers of unpack_sha1_rest() are expected to close the zlib stream themselves on error. Which means that we're leaking the stream. The problem in (1) comes from from c84a1f3ed4 (sha1_file: refactor read_object, 2017-06-21), which added the contentp field. Before that, we called unpack_sha1_rest() via unpack_sha1_file(), which directly used the NULL to signal an error. But note that the leak in (2) is actually older than that. The original unpack_sha1_file() directly returned the result of unpack_sha1_rest() to its caller, when it should have been closing the zlib stream itself on error. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-05 07:59:52 +02:00
if (status >= 0 && oi->contentp) {
*oi->contentp = unpack_sha1_rest(&stream, hdr,
*oi->sizep, sha1);
sha1_loose_object_info: handle errors from unpack_sha1_rest When a caller of sha1_object_info_extended() sets the "contentp" field in object_info, we call unpack_sha1_rest() but do not check whether it signaled an error. This causes two problems: 1. We pass back NULL to the caller via the contentp field, but the function returns "0" for success. A caller might reasonably expect after a successful return that it can access contentp without a NULL check and segfault. As it happens, this is impossible to trigger in the current code. There is exactly one caller which uses contentp, read_object(). And the only thing it does after a successful call is to return the content pointer to its caller, using NULL as a sentinel for errors. So in effect it converts the success code from sha1_object_info_extended() back into an error! But this is still worth addressing avoid problems for future users of "contentp". 2. Callers of unpack_sha1_rest() are expected to close the zlib stream themselves on error. Which means that we're leaking the stream. The problem in (1) comes from from c84a1f3ed4 (sha1_file: refactor read_object, 2017-06-21), which added the contentp field. Before that, we called unpack_sha1_rest() via unpack_sha1_file(), which directly used the NULL to signal an error. But note that the leak in (2) is actually older than that. The original unpack_sha1_file() directly returned the result of unpack_sha1_rest() to its caller, when it should have been closing the zlib stream itself on error. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-10-05 07:59:52 +02:00
if (!*oi->contentp) {
git_inflate_end(&stream);
status = -1;
}
} else
git_inflate_end(&stream);
munmap(map, mapsize);
if (status && oi->typep)
*oi->typep = status;
if (oi->sizep == &size_scratch)
oi->sizep = NULL;
strbuf_release(&hdrbuf);
return (status < 0) ? status : 0;
}
int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags)
{
static struct object_info blank_oi = OBJECT_INFO_INIT;
struct pack_entry e;
sha1_object_info_extended: make type calculation optional Each caller of sha1_object_info_extended sets up an object_info struct to tell the function which elements of the object it wants to get. Until now, getting the type of the object has always been required (and it is returned via the return type rather than a pointer in object_info). This can involve actually opening a loose object file to determine its type, or following delta chains to determine a packed file's base type. These effects produce a measurable slow-down when doing a "cat-file --batch-check" that does not include %(objecttype). This patch adds a "typep" query to struct object_info, so that it can be optionally queried just like size and disk_size. As a result, the return type of the function is no longer the object type, but rather 0/-1 for success/error. As there are only three callers total, we just fix up each caller rather than keep a compatibility wrapper: 1. The simpler sha1_object_info wrapper continues to always ask for and return the type field. 2. The istream_source function wants to know the type, and so always asks for it. 3. The cat-file batch code asks for the type only when %(objecttype) is part of the format string. On linux.git, the best-of-five for running: $ git rev-list --objects --all >objects $ time git cat-file --batch-check='%(objectsize:disk)' on a fully packed repository goes from: real 0m8.680s user 0m8.160s sys 0m0.512s to: real 0m7.205s user 0m6.580s sys 0m0.608s Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-12 08:34:57 +02:00
int rtype;
const unsigned char *real = (flags & OBJECT_INFO_LOOKUP_REPLACE) ?
lookup_replace_object(sha1) :
sha1;
if (!oi)
oi = &blank_oi;
if (!(flags & OBJECT_INFO_SKIP_CACHED)) {
struct cached_object *co = find_cached_object(real);
if (co) {
if (oi->typep)
*(oi->typep) = co->type;
if (oi->sizep)
*(oi->sizep) = co->size;
if (oi->disk_sizep)
*(oi->disk_sizep) = 0;
if (oi->delta_base_sha1)
hashclr(oi->delta_base_sha1);
if (oi->typename)
strbuf_addstr(oi->typename, typename(co->type));
if (oi->contentp)
*oi->contentp = xmemdupz(co->buf, co->size);
oi->whence = OI_CACHED;
return 0;
}
}
if (!find_pack_entry(real, &e)) {
/* Most likely it's a loose object. */
if (!sha1_loose_object_info(real, oi, flags)) {
oi->whence = OI_LOOSE;
sha1_object_info_extended: make type calculation optional Each caller of sha1_object_info_extended sets up an object_info struct to tell the function which elements of the object it wants to get. Until now, getting the type of the object has always been required (and it is returned via the return type rather than a pointer in object_info). This can involve actually opening a loose object file to determine its type, or following delta chains to determine a packed file's base type. These effects produce a measurable slow-down when doing a "cat-file --batch-check" that does not include %(objecttype). This patch adds a "typep" query to struct object_info, so that it can be optionally queried just like size and disk_size. As a result, the return type of the function is no longer the object type, but rather 0/-1 for success/error. As there are only three callers total, we just fix up each caller rather than keep a compatibility wrapper: 1. The simpler sha1_object_info wrapper continues to always ask for and return the type field. 2. The istream_source function wants to know the type, and so always asks for it. 3. The cat-file batch code asks for the type only when %(objecttype) is part of the format string. On linux.git, the best-of-five for running: $ git rev-list --objects --all >objects $ time git cat-file --batch-check='%(objectsize:disk)' on a fully packed repository goes from: real 0m8.680s user 0m8.160s sys 0m0.512s to: real 0m7.205s user 0m6.580s sys 0m0.608s Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-12 08:34:57 +02:00
return 0;
}
/* Not a loose object; someone else may have just packed it. */
if (flags & OBJECT_INFO_QUICK) {
return -1;
} else {
reprepare_packed_git();
if (!find_pack_entry(real, &e))
return -1;
}
}
if (oi == &blank_oi)
/*
* We know that the caller doesn't actually need the
* information below, so return early.
*/
return 0;
rtype = packed_object_info(e.p, e.offset, oi);
if (rtype < 0) {
mark_bad_packed_object(e.p, real);
return sha1_object_info_extended(real, oi, 0);
} else if (in_delta_base_cache(e.p, e.offset)) {
oi->whence = OI_DBCACHED;
} else {
oi->whence = OI_PACKED;
oi->u.packed.offset = e.offset;
oi->u.packed.pack = e.p;
oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
rtype == OBJ_OFS_DELTA);
}
sha1_object_info_extended: make type calculation optional Each caller of sha1_object_info_extended sets up an object_info struct to tell the function which elements of the object it wants to get. Until now, getting the type of the object has always been required (and it is returned via the return type rather than a pointer in object_info). This can involve actually opening a loose object file to determine its type, or following delta chains to determine a packed file's base type. These effects produce a measurable slow-down when doing a "cat-file --batch-check" that does not include %(objecttype). This patch adds a "typep" query to struct object_info, so that it can be optionally queried just like size and disk_size. As a result, the return type of the function is no longer the object type, but rather 0/-1 for success/error. As there are only three callers total, we just fix up each caller rather than keep a compatibility wrapper: 1. The simpler sha1_object_info wrapper continues to always ask for and return the type field. 2. The istream_source function wants to know the type, and so always asks for it. 3. The cat-file batch code asks for the type only when %(objecttype) is part of the format string. On linux.git, the best-of-five for running: $ git rev-list --objects --all >objects $ time git cat-file --batch-check='%(objectsize:disk)' on a fully packed repository goes from: real 0m8.680s user 0m8.160s sys 0m0.512s to: real 0m7.205s user 0m6.580s sys 0m0.608s Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-12 08:34:57 +02:00
return 0;
}
/* returns enum object_type or negative */
int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
{
sha1_object_info_extended: make type calculation optional Each caller of sha1_object_info_extended sets up an object_info struct to tell the function which elements of the object it wants to get. Until now, getting the type of the object has always been required (and it is returned via the return type rather than a pointer in object_info). This can involve actually opening a loose object file to determine its type, or following delta chains to determine a packed file's base type. These effects produce a measurable slow-down when doing a "cat-file --batch-check" that does not include %(objecttype). This patch adds a "typep" query to struct object_info, so that it can be optionally queried just like size and disk_size. As a result, the return type of the function is no longer the object type, but rather 0/-1 for success/error. As there are only three callers total, we just fix up each caller rather than keep a compatibility wrapper: 1. The simpler sha1_object_info wrapper continues to always ask for and return the type field. 2. The istream_source function wants to know the type, and so always asks for it. 3. The cat-file batch code asks for the type only when %(objecttype) is part of the format string. On linux.git, the best-of-five for running: $ git rev-list --objects --all >objects $ time git cat-file --batch-check='%(objectsize:disk)' on a fully packed repository goes from: real 0m8.680s user 0m8.160s sys 0m0.512s to: real 0m7.205s user 0m6.580s sys 0m0.608s Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-12 08:34:57 +02:00
enum object_type type;
provide an initializer for "struct object_info" An all-zero initializer is fine for this struct, but because the first element is a pointer, call sites need to know to use "NULL" instead of "0". Otherwise some static checkers like "sparse" will complain; see d099b71 (Fix some sparse warnings, 2013-07-18) for example. So let's provide an initializer to make this easier to get right. But let's also comment that memset() to zero is explicitly OK[1]. One of the callers embeds object_info in another struct which is initialized via memset (expand_data in builtin/cat-file.c). Since our subset of C doesn't allow assignment from a compound literal, handling this in any other way is awkward, so we'd like to keep the ability to initialize by memset(). By documenting this property, it should make anybody who wants to change the initializer think twice before doing so. There's one other caller of interest. In parse_sha1_header(), we did not initialize the struct fully in the first place. This turned out not to be a bug because the sub-function it calls does not look at any other fields except the ones we did initialize. But that assumption might not hold in the future, so it's a dangerous construct. This patch switches it to initializing the whole struct, which protects us against unexpected reads of the other fields. [1] Obviously using memset() to initialize a pointer violates the C standard, but we long ago decided that it was an acceptable tradeoff in the real world. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-08-11 11:24:35 +02:00
struct object_info oi = OBJECT_INFO_INIT;
sha1_object_info_extended: make type calculation optional Each caller of sha1_object_info_extended sets up an object_info struct to tell the function which elements of the object it wants to get. Until now, getting the type of the object has always been required (and it is returned via the return type rather than a pointer in object_info). This can involve actually opening a loose object file to determine its type, or following delta chains to determine a packed file's base type. These effects produce a measurable slow-down when doing a "cat-file --batch-check" that does not include %(objecttype). This patch adds a "typep" query to struct object_info, so that it can be optionally queried just like size and disk_size. As a result, the return type of the function is no longer the object type, but rather 0/-1 for success/error. As there are only three callers total, we just fix up each caller rather than keep a compatibility wrapper: 1. The simpler sha1_object_info wrapper continues to always ask for and return the type field. 2. The istream_source function wants to know the type, and so always asks for it. 3. The cat-file batch code asks for the type only when %(objecttype) is part of the format string. On linux.git, the best-of-five for running: $ git rev-list --objects --all >objects $ time git cat-file --batch-check='%(objectsize:disk)' on a fully packed repository goes from: real 0m8.680s user 0m8.160s sys 0m0.512s to: real 0m7.205s user 0m6.580s sys 0m0.608s Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-12 08:34:57 +02:00
oi.typep = &type;
oi.sizep = sizep;
if (sha1_object_info_extended(sha1, &oi,
OBJECT_INFO_LOOKUP_REPLACE) < 0)
sha1_object_info_extended: make type calculation optional Each caller of sha1_object_info_extended sets up an object_info struct to tell the function which elements of the object it wants to get. Until now, getting the type of the object has always been required (and it is returned via the return type rather than a pointer in object_info). This can involve actually opening a loose object file to determine its type, or following delta chains to determine a packed file's base type. These effects produce a measurable slow-down when doing a "cat-file --batch-check" that does not include %(objecttype). This patch adds a "typep" query to struct object_info, so that it can be optionally queried just like size and disk_size. As a result, the return type of the function is no longer the object type, but rather 0/-1 for success/error. As there are only three callers total, we just fix up each caller rather than keep a compatibility wrapper: 1. The simpler sha1_object_info wrapper continues to always ask for and return the type field. 2. The istream_source function wants to know the type, and so always asks for it. 3. The cat-file batch code asks for the type only when %(objecttype) is part of the format string. On linux.git, the best-of-five for running: $ git rev-list --objects --all >objects $ time git cat-file --batch-check='%(objectsize:disk)' on a fully packed repository goes from: real 0m8.680s user 0m8.160s sys 0m0.512s to: real 0m7.205s user 0m6.580s sys 0m0.608s Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-12 08:34:57 +02:00
return -1;
return type;
}
static void *read_packed_sha1(const unsigned char *sha1,
enum object_type *type, unsigned long *size)
{
struct pack_entry e;
void *data;
if (!find_pack_entry(sha1, &e))
return NULL;
data = cache_or_unpack_entry(e.p, e.offset, size, type);
if (!data) {
/*
* We're probably in deep shit, but let's try to fetch
* the required object anyway from another pack or loose.
* This should happen only in the presence of a corrupted
* pack, and is better than failing outright.
*/
error("failed to read object %s at offset %"PRIuMAX" from %s",
sha1_to_hex(sha1), (uintmax_t)e.offset, e.p->pack_name);
mark_bad_packed_object(e.p, sha1);
data = read_object(sha1, type, size);
}
return data;
}
int pretend_sha1_file(void *buf, unsigned long len, enum object_type type,
unsigned char *sha1)
{
struct cached_object *co;
hash_sha1_file(buf, len, typename(type), sha1);
if (has_sha1_file(sha1) || find_cached_object(sha1))
return 0;
ALLOC_GROW(cached_objects, cached_object_nr + 1, cached_object_alloc);
co = &cached_objects[cached_object_nr++];
co->size = len;
co->type = type;
co->buf = xmalloc(len);
memcpy(co->buf, buf, len);
hashcpy(co->sha1, sha1);
return 0;
}
static void *read_object(const unsigned char *sha1, enum object_type *type,
unsigned long *size)
{
struct object_info oi = OBJECT_INFO_INIT;
void *content;
oi.typep = type;
oi.sizep = size;
oi.contentp = &content;
if (sha1_object_info_extended(sha1, &oi, 0) < 0)
return NULL;
return content;
}
/*
* This function dies on corrupt objects; the callers who want to
* deal with them should arrange to call read_object() and give error
* messages themselves.
*/
void *read_sha1_file_extended(const unsigned char *sha1,
enum object_type *type,
unsigned long *size,
int lookup_replace)
{
void *data;
const struct packed_git *p;
const char *path;
struct stat st;
const unsigned char *repl = lookup_replace ? lookup_replace_object(sha1)
: sha1;
errno = 0;
data = read_object(repl, type, size);
if (data)
return data;
if (errno && errno != ENOENT)
die_errno("failed to read object %s", sha1_to_hex(sha1));
/* die if we replaced an object with one that does not exist */
if (repl != sha1)
die("replacement %s not found for %s",
sha1_to_hex(repl), sha1_to_hex(sha1));
if (!stat_sha1_file(repl, &st, &path))
die("loose object %s (stored in %s) is corrupt",
sha1_to_hex(repl), path);
if ((p = has_packed_and_bad(repl)) != NULL)
die("packed object %s (stored in %s) is corrupt",
sha1_to_hex(repl), p->pack_name);
return NULL;
}
void *read_object_with_reference(const unsigned char *sha1,
const char *required_type_name,
unsigned long *size,
unsigned char *actual_sha1_return)
{
enum object_type type, required_type;
void *buffer;
unsigned long isize;
unsigned char actual_sha1[20];
required_type = type_from_string(required_type_name);
hashcpy(actual_sha1, sha1);
while (1) {
int ref_length = -1;
const char *ref_type = NULL;
buffer = read_sha1_file(actual_sha1, &type, &isize);
if (!buffer)
return NULL;
if (type == required_type) {
*size = isize;
if (actual_sha1_return)
hashcpy(actual_sha1_return, actual_sha1);
return buffer;
}
/* Handle references */
else if (type == OBJ_COMMIT)
ref_type = "tree ";
else if (type == OBJ_TAG)
ref_type = "object ";
else {
free(buffer);
return NULL;
}
ref_length = strlen(ref_type);
if (ref_length + 40 > isize ||
memcmp(buffer, ref_type, ref_length) ||
get_sha1_hex((char *) buffer + ref_length, actual_sha1)) {
free(buffer);
return NULL;
}
free(buffer);
/* Now we have the ID of the referred-to object in
* actual_sha1. Check again. */
}
}
static void write_sha1_file_prepare(const void *buf, unsigned long len,
const char *type, unsigned char *sha1,
char *hdr, int *hdrlen)
{
fix openssl headers conflicting with custom SHA1 implementations On ARM I have the following compilation errors: CC fast-import.o In file included from cache.h:8, from builtin.h:6, from fast-import.c:142: arm/sha1.h:14: error: conflicting types for 'SHA_CTX' /usr/include/openssl/sha.h:105: error: previous declaration of 'SHA_CTX' was here arm/sha1.h:16: error: conflicting types for 'SHA1_Init' /usr/include/openssl/sha.h:115: error: previous declaration of 'SHA1_Init' was here arm/sha1.h:17: error: conflicting types for 'SHA1_Update' /usr/include/openssl/sha.h:116: error: previous declaration of 'SHA1_Update' was here arm/sha1.h:18: error: conflicting types for 'SHA1_Final' /usr/include/openssl/sha.h:117: error: previous declaration of 'SHA1_Final' was here make: *** [fast-import.o] Error 1 This is because openssl header files are always included in git-compat-util.h since commit 684ec6c63c whenever NO_OPENSSL is not set, which somehow brings in <openssl/sha1.h> clashing with the custom ARM version. Compilation of git is probably broken on PPC too for the same reason. Turns out that the only file requiring openssl/ssl.h and openssl/err.h is imap-send.c. But only moving those problematic includes there doesn't solve the issue as it also includes cache.h which brings in the conflicting local SHA1 header file. As suggested by Jeff King, the best solution is to rename our references to SHA1 functions and structure to something git specific, and define those according to the implementation used. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2008-10-01 20:05:20 +02:00
git_SHA_CTX c;
/* Generate the header */
*hdrlen = xsnprintf(hdr, *hdrlen, "%s %lu", type, len)+1;
/* Sha1.. */
fix openssl headers conflicting with custom SHA1 implementations On ARM I have the following compilation errors: CC fast-import.o In file included from cache.h:8, from builtin.h:6, from fast-import.c:142: arm/sha1.h:14: error: conflicting types for 'SHA_CTX' /usr/include/openssl/sha.h:105: error: previous declaration of 'SHA_CTX' was here arm/sha1.h:16: error: conflicting types for 'SHA1_Init' /usr/include/openssl/sha.h:115: error: previous declaration of 'SHA1_Init' was here arm/sha1.h:17: error: conflicting types for 'SHA1_Update' /usr/include/openssl/sha.h:116: error: previous declaration of 'SHA1_Update' was here arm/sha1.h:18: error: conflicting types for 'SHA1_Final' /usr/include/openssl/sha.h:117: error: previous declaration of 'SHA1_Final' was here make: *** [fast-import.o] Error 1 This is because openssl header files are always included in git-compat-util.h since commit 684ec6c63c whenever NO_OPENSSL is not set, which somehow brings in <openssl/sha1.h> clashing with the custom ARM version. Compilation of git is probably broken on PPC too for the same reason. Turns out that the only file requiring openssl/ssl.h and openssl/err.h is imap-send.c. But only moving those problematic includes there doesn't solve the issue as it also includes cache.h which brings in the conflicting local SHA1 header file. As suggested by Jeff King, the best solution is to rename our references to SHA1 functions and structure to something git specific, and define those according to the implementation used. Signed-off-by: Nicolas Pitre <nico@cam.org> Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2008-10-01 20:05:20 +02:00
git_SHA1_Init(&c);
git_SHA1_Update(&c, hdr, *hdrlen);
git_SHA1_Update(&c, buf, len);
git_SHA1_Final(sha1, &c);
}
Create object subdirectories on demand This makes it possible to have a "sparse" git object subdirectory structure, something that has become much more attractive now that people use pack-files all the time. As a result of pack-files, a git object directory doesn't necessarily have any individual objects lying around, and in that case it's just wasting space to keep the empty first-level object directories around: on many filesystems the 256 empty directories will be aboue 1MB of diskspace. Even more importantly, after you re-pack a project that _used_ to be unpacked, you could be left with huge directories that no longer contain anything, but that waste space and take time to look through. With this change, "git prune-packed" can just do an rmdir() on the directories, and they'll get removed if empty, and re-created on demand. This patch also tries to fix up "write_sha1_from_fd()" to use the new common infrastructure for creating the object files, closing a hole where we might otherwise leave half-written objects in the object database. [jc: I unoptimized the part that really removes the fan-out directories to ease transition. init-db still wastes 1MB of diskspace to hold 256 empty fan-outs, and prune-packed rmdir()'s the grown but empty directories, but runs mkdir() immediately after that -- reducing the saving from 150KB to 146KB. These parts will be re-introduced when everybody has the on-demand capability.] Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-09 00:54:01 +02:00
/*
* Move the just written object into its final resting place.
Create object subdirectories on demand This makes it possible to have a "sparse" git object subdirectory structure, something that has become much more attractive now that people use pack-files all the time. As a result of pack-files, a git object directory doesn't necessarily have any individual objects lying around, and in that case it's just wasting space to keep the empty first-level object directories around: on many filesystems the 256 empty directories will be aboue 1MB of diskspace. Even more importantly, after you re-pack a project that _used_ to be unpacked, you could be left with huge directories that no longer contain anything, but that waste space and take time to look through. With this change, "git prune-packed" can just do an rmdir() on the directories, and they'll get removed if empty, and re-created on demand. This patch also tries to fix up "write_sha1_from_fd()" to use the new common infrastructure for creating the object files, closing a hole where we might otherwise leave half-written objects in the object database. [jc: I unoptimized the part that really removes the fan-out directories to ease transition. init-db still wastes 1MB of diskspace to hold 256 empty fan-outs, and prune-packed rmdir()'s the grown but empty directories, but runs mkdir() immediately after that -- reducing the saving from 150KB to 146KB. These parts will be re-introduced when everybody has the on-demand capability.] Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-09 00:54:01 +02:00
*/
int finalize_object_file(const char *tmpfile, const char *filename)
Create object subdirectories on demand This makes it possible to have a "sparse" git object subdirectory structure, something that has become much more attractive now that people use pack-files all the time. As a result of pack-files, a git object directory doesn't necessarily have any individual objects lying around, and in that case it's just wasting space to keep the empty first-level object directories around: on many filesystems the 256 empty directories will be aboue 1MB of diskspace. Even more importantly, after you re-pack a project that _used_ to be unpacked, you could be left with huge directories that no longer contain anything, but that waste space and take time to look through. With this change, "git prune-packed" can just do an rmdir() on the directories, and they'll get removed if empty, and re-created on demand. This patch also tries to fix up "write_sha1_from_fd()" to use the new common infrastructure for creating the object files, closing a hole where we might otherwise leave half-written objects in the object database. [jc: I unoptimized the part that really removes the fan-out directories to ease transition. init-db still wastes 1MB of diskspace to hold 256 empty fan-outs, and prune-packed rmdir()'s the grown but empty directories, but runs mkdir() immediately after that -- reducing the saving from 150KB to 146KB. These parts will be re-introduced when everybody has the on-demand capability.] Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-09 00:54:01 +02:00
{
int ret = 0;
if (object_creation_mode == OBJECT_CREATION_USES_RENAMES)
goto try_rename;
else if (link(tmpfile, filename))
ret = errno;
/*
* Coda hack - coda doesn't like cross-directory links,
* so we fall back to a rename, which will mean that it
* won't be able to check collisions, but that's not a
* big deal.
*
* The same holds for FAT formatted media.
*
* When this succeeds, we just return. We have nothing
* left to unlink.
*/
if (ret && ret != EEXIST) {
try_rename:
if (!rename(tmpfile, filename))
goto out;
ret = errno;
Create object subdirectories on demand This makes it possible to have a "sparse" git object subdirectory structure, something that has become much more attractive now that people use pack-files all the time. As a result of pack-files, a git object directory doesn't necessarily have any individual objects lying around, and in that case it's just wasting space to keep the empty first-level object directories around: on many filesystems the 256 empty directories will be aboue 1MB of diskspace. Even more importantly, after you re-pack a project that _used_ to be unpacked, you could be left with huge directories that no longer contain anything, but that waste space and take time to look through. With this change, "git prune-packed" can just do an rmdir() on the directories, and they'll get removed if empty, and re-created on demand. This patch also tries to fix up "write_sha1_from_fd()" to use the new common infrastructure for creating the object files, closing a hole where we might otherwise leave half-written objects in the object database. [jc: I unoptimized the part that really removes the fan-out directories to ease transition. init-db still wastes 1MB of diskspace to hold 256 empty fan-outs, and prune-packed rmdir()'s the grown but empty directories, but runs mkdir() immediately after that -- reducing the saving from 150KB to 146KB. These parts will be re-introduced when everybody has the on-demand capability.] Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-09 00:54:01 +02:00
}
unlink_or_warn(tmpfile);
Create object subdirectories on demand This makes it possible to have a "sparse" git object subdirectory structure, something that has become much more attractive now that people use pack-files all the time. As a result of pack-files, a git object directory doesn't necessarily have any individual objects lying around, and in that case it's just wasting space to keep the empty first-level object directories around: on many filesystems the 256 empty directories will be aboue 1MB of diskspace. Even more importantly, after you re-pack a project that _used_ to be unpacked, you could be left with huge directories that no longer contain anything, but that waste space and take time to look through. With this change, "git prune-packed" can just do an rmdir() on the directories, and they'll get removed if empty, and re-created on demand. This patch also tries to fix up "write_sha1_from_fd()" to use the new common infrastructure for creating the object files, closing a hole where we might otherwise leave half-written objects in the object database. [jc: I unoptimized the part that really removes the fan-out directories to ease transition. init-db still wastes 1MB of diskspace to hold 256 empty fan-outs, and prune-packed rmdir()'s the grown but empty directories, but runs mkdir() immediately after that -- reducing the saving from 150KB to 146KB. These parts will be re-introduced when everybody has the on-demand capability.] Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-09 00:54:01 +02:00
if (ret) {
if (ret != EEXIST) {
return error_errno("unable to write sha1 filename %s", filename);
Create object subdirectories on demand This makes it possible to have a "sparse" git object subdirectory structure, something that has become much more attractive now that people use pack-files all the time. As a result of pack-files, a git object directory doesn't necessarily have any individual objects lying around, and in that case it's just wasting space to keep the empty first-level object directories around: on many filesystems the 256 empty directories will be aboue 1MB of diskspace. Even more importantly, after you re-pack a project that _used_ to be unpacked, you could be left with huge directories that no longer contain anything, but that waste space and take time to look through. With this change, "git prune-packed" can just do an rmdir() on the directories, and they'll get removed if empty, and re-created on demand. This patch also tries to fix up "write_sha1_from_fd()" to use the new common infrastructure for creating the object files, closing a hole where we might otherwise leave half-written objects in the object database. [jc: I unoptimized the part that really removes the fan-out directories to ease transition. init-db still wastes 1MB of diskspace to hold 256 empty fan-outs, and prune-packed rmdir()'s the grown but empty directories, but runs mkdir() immediately after that -- reducing the saving from 150KB to 146KB. These parts will be re-introduced when everybody has the on-demand capability.] Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-09 00:54:01 +02:00
}
/* FIXME!!! Collision check here ? */
}
out:
if (adjust_shared_perm(filename))
return error("unable to set permission to '%s'", filename);
Create object subdirectories on demand This makes it possible to have a "sparse" git object subdirectory structure, something that has become much more attractive now that people use pack-files all the time. As a result of pack-files, a git object directory doesn't necessarily have any individual objects lying around, and in that case it's just wasting space to keep the empty first-level object directories around: on many filesystems the 256 empty directories will be aboue 1MB of diskspace. Even more importantly, after you re-pack a project that _used_ to be unpacked, you could be left with huge directories that no longer contain anything, but that waste space and take time to look through. With this change, "git prune-packed" can just do an rmdir() on the directories, and they'll get removed if empty, and re-created on demand. This patch also tries to fix up "write_sha1_from_fd()" to use the new common infrastructure for creating the object files, closing a hole where we might otherwise leave half-written objects in the object database. [jc: I unoptimized the part that really removes the fan-out directories to ease transition. init-db still wastes 1MB of diskspace to hold 256 empty fan-outs, and prune-packed rmdir()'s the grown but empty directories, but runs mkdir() immediately after that -- reducing the saving from 150KB to 146KB. These parts will be re-introduced when everybody has the on-demand capability.] Signed-off-by: Linus Torvalds <torvalds@osdl.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-09 00:54:01 +02:00
return 0;
}
static int write_buffer(int fd, const void *buf, size_t len)
{
if (write_in_full(fd, buf, len) < 0)
return error_errno("file write error");
return 0;
}
int hash_sha1_file(const void *buf, unsigned long len, const char *type,
unsigned char *sha1)
{
char hdr[32];
int hdrlen = sizeof(hdr);
write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);
return 0;
}
/* Finalize a file on disk, and close it. */
static void close_sha1_file(int fd)
{
if (fsync_object_files)
fsync_or_die(fd, "sha1 file");
if (close(fd) != 0)
die_errno("error when closing sha1 file");
}
/* Size of directory component, including the ending '/' */
static inline int directory_size(const char *filename)
{
const char *s = strrchr(filename, '/');
if (!s)
return 0;
return s - filename + 1;
}
/*
* This creates a temporary file in the same directory as the final
* 'filename'
*
* We want to avoid cross-directory filename renames, because those
* can have problems on various filesystems (FAT, NFS, Coda).
*/
static int create_tmpfile(struct strbuf *tmp, const char *filename)
{
int fd, dirlen = directory_size(filename);
strbuf_reset(tmp);
strbuf_add(tmp, filename, dirlen);
strbuf_addstr(tmp, "tmp_obj_XXXXXX");
fd = git_mkstemp_mode(tmp->buf, 0444);
if (fd < 0 && dirlen && errno == ENOENT) {
/*
* Make sure the directory exists; note that the contents
* of the buffer are undefined after mkstemp returns an
* error, so we have to rewrite the whole buffer from
* scratch.
*/
strbuf_reset(tmp);
strbuf_add(tmp, filename, dirlen - 1);
if (mkdir(tmp->buf, 0777) && errno != EEXIST)
sha1_file.c:create_tmpfile(): Fix race when creating loose object dirs There are cases (e.g. when running concurrent fetches in a repo) where multiple Git processes concurrently attempt to create loose objects within the same objects/XX/ dir. The creation of the loose object files is (AFAICS) safe from races, but the creation of the objects/XX/ dir in which the loose objects reside is unsafe, for example: Two concurrent fetches - A and B. As part of its fetch, A needs to store 12aaaaa as a loose object. B, on the other hand, needs to store 12bbbbb as a loose object. The objects/12 directory does not already exist. Concurrently, both A and B determine that they need to create the objects/12 directory (because their first call to git_mkstemp_mode() within create_tmpfile() fails witn ENOENT). One of them - let's say A - executes the following mkdir() call before the other. This first call returns success, and A moves on. When B gets around to calling mkdir(), it fails with EEXIST, because A won the race. The mkdir() error causes B to return -1 from create_tmpfile(), which propagates all the way, resulting in the fetch failing with: error: unable to create temporary file: File exists fatal: failed to write object fatal: unpack-objects failed Although it's hard to add a testcase reproducing this issue, it's easy to provoke if we insert a sleep after the if (mkdir(buffer, 0777) || adjust_shared_perm(buffer)) return -1; block, and then run two concurrent "git fetch"es against the same repo. The fix is to simply handle mkdir() failing with EEXIST as a success. If EEXIST is somehow returned for the wrong reasons (because the relevant objects/XX is not a directory, or is otherwise unsuitable for object storage), the following call to adjust_shared_perm(), or ultimately the retried call to git_mkstemp_mode() will fail, and we end up returning error from create_tmpfile() in any case. Note that there are still cases where two users with unsuitable umasks in a shared repo can end up in two races where one user first wins the mkdir() race to create an objects/XX/ directory, and then the other user wins the adjust_shared_perms() race to chmod() that directory, but fails because it is (transiently, until the first users completes its chmod()) unwriteable to the other user. However, (an equivalent of) this race also exists before this patch, and is made no worse by this patch. Signed-off-by: Johan Herland <johan@herland.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-10-27 12:35:43 +01:00
return -1;
if (adjust_shared_perm(tmp->buf))
return -1;
/* Try again */
strbuf_addstr(tmp, "/tmp_obj_XXXXXX");
fd = git_mkstemp_mode(tmp->buf, 0444);
}
return fd;
}
static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
const void *buf, unsigned long len, time_t mtime)
{
int fd, ret;
unsigned char compressed[4096];
2011-06-10 20:52:15 +02:00
git_zstream stream;
sha1_file: be paranoid when creating loose objects We don't want the data being deflated and stored into loose objects to be different from what we expect. While the deflated data is protected by a CRC which is good enough for safe data retrieval operations, we still want to be doubly sure that the source data used at object creation time is still what we expected once that data has been deflated and its CRC32 computed. The most plausible data corruption may occur if the source file is modified while Git is deflating and writing it out in a loose object. Or Git itself could have a bug causing memory corruption. Or even bad RAM could cause trouble. So it is best to make sure everything is coherent and checksum protected from beginning to end. To do so we compute the SHA1 of the data being deflated _after_ the deflate operation has consumed that data, and make sure it matches with the expected SHA1. This way we can rely on the CRC32 checked by the inflate operation to provide a good indication that the data is still coherent with its SHA1 hash. One pathological case we ignore is when the data is modified before (or during) deflate call, but changed back before it is hashed. There is some overhead of course. Using 'git add' on a set of large files: Before: real 0m25.210s user 0m23.783s sys 0m1.408s After: real 0m26.537s user 0m25.175s sys 0m1.358s The overhead is around 5% for full data coherency guarantee. Signed-off-by: Nicolas Pitre <nico@fluxnic.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-21 21:48:06 +01:00
git_SHA_CTX c;
unsigned char parano_sha1[20];
static struct strbuf tmp_file = STRBUF_INIT;
const char *filename = sha1_file_name(sha1);
fd = create_tmpfile(&tmp_file, filename);
if (fd < 0) {
if (errno == EACCES)
return error("insufficient permission for adding an object to repository database %s", get_object_directory());
else
return error_errno("unable to create temporary file");
}
/* Set it up */
git_deflate_init(&stream, zlib_compression_level);
stream.next_out = compressed;
stream.avail_out = sizeof(compressed);
sha1_file: be paranoid when creating loose objects We don't want the data being deflated and stored into loose objects to be different from what we expect. While the deflated data is protected by a CRC which is good enough for safe data retrieval operations, we still want to be doubly sure that the source data used at object creation time is still what we expected once that data has been deflated and its CRC32 computed. The most plausible data corruption may occur if the source file is modified while Git is deflating and writing it out in a loose object. Or Git itself could have a bug causing memory corruption. Or even bad RAM could cause trouble. So it is best to make sure everything is coherent and checksum protected from beginning to end. To do so we compute the SHA1 of the data being deflated _after_ the deflate operation has consumed that data, and make sure it matches with the expected SHA1. This way we can rely on the CRC32 checked by the inflate operation to provide a good indication that the data is still coherent with its SHA1 hash. One pathological case we ignore is when the data is modified before (or during) deflate call, but changed back before it is hashed. There is some overhead of course. Using 'git add' on a set of large files: Before: real 0m25.210s user 0m23.783s sys 0m1.408s After: real 0m26.537s user 0m25.175s sys 0m1.358s The overhead is around 5% for full data coherency guarantee. Signed-off-by: Nicolas Pitre <nico@fluxnic.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-21 21:48:06 +01:00
git_SHA1_Init(&c);
/* First header.. */
stream.next_in = (unsigned char *)hdr;
stream.avail_in = hdrlen;
while (git_deflate(&stream, 0) == Z_OK)
; /* nothing */
sha1_file: be paranoid when creating loose objects We don't want the data being deflated and stored into loose objects to be different from what we expect. While the deflated data is protected by a CRC which is good enough for safe data retrieval operations, we still want to be doubly sure that the source data used at object creation time is still what we expected once that data has been deflated and its CRC32 computed. The most plausible data corruption may occur if the source file is modified while Git is deflating and writing it out in a loose object. Or Git itself could have a bug causing memory corruption. Or even bad RAM could cause trouble. So it is best to make sure everything is coherent and checksum protected from beginning to end. To do so we compute the SHA1 of the data being deflated _after_ the deflate operation has consumed that data, and make sure it matches with the expected SHA1. This way we can rely on the CRC32 checked by the inflate operation to provide a good indication that the data is still coherent with its SHA1 hash. One pathological case we ignore is when the data is modified before (or during) deflate call, but changed back before it is hashed. There is some overhead of course. Using 'git add' on a set of large files: Before: real 0m25.210s user 0m23.783s sys 0m1.408s After: real 0m26.537s user 0m25.175s sys 0m1.358s The overhead is around 5% for full data coherency guarantee. Signed-off-by: Nicolas Pitre <nico@fluxnic.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-21 21:48:06 +01:00
git_SHA1_Update(&c, hdr, hdrlen);
/* Then the data itself.. */
stream.next_in = (void *)buf;
stream.avail_in = len;
do {
sha1_file: be paranoid when creating loose objects We don't want the data being deflated and stored into loose objects to be different from what we expect. While the deflated data is protected by a CRC which is good enough for safe data retrieval operations, we still want to be doubly sure that the source data used at object creation time is still what we expected once that data has been deflated and its CRC32 computed. The most plausible data corruption may occur if the source file is modified while Git is deflating and writing it out in a loose object. Or Git itself could have a bug causing memory corruption. Or even bad RAM could cause trouble. So it is best to make sure everything is coherent and checksum protected from beginning to end. To do so we compute the SHA1 of the data being deflated _after_ the deflate operation has consumed that data, and make sure it matches with the expected SHA1. This way we can rely on the CRC32 checked by the inflate operation to provide a good indication that the data is still coherent with its SHA1 hash. One pathological case we ignore is when the data is modified before (or during) deflate call, but changed back before it is hashed. There is some overhead of course. Using 'git add' on a set of large files: Before: real 0m25.210s user 0m23.783s sys 0m1.408s After: real 0m26.537s user 0m25.175s sys 0m1.358s The overhead is around 5% for full data coherency guarantee. Signed-off-by: Nicolas Pitre <nico@fluxnic.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-21 21:48:06 +01:00
unsigned char *in0 = stream.next_in;
ret = git_deflate(&stream, Z_FINISH);
sha1_file: be paranoid when creating loose objects We don't want the data being deflated and stored into loose objects to be different from what we expect. While the deflated data is protected by a CRC which is good enough for safe data retrieval operations, we still want to be doubly sure that the source data used at object creation time is still what we expected once that data has been deflated and its CRC32 computed. The most plausible data corruption may occur if the source file is modified while Git is deflating and writing it out in a loose object. Or Git itself could have a bug causing memory corruption. Or even bad RAM could cause trouble. So it is best to make sure everything is coherent and checksum protected from beginning to end. To do so we compute the SHA1 of the data being deflated _after_ the deflate operation has consumed that data, and make sure it matches with the expected SHA1. This way we can rely on the CRC32 checked by the inflate operation to provide a good indication that the data is still coherent with its SHA1 hash. One pathological case we ignore is when the data is modified before (or during) deflate call, but changed back before it is hashed. There is some overhead of course. Using 'git add' on a set of large files: Before: real 0m25.210s user 0m23.783s sys 0m1.408s After: real 0m26.537s user 0m25.175s sys 0m1.358s The overhead is around 5% for full data coherency guarantee. Signed-off-by: Nicolas Pitre <nico@fluxnic.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-21 21:48:06 +01:00
git_SHA1_Update(&c, in0, stream.next_in - in0);
if (write_buffer(fd, compressed, stream.next_out - compressed) < 0)
die("unable to write sha1 file");
stream.next_out = compressed;
stream.avail_out = sizeof(compressed);
} while (ret == Z_OK);
if (ret != Z_STREAM_END)
die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret);
ret = git_deflate_end_gently(&stream);
if (ret != Z_OK)
die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret);
sha1_file: be paranoid when creating loose objects We don't want the data being deflated and stored into loose objects to be different from what we expect. While the deflated data is protected by a CRC which is good enough for safe data retrieval operations, we still want to be doubly sure that the source data used at object creation time is still what we expected once that data has been deflated and its CRC32 computed. The most plausible data corruption may occur if the source file is modified while Git is deflating and writing it out in a loose object. Or Git itself could have a bug causing memory corruption. Or even bad RAM could cause trouble. So it is best to make sure everything is coherent and checksum protected from beginning to end. To do so we compute the SHA1 of the data being deflated _after_ the deflate operation has consumed that data, and make sure it matches with the expected SHA1. This way we can rely on the CRC32 checked by the inflate operation to provide a good indication that the data is still coherent with its SHA1 hash. One pathological case we ignore is when the data is modified before (or during) deflate call, but changed back before it is hashed. There is some overhead of course. Using 'git add' on a set of large files: Before: real 0m25.210s user 0m23.783s sys 0m1.408s After: real 0m26.537s user 0m25.175s sys 0m1.358s The overhead is around 5% for full data coherency guarantee. Signed-off-by: Nicolas Pitre <nico@fluxnic.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-02-21 21:48:06 +01:00
git_SHA1_Final(parano_sha1, &c);
if (hashcmp(sha1, parano_sha1) != 0)
die("confused by unstable object source data for %s", sha1_to_hex(sha1));
close_sha1_file(fd);
if (mtime) {
struct utimbuf utb;
utb.actime = mtime;
utb.modtime = mtime;
if (utime(tmp_file.buf, &utb) < 0)
warning_errno("failed utime() on %s", tmp_file.buf);
}
return finalize_object_file(tmp_file.buf, filename);
}
static int freshen_loose_object(const unsigned char *sha1)
{
return check_and_freshen(sha1, 1);
}
static int freshen_packed_object(const unsigned char *sha1)
{
struct pack_entry e;
if (!find_pack_entry(sha1, &e))
return 0;
if (e.p->freshened)
return 1;
if (!freshen_file(e.p->pack_name))
return 0;
e.p->freshened = 1;
return 1;
}
int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1)
{
char hdr[32];
int hdrlen = sizeof(hdr);
/* Normally if we have it in the pack then we do not bother writing
* it out into .git/objects/??/?{38} file.
*/
write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);
if (freshen_packed_object(sha1) || freshen_loose_object(sha1))
return 0;
return write_loose_object(sha1, hdr, hdrlen, buf, len, 0);
}
int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type,
unsigned char *sha1, unsigned flags)
{
char *header;
int hdrlen, status = 0;
/* type string, SP, %lu of the length plus NUL must fit this */
hdrlen = strlen(type) + 32;
header = xmalloc(hdrlen);
write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen);
if (!(flags & HASH_WRITE_OBJECT))
goto cleanup;
if (freshen_packed_object(sha1) || freshen_loose_object(sha1))
goto cleanup;
status = write_loose_object(sha1, header, hdrlen, buf, len, 0);
cleanup:
free(header);
return status;
}
int force_object_loose(const unsigned char *sha1, time_t mtime)
{
void *buf;
unsigned long len;
enum object_type type;
char hdr[32];
int hdrlen;
int ret;
if (has_loose_object(sha1))
return 0;
buf = read_packed_sha1(sha1, &type, &len);
if (!buf)
return error("cannot read sha1_file for %s", sha1_to_hex(sha1));
hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(type), len) + 1;
ret = write_loose_object(sha1, hdr, hdrlen, buf, len, mtime);
free(buf);
return ret;
}
int has_pack_index(const unsigned char *sha1)
{
struct stat st;
if (stat(sha1_pack_index_name(sha1), &st))
return 0;
return 1;
}
int has_sha1_pack(const unsigned char *sha1)
{
struct pack_entry e;
return find_pack_entry(sha1, &e);
}
int has_sha1_file_with_flags(const unsigned char *sha1, int flags)
{
if (!startup_info->have_repository)
return 0;
return sha1_object_info_extended(sha1, NULL,
flags | OBJECT_INFO_SKIP_CACHED) >= 0;
}
int has_object_file(const struct object_id *oid)
{
return has_sha1_file(oid->hash);
}
fetch: use "quick" has_sha1_file for tag following When we auto-follow tags in a fetch, we look at all of the tags advertised by the remote and fetch ones where we don't already have the tag, but we do have the object it peels to. This involves a lot of calls to has_sha1_file(), some of which we can reasonably expect to fail. Since 45e8a74 (has_sha1_file: re-check pack directory before giving up, 2013-08-30), this may cause many calls to reprepare_packed_git(), which is potentially expensive. This has gone unnoticed for several years because it requires a fairly unique setup to matter: 1. You need to have a lot of packs on the client side to make reprepare_packed_git() expensive (the most expensive part is finding duplicates in an unsorted list, which is currently quadratic). 2. You need a large number of tag refs on the server side that are candidates for auto-following (i.e., that the client doesn't have). Each one triggers a re-read of the pack directory. 3. Under normal circumstances, the client would auto-follow those tags and after one large fetch, (2) would no longer be true. But if those tags point to history which is disconnected from what the client otherwise fetches, then it will never auto-follow, and those candidates will impact it on every fetch. So when all three are true, each fetch pays an extra O(nr_tags * nr_packs^2) cost, mostly in string comparisons on the pack names. This was exacerbated by 47bf4b0 (prepare_packed_git_one: refactor duplicate-pack check, 2014-06-30) which uses a slightly more expensive string check, under the assumption that the duplicate check doesn't happen very often (and it shouldn't; the real problem here is how often we are calling reprepare_packed_git()). This patch teaches fetch to use HAS_SHA1_QUICK to sacrifice accuracy for speed, in cases where we might be racy with a simultaneous repack. This is similar to the fix in 0eeb077 (index-pack: avoid excessive re-reading of pack directory, 2015-06-09). As with that case, it's OK for has_sha1_file() occasionally say "no I don't have it" when we do, because the worst case is not a corruption, but simply that we may fail to auto-follow a tag that points to it. Here are results from the included perf script, which sets up a situation similar to the one described above: Test HEAD^ HEAD ---------------------------------------------------------- 5550.4: fetch 11.21(10.42+0.78) 0.08(0.04+0.02) -99.3% Reported-by: Vegard Nossum <vegard.nossum@oracle.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-10-13 18:53:44 +02:00
int has_object_file_with_flags(const struct object_id *oid, int flags)
{
return has_sha1_file_with_flags(oid->hash, flags);
}
static void check_tree(const void *buf, size_t size)
{
struct tree_desc desc;
struct name_entry entry;
init_tree_desc(&desc, buf, size);
while (tree_entry(&desc, &entry))
/* do nothing
* tree_entry() will die() on malformed entries */
;
}
static void check_commit(const void *buf, size_t size)
{
struct commit c;
memset(&c, 0, sizeof(c));
if (parse_commit_buffer(&c, buf, size))
die("corrupt commit");
}
static void check_tag(const void *buf, size_t size)
{
struct tag t;
memset(&t, 0, sizeof(t));
if (parse_tag_buffer(&t, buf, size))
die("corrupt tag");
}
static int index_mem(unsigned char *sha1, void *buf, size_t size,
enum object_type type,
const char *path, unsigned flags)
{
Lazy man's auto-CRLF It currently does NOT know about file attributes, so it does its conversion purely based on content. Maybe that is more in the "git philosophy" anyway, since content is king, but I think we should try to do the file attributes to turn it off on demand. Anyway, BY DEFAULT it is off regardless, because it requires a [core] AutoCRLF = true in your config file to be enabled. We could make that the default for Windows, of course, the same way we do some other things (filemode etc). But you can actually enable it on UNIX, and it will cause: - "git update-index" will write blobs without CRLF - "git diff" will diff working tree files without CRLF - "git checkout" will write files to the working tree _with_ CRLF and things work fine. Funnily, it actually shows an odd file in git itself: git clone -n git test-crlf cd test-crlf git config core.autocrlf true git checkout git diff shows a diff for "Documentation/docbook-xsl.css". Why? Because we have actually checked in that file *with* CRLF! So when "core.autocrlf" is true, we'll always generate a *different* hash for it in the index, because the index hash will be for the content _without_ CRLF. Is this complete? I dunno. It seems to work for me. It doesn't use the filename at all right now, and that's probably a deficiency (we could certainly make the "is_binary()" heuristics also take standard filename heuristics into account). I don't pass in the filename at all for the "index_fd()" case (git-update-index), so that would need to be passed around, but this actually works fine. NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours truly. I will not guarantee that they work at all reasonable. Caveat emptor. But it _is_ simple, and it _is_ safe, since it's all off by default. The patch is pretty simple - the biggest part is the new "convert.c" file, but even that is really just basic stuff that anybody can write in "Teaching C 101" as a final project for their first class in programming. Not to say that it's bug-free, of course - but at least we're not talking about rocket surgery here. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
int ret, re_allocated = 0;
int write_object = flags & HASH_WRITE_OBJECT;
if (!type)
type = OBJ_BLOB;
Lazy man's auto-CRLF It currently does NOT know about file attributes, so it does its conversion purely based on content. Maybe that is more in the "git philosophy" anyway, since content is king, but I think we should try to do the file attributes to turn it off on demand. Anyway, BY DEFAULT it is off regardless, because it requires a [core] AutoCRLF = true in your config file to be enabled. We could make that the default for Windows, of course, the same way we do some other things (filemode etc). But you can actually enable it on UNIX, and it will cause: - "git update-index" will write blobs without CRLF - "git diff" will diff working tree files without CRLF - "git checkout" will write files to the working tree _with_ CRLF and things work fine. Funnily, it actually shows an odd file in git itself: git clone -n git test-crlf cd test-crlf git config core.autocrlf true git checkout git diff shows a diff for "Documentation/docbook-xsl.css". Why? Because we have actually checked in that file *with* CRLF! So when "core.autocrlf" is true, we'll always generate a *different* hash for it in the index, because the index hash will be for the content _without_ CRLF. Is this complete? I dunno. It seems to work for me. It doesn't use the filename at all right now, and that's probably a deficiency (we could certainly make the "is_binary()" heuristics also take standard filename heuristics into account). I don't pass in the filename at all for the "index_fd()" case (git-update-index), so that would need to be passed around, but this actually works fine. NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours truly. I will not guarantee that they work at all reasonable. Caveat emptor. But it _is_ simple, and it _is_ safe, since it's all off by default. The patch is pretty simple - the biggest part is the new "convert.c" file, but even that is really just basic stuff that anybody can write in "Teaching C 101" as a final project for their first class in programming. Not to say that it's bug-free, of course - but at least we're not talking about rocket surgery here. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
/*
* Convert blobs to git internal format
*/
if ((type == OBJ_BLOB) && path) {
struct strbuf nbuf = STRBUF_INIT;
if (convert_to_git(&the_index, path, buf, size, &nbuf,
write_object ? safe_crlf : SAFE_CRLF_FALSE)) {
buf = strbuf_detach(&nbuf, &size);
Lazy man's auto-CRLF It currently does NOT know about file attributes, so it does its conversion purely based on content. Maybe that is more in the "git philosophy" anyway, since content is king, but I think we should try to do the file attributes to turn it off on demand. Anyway, BY DEFAULT it is off regardless, because it requires a [core] AutoCRLF = true in your config file to be enabled. We could make that the default for Windows, of course, the same way we do some other things (filemode etc). But you can actually enable it on UNIX, and it will cause: - "git update-index" will write blobs without CRLF - "git diff" will diff working tree files without CRLF - "git checkout" will write files to the working tree _with_ CRLF and things work fine. Funnily, it actually shows an odd file in git itself: git clone -n git test-crlf cd test-crlf git config core.autocrlf true git checkout git diff shows a diff for "Documentation/docbook-xsl.css". Why? Because we have actually checked in that file *with* CRLF! So when "core.autocrlf" is true, we'll always generate a *different* hash for it in the index, because the index hash will be for the content _without_ CRLF. Is this complete? I dunno. It seems to work for me. It doesn't use the filename at all right now, and that's probably a deficiency (we could certainly make the "is_binary()" heuristics also take standard filename heuristics into account). I don't pass in the filename at all for the "index_fd()" case (git-update-index), so that would need to be passed around, but this actually works fine. NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours truly. I will not guarantee that they work at all reasonable. Caveat emptor. But it _is_ simple, and it _is_ safe, since it's all off by default. The patch is pretty simple - the biggest part is the new "convert.c" file, but even that is really just basic stuff that anybody can write in "Teaching C 101" as a final project for their first class in programming. Not to say that it's bug-free, of course - but at least we're not talking about rocket surgery here. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
re_allocated = 1;
}
}
if (flags & HASH_FORMAT_CHECK) {
if (type == OBJ_TREE)
check_tree(buf, size);
if (type == OBJ_COMMIT)
check_commit(buf, size);
if (type == OBJ_TAG)
check_tag(buf, size);
}
Lazy man's auto-CRLF It currently does NOT know about file attributes, so it does its conversion purely based on content. Maybe that is more in the "git philosophy" anyway, since content is king, but I think we should try to do the file attributes to turn it off on demand. Anyway, BY DEFAULT it is off regardless, because it requires a [core] AutoCRLF = true in your config file to be enabled. We could make that the default for Windows, of course, the same way we do some other things (filemode etc). But you can actually enable it on UNIX, and it will cause: - "git update-index" will write blobs without CRLF - "git diff" will diff working tree files without CRLF - "git checkout" will write files to the working tree _with_ CRLF and things work fine. Funnily, it actually shows an odd file in git itself: git clone -n git test-crlf cd test-crlf git config core.autocrlf true git checkout git diff shows a diff for "Documentation/docbook-xsl.css". Why? Because we have actually checked in that file *with* CRLF! So when "core.autocrlf" is true, we'll always generate a *different* hash for it in the index, because the index hash will be for the content _without_ CRLF. Is this complete? I dunno. It seems to work for me. It doesn't use the filename at all right now, and that's probably a deficiency (we could certainly make the "is_binary()" heuristics also take standard filename heuristics into account). I don't pass in the filename at all for the "index_fd()" case (git-update-index), so that would need to be passed around, but this actually works fine. NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours truly. I will not guarantee that they work at all reasonable. Caveat emptor. But it _is_ simple, and it _is_ safe, since it's all off by default. The patch is pretty simple - the biggest part is the new "convert.c" file, but even that is really just basic stuff that anybody can write in "Teaching C 101" as a final project for their first class in programming. Not to say that it's bug-free, of course - but at least we're not talking about rocket surgery here. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
if (write_object)
ret = write_sha1_file(buf, size, typename(type), sha1);
else
ret = hash_sha1_file(buf, size, typename(type), sha1);
if (re_allocated)
Lazy man's auto-CRLF It currently does NOT know about file attributes, so it does its conversion purely based on content. Maybe that is more in the "git philosophy" anyway, since content is king, but I think we should try to do the file attributes to turn it off on demand. Anyway, BY DEFAULT it is off regardless, because it requires a [core] AutoCRLF = true in your config file to be enabled. We could make that the default for Windows, of course, the same way we do some other things (filemode etc). But you can actually enable it on UNIX, and it will cause: - "git update-index" will write blobs without CRLF - "git diff" will diff working tree files without CRLF - "git checkout" will write files to the working tree _with_ CRLF and things work fine. Funnily, it actually shows an odd file in git itself: git clone -n git test-crlf cd test-crlf git config core.autocrlf true git checkout git diff shows a diff for "Documentation/docbook-xsl.css". Why? Because we have actually checked in that file *with* CRLF! So when "core.autocrlf" is true, we'll always generate a *different* hash for it in the index, because the index hash will be for the content _without_ CRLF. Is this complete? I dunno. It seems to work for me. It doesn't use the filename at all right now, and that's probably a deficiency (we could certainly make the "is_binary()" heuristics also take standard filename heuristics into account). I don't pass in the filename at all for the "index_fd()" case (git-update-index), so that would need to be passed around, but this actually works fine. NOTE NOTE NOTE! The "is_binary()" heuristics are totally made-up by yours truly. I will not guarantee that they work at all reasonable. Caveat emptor. But it _is_ simple, and it _is_ safe, since it's all off by default. The patch is pretty simple - the biggest part is the new "convert.c" file, but even that is really just basic stuff that anybody can write in "Teaching C 101" as a final project for their first class in programming. Not to say that it's bug-free, of course - but at least we're not talking about rocket surgery here. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-13 20:07:23 +01:00
free(buf);
return ret;
}
convert: stream from fd to required clean filter to reduce used address space The data is streamed to the filter process anyway. Better avoid mapping the file if possible. This is especially useful if a clean filter reduces the size, for example if it computes a sha1 for binary data, like git media. The file size that the previous implementation could handle was limited by the available address space; large files for example could not be handled with (32-bit) msysgit. The new implementation can filter files of any size as long as the filter output is small enough. The new code path is only taken if the filter is required. The filter consumes data directly from the fd. If it fails, the original data is not immediately available. The condition can easily be handled as a fatal error, which is expected for a required filter anyway. If the filter was not required, the condition would need to be handled in a different way, like seeking to 0 and reading the data. But this would require more restructuring of the code and is probably not worth it. The obvious approach of falling back to reading all data would not help achieving the main purpose of this patch, which is to handle large files with limited address space. If reading all data is an option, we can simply take the old code path right away and mmap the entire file. The environment variable GIT_MMAP_LIMIT, which has been introduced in a previous commit is used to test that the expected code path is taken. A related test that exercises required filters is modified to verify that the data actually has been modified on its way from the file system to the object store. Signed-off-by: Steffen Prohaska <prohaska@zib.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-08-26 17:23:25 +02:00
static int index_stream_convert_blob(unsigned char *sha1, int fd,
const char *path, unsigned flags)
{
int ret;
const int write_object = flags & HASH_WRITE_OBJECT;
struct strbuf sbuf = STRBUF_INIT;
assert(path);
assert(would_convert_to_git_filter_fd(path));
convert_to_git_filter_fd(&the_index, path, fd, &sbuf,
convert: stream from fd to required clean filter to reduce used address space The data is streamed to the filter process anyway. Better avoid mapping the file if possible. This is especially useful if a clean filter reduces the size, for example if it computes a sha1 for binary data, like git media. The file size that the previous implementation could handle was limited by the available address space; large files for example could not be handled with (32-bit) msysgit. The new implementation can filter files of any size as long as the filter output is small enough. The new code path is only taken if the filter is required. The filter consumes data directly from the fd. If it fails, the original data is not immediately available. The condition can easily be handled as a fatal error, which is expected for a required filter anyway. If the filter was not required, the condition would need to be handled in a different way, like seeking to 0 and reading the data. But this would require more restructuring of the code and is probably not worth it. The obvious approach of falling back to reading all data would not help achieving the main purpose of this patch, which is to handle large files with limited address space. If reading all data is an option, we can simply take the old code path right away and mmap the entire file. The environment variable GIT_MMAP_LIMIT, which has been introduced in a previous commit is used to test that the expected code path is taken. A related test that exercises required filters is modified to verify that the data actually has been modified on its way from the file system to the object store. Signed-off-by: Steffen Prohaska <prohaska@zib.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-08-26 17:23:25 +02:00
write_object ? safe_crlf : SAFE_CRLF_FALSE);
if (write_object)
ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
sha1);
else
ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
sha1);
strbuf_release(&sbuf);
return ret;
}
static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
const char *path, unsigned flags)
{
struct strbuf sbuf = STRBUF_INIT;
int ret;
if (strbuf_read(&sbuf, fd, 4096) >= 0)
ret = index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags);
else
ret = -1;
strbuf_release(&sbuf);
return ret;
}
#define SMALL_FILE_SIZE (32*1024)
static int index_core(unsigned char *sha1, int fd, size_t size,
enum object_type type, const char *path,
unsigned flags)
{
int ret;
if (!size) {
ret = index_mem(sha1, "", size, type, path, flags);
} else if (size <= SMALL_FILE_SIZE) {
char *buf = xmalloc(size);
if (size == read_in_full(fd, buf, size))
ret = index_mem(sha1, buf, size, type, path, flags);
else
ret = error_errno("short read");
free(buf);
} else {
void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
ret = index_mem(sha1, buf, size, type, path, flags);
munmap(buf, size);
}
return ret;
}
/*
* This creates one packfile per large blob unless bulk-checkin
* machinery is "plugged".
*
* This also bypasses the usual "convert-to-git" dance, and that is on
* purpose. We could write a streaming version of the converting
* functions and insert that before feeding the data to fast-import
do not stream large files to pack when filters are in use Because git's object format requires us to specify the number of bytes in the object in its header, we must know the size before streaming a blob into the object database. This is not a problem when adding a regular file, as we can get the size from stat(). However, when filters are in use (such as autocrlf, or the ident, filter, or eol gitattributes), we have no idea what the ultimate size will be. The current code just punts on the whole issue and ignores filter configuration entirely for files larger than core.bigfilethreshold. This can generate confusing results if you use filters for large binary files, as the filter will suddenly stop working as the file goes over a certain size. Rather than try to handle unknown input sizes with streaming, this patch just turns off the streaming optimization when filters are in use. This has a slight performance regression in a very specific case: if you have autocrlf on, but no gitattributes, a large binary file will avoid the streaming code path because we don't know beforehand whether it will need conversion or not. But if you are handling large binary files, you should be marking them as such via attributes (or at least not using autocrlf, and instead marking your text files as such). And the flip side is that if you have a large _non_-binary file, there is a correctness improvement; before we did not apply the conversion at all. The first half of the new t1051 script covers these failures on input. The second half tests the matching output code paths. These already work correctly, and do not need any adjustment. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-02-24 23:10:17 +01:00
* (or equivalent in-core API described above). However, that is
* somewhat complicated, as we do not know the size of the filter
* result, which we need to know beforehand when writing a git object.
* Since the primary motivation for trying to stream from the working
* tree file and to avoid mmaping it in core is to deal with large
* binary blobs, they generally do not want to get any conversion, and
* callers should avoid this code path when filters are requested.
*/
static int index_stream(unsigned char *sha1, int fd, size_t size,
enum object_type type, const char *path,
unsigned flags)
{
return index_bulk_checkin(sha1, fd, size, type, path, flags);
}
int index_fd(unsigned char *sha1, int fd, struct stat *st,
enum object_type type, const char *path, unsigned flags)
{
int ret;
/*
* Call xsize_t() only when needed to avoid potentially unnecessary
* die() for large files.
*/
convert: stream from fd to required clean filter to reduce used address space The data is streamed to the filter process anyway. Better avoid mapping the file if possible. This is especially useful if a clean filter reduces the size, for example if it computes a sha1 for binary data, like git media. The file size that the previous implementation could handle was limited by the available address space; large files for example could not be handled with (32-bit) msysgit. The new implementation can filter files of any size as long as the filter output is small enough. The new code path is only taken if the filter is required. The filter consumes data directly from the fd. If it fails, the original data is not immediately available. The condition can easily be handled as a fatal error, which is expected for a required filter anyway. If the filter was not required, the condition would need to be handled in a different way, like seeking to 0 and reading the data. But this would require more restructuring of the code and is probably not worth it. The obvious approach of falling back to reading all data would not help achieving the main purpose of this patch, which is to handle large files with limited address space. If reading all data is an option, we can simply take the old code path right away and mmap the entire file. The environment variable GIT_MMAP_LIMIT, which has been introduced in a previous commit is used to test that the expected code path is taken. A related test that exercises required filters is modified to verify that the data actually has been modified on its way from the file system to the object store. Signed-off-by: Steffen Prohaska <prohaska@zib.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-08-26 17:23:25 +02:00
if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path))
ret = index_stream_convert_blob(sha1, fd, path, flags);
else if (!S_ISREG(st->st_mode))
ret = index_pipe(sha1, fd, type, path, flags);
else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
(path && would_convert_to_git(&the_index, path)))
ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
flags);
else
ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path,
flags);
close(fd);
return ret;
}
int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags)
{
int fd;
struct strbuf sb = STRBUF_INIT;
switch (st->st_mode & S_IFMT) {
case S_IFREG:
fd = open(path, O_RDONLY);
if (fd < 0)
return error_errno("open(\"%s\")", path);
if (index_fd(sha1, fd, st, OBJ_BLOB, path, flags) < 0)
return error("%s: failed to insert into database",
path);
break;
case S_IFLNK:
if (strbuf_readlink(&sb, path, st->st_size))
return error_errno("readlink(\"%s\")", path);
if (!(flags & HASH_WRITE_OBJECT))
hash_sha1_file(sb.buf, sb.len, blob_type, sha1);
else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1))
return error("%s: failed to insert into database",
path);
strbuf_release(&sb);
break;
case S_IFDIR:
return resolve_gitlink_ref(path, "HEAD", sha1);
default:
return error("%s: unsupported file type", path);
}
return 0;
}
int read_pack_header(int fd, struct pack_header *header)
{
if (read_in_full(fd, header, sizeof(*header)) != sizeof(*header))
/* "eof before pack header was fully read" */
return PH_ERROR_EOF;
if (header->hdr_signature != htonl(PACK_SIGNATURE))
/* "protocol error (pack signature mismatch detected)" */
return PH_ERROR_PACK_SIGNATURE;
if (!pack_version_ok(header->hdr_version))
/* "protocol error (pack version unsupported)" */
return PH_ERROR_PROTOCOL;
return 0;
}
void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
{
enum object_type type = sha1_object_info(sha1, NULL);
if (type < 0)
die("%s is not a valid object", sha1_to_hex(sha1));
if (type != expect)
die("%s is not a valid '%s' object", sha1_to_hex(sha1),
typename(expect));
}
int for_each_file_in_obj_subdir(unsigned int subdir_nr,
struct strbuf *path,
each_loose_object_fn obj_cb,
each_loose_cruft_fn cruft_cb,
each_loose_subdir_fn subdir_cb,
void *data)
{
size_t origlen, baselen;
DIR *dir;
struct dirent *de;
int r = 0;
if (subdir_nr > 0xff)
BUG("invalid loose object subdirectory: %x", subdir_nr);
origlen = path->len;
strbuf_complete(path, '/');
strbuf_addf(path, "%02x", subdir_nr);
baselen = path->len;
dir = opendir(path->buf);
if (!dir) {
if (errno != ENOENT)
r = error_errno("unable to open %s", path->buf);
strbuf_setlen(path, origlen);
return r;
}
while ((de = readdir(dir))) {
if (is_dot_or_dotdot(de->d_name))
continue;
strbuf_setlen(path, baselen);
strbuf_addf(path, "/%s", de->d_name);
if (strlen(de->d_name) == GIT_SHA1_HEXSZ - 2) {
char hex[GIT_MAX_HEXSZ+1];
struct object_id oid;
xsnprintf(hex, sizeof(hex), "%02x%s",
subdir_nr, de->d_name);
if (!get_oid_hex(hex, &oid)) {
if (obj_cb) {
r = obj_cb(&oid, path->buf, data);
if (r)
break;
}
continue;
}
}
if (cruft_cb) {
r = cruft_cb(de->d_name, path->buf, data);
if (r)
break;
}
}
closedir(dir);
strbuf_setlen(path, baselen);
if (!r && subdir_cb)
r = subdir_cb(subdir_nr, path->buf, data);
strbuf_setlen(path, origlen);
return r;
}
int for_each_loose_file_in_objdir_buf(struct strbuf *path,
each_loose_object_fn obj_cb,
each_loose_cruft_fn cruft_cb,
each_loose_subdir_fn subdir_cb,
void *data)
{
int r = 0;
int i;
for (i = 0; i < 256; i++) {
r = for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb,
subdir_cb, data);
if (r)
break;
}
return r;
}
int for_each_loose_file_in_objdir(const char *path,
each_loose_object_fn obj_cb,
each_loose_cruft_fn cruft_cb,
each_loose_subdir_fn subdir_cb,
void *data)
{
struct strbuf buf = STRBUF_INIT;
int r;
strbuf_addstr(&buf, path);
r = for_each_loose_file_in_objdir_buf(&buf, obj_cb, cruft_cb,
subdir_cb, data);
strbuf_release(&buf);
return r;
}
struct loose_alt_odb_data {
each_loose_object_fn *cb;
void *data;
};
static int loose_from_alt_odb(struct alternate_object_database *alt,
void *vdata)
{
struct loose_alt_odb_data *data = vdata;
struct strbuf buf = STRBUF_INIT;
int r;
strbuf_addstr(&buf, alt->path);
r = for_each_loose_file_in_objdir_buf(&buf,
data->cb, NULL, NULL,
data->data);
strbuf_release(&buf);
return r;
}
reachable: only mark local objects as recent When pruning and repacking a repository that has an alternate object store configured, we may traverse a large number of objects in the alternate. This serves no purpose, and may be expensive to do. A longer explanation is below. Commits d3038d2 and abcb865 taught prune and pack-objects (respectively) to treat "recent" objects as tips for reachability, so that we keep whole chunks of history. They built on the object traversal in 660c889 (sha1_file: add for_each iterators for loose and packed objects, 2014-10-15), which covers both local and alternate objects. In both cases, covering alternate objects is unnecessary, as both commands can only drop objects from the local repository. In the case of prune, we traverse only the local object directory. And in the case of repacking, while we may or may not include local objects in our pack, we will never reach into the alternate with "repack -d". The "-l" option is only a question of whether we are migrating objects from the alternate into our repository, or leaving them untouched. It is possible that we may drop an object that is depended upon by another object in the alternate. For example, imagine two repositories, A and B, with A pointing to B as an alternate. Now imagine a commit that is in B which references a tree that is only in A. Traversing from recent objects in B might prevent A from dropping that tree. But this case isn't worth covering. Repo B should take responsibility for its own objects. It would never have had the commit in the first place if it did not also have the tree, and assuming it is using the same "keep recent chunks of history" scheme, then it would itself keep the tree, as well. So checking the alternate objects is not worth doing, and come with a significant performance impact. In both cases, we skip any recent objects that have already been marked SEEN (i.e., that we know are already reachable for prune, or included in the pack for a repack). So there is a slight waste of time in opening the alternate packs at all, only to notice that we have already considered each object. But much worse, the alternate repository may have a large number of objects that are not reachable from the local repository at all, and we end up adding them to the traversal. We can fix this by considering only local unseen objects. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-27 12:32:41 +01:00
int for_each_loose_object(each_loose_object_fn cb, void *data, unsigned flags)
{
struct loose_alt_odb_data alt;
int r;
r = for_each_loose_file_in_objdir(get_object_directory(),
cb, NULL, NULL, data);
if (r)
return r;
reachable: only mark local objects as recent When pruning and repacking a repository that has an alternate object store configured, we may traverse a large number of objects in the alternate. This serves no purpose, and may be expensive to do. A longer explanation is below. Commits d3038d2 and abcb865 taught prune and pack-objects (respectively) to treat "recent" objects as tips for reachability, so that we keep whole chunks of history. They built on the object traversal in 660c889 (sha1_file: add for_each iterators for loose and packed objects, 2014-10-15), which covers both local and alternate objects. In both cases, covering alternate objects is unnecessary, as both commands can only drop objects from the local repository. In the case of prune, we traverse only the local object directory. And in the case of repacking, while we may or may not include local objects in our pack, we will never reach into the alternate with "repack -d". The "-l" option is only a question of whether we are migrating objects from the alternate into our repository, or leaving them untouched. It is possible that we may drop an object that is depended upon by another object in the alternate. For example, imagine two repositories, A and B, with A pointing to B as an alternate. Now imagine a commit that is in B which references a tree that is only in A. Traversing from recent objects in B might prevent A from dropping that tree. But this case isn't worth covering. Repo B should take responsibility for its own objects. It would never have had the commit in the first place if it did not also have the tree, and assuming it is using the same "keep recent chunks of history" scheme, then it would itself keep the tree, as well. So checking the alternate objects is not worth doing, and come with a significant performance impact. In both cases, we skip any recent objects that have already been marked SEEN (i.e., that we know are already reachable for prune, or included in the pack for a repack). So there is a slight waste of time in opening the alternate packs at all, only to notice that we have already considered each object. But much worse, the alternate repository may have a large number of objects that are not reachable from the local repository at all, and we end up adding them to the traversal. We can fix this by considering only local unseen objects. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-27 12:32:41 +01:00
if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
return 0;
alt.cb = cb;
alt.data = data;
return foreach_alt_odb(loose_from_alt_odb, &alt);
}
static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
{
uint32_t i;
int r = 0;
for (i = 0; i < p->num_objects; i++) {
struct object_id oid;
if (!nth_packed_object_oid(&oid, p, i))
return error("unable to get sha1 of object %u in %s",
i, p->pack_name);
r = cb(&oid, p, i, data);
if (r)
break;
}
return r;
}
reachable: only mark local objects as recent When pruning and repacking a repository that has an alternate object store configured, we may traverse a large number of objects in the alternate. This serves no purpose, and may be expensive to do. A longer explanation is below. Commits d3038d2 and abcb865 taught prune and pack-objects (respectively) to treat "recent" objects as tips for reachability, so that we keep whole chunks of history. They built on the object traversal in 660c889 (sha1_file: add for_each iterators for loose and packed objects, 2014-10-15), which covers both local and alternate objects. In both cases, covering alternate objects is unnecessary, as both commands can only drop objects from the local repository. In the case of prune, we traverse only the local object directory. And in the case of repacking, while we may or may not include local objects in our pack, we will never reach into the alternate with "repack -d". The "-l" option is only a question of whether we are migrating objects from the alternate into our repository, or leaving them untouched. It is possible that we may drop an object that is depended upon by another object in the alternate. For example, imagine two repositories, A and B, with A pointing to B as an alternate. Now imagine a commit that is in B which references a tree that is only in A. Traversing from recent objects in B might prevent A from dropping that tree. But this case isn't worth covering. Repo B should take responsibility for its own objects. It would never have had the commit in the first place if it did not also have the tree, and assuming it is using the same "keep recent chunks of history" scheme, then it would itself keep the tree, as well. So checking the alternate objects is not worth doing, and come with a significant performance impact. In both cases, we skip any recent objects that have already been marked SEEN (i.e., that we know are already reachable for prune, or included in the pack for a repack). So there is a slight waste of time in opening the alternate packs at all, only to notice that we have already considered each object. But much worse, the alternate repository may have a large number of objects that are not reachable from the local repository at all, and we end up adding them to the traversal. We can fix this by considering only local unseen objects. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-27 12:32:41 +01:00
int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
{
struct packed_git *p;
int r = 0;
int pack_errors = 0;
prepare_packed_git();
for (p = packed_git; p; p = p->next) {
reachable: only mark local objects as recent When pruning and repacking a repository that has an alternate object store configured, we may traverse a large number of objects in the alternate. This serves no purpose, and may be expensive to do. A longer explanation is below. Commits d3038d2 and abcb865 taught prune and pack-objects (respectively) to treat "recent" objects as tips for reachability, so that we keep whole chunks of history. They built on the object traversal in 660c889 (sha1_file: add for_each iterators for loose and packed objects, 2014-10-15), which covers both local and alternate objects. In both cases, covering alternate objects is unnecessary, as both commands can only drop objects from the local repository. In the case of prune, we traverse only the local object directory. And in the case of repacking, while we may or may not include local objects in our pack, we will never reach into the alternate with "repack -d". The "-l" option is only a question of whether we are migrating objects from the alternate into our repository, or leaving them untouched. It is possible that we may drop an object that is depended upon by another object in the alternate. For example, imagine two repositories, A and B, with A pointing to B as an alternate. Now imagine a commit that is in B which references a tree that is only in A. Traversing from recent objects in B might prevent A from dropping that tree. But this case isn't worth covering. Repo B should take responsibility for its own objects. It would never have had the commit in the first place if it did not also have the tree, and assuming it is using the same "keep recent chunks of history" scheme, then it would itself keep the tree, as well. So checking the alternate objects is not worth doing, and come with a significant performance impact. In both cases, we skip any recent objects that have already been marked SEEN (i.e., that we know are already reachable for prune, or included in the pack for a repack). So there is a slight waste of time in opening the alternate packs at all, only to notice that we have already considered each object. But much worse, the alternate repository may have a large number of objects that are not reachable from the local repository at all, and we end up adding them to the traversal. We can fix this by considering only local unseen objects. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-03-27 12:32:41 +01:00
if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
continue;
if (open_pack_index(p)) {
pack_errors = 1;
continue;
}
r = for_each_object_in_pack(p, cb, data);
if (r)
break;
}
return r ? r : pack_errors;
}
static int check_stream_sha1(git_zstream *stream,
const char *hdr,
unsigned long size,
const char *path,
const unsigned char *expected_sha1)
{
git_SHA_CTX c;
unsigned char real_sha1[GIT_MAX_RAWSZ];
unsigned char buf[4096];
unsigned long total_read;
int status = Z_OK;
git_SHA1_Init(&c);
git_SHA1_Update(&c, hdr, stream->total_out);
/*
* We already read some bytes into hdr, but the ones up to the NUL
* do not count against the object's content size.
*/
total_read = stream->total_out - strlen(hdr) - 1;
/*
* This size comparison must be "<=" to read the final zlib packets;
* see the comment in unpack_sha1_rest for details.
*/
while (total_read <= size &&
(status == Z_OK || status == Z_BUF_ERROR)) {
stream->next_out = buf;
stream->avail_out = sizeof(buf);
if (size - total_read < stream->avail_out)
stream->avail_out = size - total_read;
status = git_inflate(stream, Z_FINISH);
git_SHA1_Update(&c, buf, stream->next_out - buf);
total_read += stream->next_out - buf;
}
git_inflate_end(stream);
if (status != Z_STREAM_END) {
error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
return -1;
}
if (stream->avail_in) {
error("garbage at end of loose object '%s'",
sha1_to_hex(expected_sha1));
return -1;
}
git_SHA1_Final(real_sha1, &c);
if (hashcmp(expected_sha1, real_sha1)) {
error("sha1 mismatch for %s (expected %s)", path,
sha1_to_hex(expected_sha1));
return -1;
}
return 0;
}
int read_loose_object(const char *path,
const unsigned char *expected_sha1,
enum object_type *type,
unsigned long *size,
void **contents)
{
int ret = -1;
void *map = NULL;
unsigned long mapsize;
git_zstream stream;
char hdr[32];
*contents = NULL;
map = map_sha1_file_1(path, NULL, &mapsize);
if (!map) {
error_errno("unable to mmap %s", path);
goto out;
}
if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
error("unable to unpack header of %s", path);
goto out;
}
*type = parse_sha1_header(hdr, size);
if (*type < 0) {
error("unable to parse header of %s", path);
git_inflate_end(&stream);
goto out;
}
if (*type == OBJ_BLOB) {
if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
goto out;
} else {
*contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1);
if (!*contents) {
error("unable to unpack contents of %s", path);
git_inflate_end(&stream);
goto out;
}
if (check_sha1_signature(expected_sha1, *contents,
*size, typename(*type))) {
error("sha1 mismatch for %s (expected %s)", path,
sha1_to_hex(expected_sha1));
free(*contents);
goto out;
}
}
ret = 0; /* everything checks out */
out:
if (map)
munmap(map, mapsize);
return ret;
}