1
0
mirror of https://github.com/git/git.git synced 2024-09-22 23:00:57 +02:00
git/builtin/gc.c
Junio C Hamano 92251b1b5b Merge branch 'nd/shallow-clone'
Fetching from a shallow-cloned repository used to be forbidden,
primarily because the codepaths involved were not carefully vetted
and we did not bother supporting such usage. This attempts to allow
object transfer out of a shallow-cloned repository in a controlled
way (i.e. the receiver becomes a shallow repository with truncated
history).

* nd/shallow-clone: (31 commits)
  t5537: fix incorrect expectation in test case 10
  shallow: remove unused code
  send-pack.c: mark a file-local function static
  git-clone.txt: remove shallow clone limitations
  prune: clean .git/shallow after pruning objects
  clone: use git protocol for cloning shallow repo locally
  send-pack: support pushing from a shallow clone via http
  receive-pack: support pushing to a shallow clone via http
  smart-http: support shallow fetch/clone
  remote-curl: pass ref SHA-1 to fetch-pack as well
  send-pack: support pushing to a shallow clone
  receive-pack: allow pushes that update .git/shallow
  connected.c: add new variant that runs with --shallow-file
  add GIT_SHALLOW_FILE to propagate --shallow-file to subprocesses
  receive/send-pack: support pushing from a shallow clone
  receive-pack: reorder some code in unpack()
  fetch: add --update-shallow to accept refs that update .git/shallow
  upload-pack: make sure deepening preserves shallow roots
  fetch: support fetching from a shallow repository
  clone: support remote shallow repository
  ...
2014-01-17 12:21:20 -08:00

346 lines
8.5 KiB
C

/*
* git gc builtin command
*
* Cleanup unreachable files and optimize the repository.
*
* Copyright (c) 2007 James Bowes
*
* Based on git-gc.sh, which is
*
* Copyright (c) 2006 Shawn O. Pearce
*/
#include "builtin.h"
#include "cache.h"
#include "parse-options.h"
#include "run-command.h"
#include "sigchain.h"
#include "argv-array.h"
#include "commit.h"
/* Format used by cmd_gc() to report a sub-command that failed to run. */
#define FAILED_RUN "failed to run %s"

/* Usage strings handed to the option parser by cmd_gc(). */
static const char * const builtin_gc_usage[] = {
	N_("git gc [options]"),
	NULL
};

/*
 * gc.packrefs: boolean from the configuration; gc_config() stores -1 for
 * the value "notbare", which cmd_gc() later resolves to
 * !is_bare_repository().
 */
static int pack_refs = 1;
/* gc.aggressivewindow: passed to repack as --window=<n> under --aggressive when > 0. */
static int aggressive_window = 250;
/* gc.auto: loose-object threshold for --auto; <= 0 disables automatic gc. */
static int gc_auto_threshold = 6700;
/* gc.autopacklimit: pack-count limit for --auto; <= 0 disables that check. */
static int gc_auto_pack_limit = 50;
/*
 * gc.pruneexpire / --prune: expiry date given to "prune --expire".
 * cmd_gc() skips the prune step entirely when this is NULL.
 */
static const char *prune_expire = "2.weeks.ago";

/* Argument vectors of the sub-commands cmd_gc() builds and runs. */
static struct argv_array pack_refs_cmd = ARGV_ARRAY_INIT;
static struct argv_array reflog = ARGV_ARRAY_INIT;
static struct argv_array repack = ARGV_ARRAY_INIT;
static struct argv_array prune = ARGV_ARRAY_INIT;
static struct argv_array rerere = ARGV_ARRAY_INIT;

/*
 * Path of the gc.pid file; set by lock_repo_for_gc() once the lock has
 * been taken, NULL before that.
 */
static char *pidfile;
/*
 * Delete the file named by `pidfile`.  A no-op while no pidfile path has
 * been recorded.  Registered with atexit() by lock_repo_for_gc().
 */
static void remove_pidfile(void)
{
	if (!pidfile)
		return;
	unlink(pidfile);
}
/*
 * Signal handler installed by lock_repo_for_gc(): remove the pidfile,
 * pop this handler off the sigchain for `signo`, then re-raise the
 * signal (presumably so the previously installed disposition handles
 * it -- confirm against sigchain.h).
 */
static void remove_pidfile_on_signal(int signo)
{
	remove_pidfile();
	sigchain_pop(signo);
	raise(signo);
}
/*
 * Configuration callback for "git gc".
 *
 * Handles gc.packrefs, gc.aggressivewindow, gc.auto, gc.autopacklimit
 * and gc.pruneexpire by updating the corresponding file-level settings;
 * every other key is forwarded to git_default_config().
 *
 * Returns 0 for a handled integer/boolean key, the result of error()
 * for a rejected gc.pruneexpire value, and otherwise whatever
 * git_config_string() or git_default_config() returns.
 */
static int gc_config(const char *var, const char *value, void *cb)
{
	if (!strcmp(var, "gc.packrefs")) {
		/* "notbare" is remembered as -1 and resolved by cmd_gc() */
		int not_bare = value && !strcmp(value, "notbare");

		pack_refs = not_bare ? -1 : git_config_bool(var, value);
	} else if (!strcmp(var, "gc.aggressivewindow")) {
		aggressive_window = git_config_int(var, value);
	} else if (!strcmp(var, "gc.auto")) {
		gc_auto_threshold = git_config_int(var, value);
	} else if (!strcmp(var, "gc.autopacklimit")) {
		gc_auto_pack_limit = git_config_int(var, value);
	} else if (!strcmp(var, "gc.pruneexpire")) {
		/*
		 * Apart from the literal "now", reject any date that
		 * does not resolve to a point strictly before now.
		 */
		if (value && strcmp(value, "now")) {
			unsigned long current = approxidate("now");
			unsigned long requested = approxidate(value);

			if (requested >= current)
				return error(_("Invalid %s: '%s'"), var, value);
		}
		return git_config_string(&prune_expire, var, value);
	} else {
		return git_default_config(var, value, cb);
	}
	return 0;
}
/*
 * Quickly check if a "gc" is needed, by estimating how many loose
 * objects there are.  Because SHA-1 is evenly distributed, we can
 * check only one fan-out directory ("<objdir>/17") and get a
 * reasonable estimate.
 *
 * Returns 1 when that directory holds more loose objects than its
 * share of gc.auto (gc.auto / 256, rounded up), and 0 otherwise --
 * including when gc.auto is <= 0, when the path does not fit in
 * PATH_MAX, or when the directory cannot be opened.
 */
static int too_many_loose_objects(void)
{
	char path[PATH_MAX];
	const char *objdir = get_object_directory();
	DIR *dir;
	struct dirent *ent;
	int auto_threshold;
	int num_loose = 0;
	int needed = 0;
	int len;

	if (gc_auto_threshold <= 0)
		return 0;

	/* a negative return (output error) is treated like truncation */
	len = snprintf(path, sizeof(path), "%s/17", objdir);
	if (len < 0 || sizeof(path) <= (size_t)len) {
		warning(_("insanely long object directory %.*s"), 50, objdir);
		return 0;
	}

	dir = opendir(path);
	if (!dir)
		return 0;

	/*
	 * Round up gc.auto / 256.  Written without "+ 255" so that a
	 * gc.auto value close to INT_MAX cannot overflow into a negative
	 * threshold (which would make every --auto run trigger a gc).
	 */
	auto_threshold = gc_auto_threshold / 256 +
			 (gc_auto_threshold % 256 != 0);

	while ((ent = readdir(dir)) != NULL) {
		/* loose object names here are exactly 38 lowercase hex digits */
		if (strspn(ent->d_name, "0123456789abcdef") != 38 ||
		    ent->d_name[38] != '\0')
			continue;
		if (++num_loose > auto_threshold) {
			needed = 1;
			break;
		}
	}
	closedir(dir);
	return needed;
}
/*
 * Report whether the number of packs has reached gc.autopacklimit.
 *
 * Only entries of the packed_git list with pack_local set and pack_keep
 * unset are counted.  Returns 0 when the limit is <= 0 (check disabled),
 * otherwise non-zero iff the count is at least the limit.
 */
static int too_many_packs(void)
{
	struct packed_git *pack;
	int counted = 0;

	if (gc_auto_pack_limit <= 0)
		return 0;

	prepare_packed_git();
	for (pack = packed_git; pack; pack = pack->next) {
		/*
		 * Perhaps check the size of the pack and count only
		 * very small ones here?
		 */
		if (pack->pack_local && !pack->pack_keep)
			counted++;
	}
	return counted >= gc_auto_pack_limit;
}
/*
 * Append the "repack everything" option to the repack command line:
 *   - "-a" when prune_expire is exactly "now";
 *   - "-A" otherwise, followed by "--unpack-unreachable=<prune_expire>"
 *     when a prune expiry is set at all.
 */
static void add_repack_all_option(void)
{
	if (!prune_expire) {
		argv_array_push(&repack, "-A");
		return;
	}
	if (!strcmp(prune_expire, "now")) {
		argv_array_push(&repack, "-a");
		return;
	}
	argv_array_push(&repack, "-A");
	argv_array_pushf(&repack, "--unpack-unreachable=%s", prune_expire);
}
/*
 * Decide whether "gc --auto" has any work to do.
 *
 * Returns 0 when automatic gc is disabled (gc.auto <= 0), when there
 * are neither too many packs nor too many loose objects, or when the
 * "pre-auto-gc" hook returns non-zero; returns 1 otherwise.
 *
 * Side effect: when there are too many packs, the repack-all option is
 * added to the repack command line.
 */
static int need_to_gc(void)
{
	/* Setting gc.auto to 0 or negative disables the automatic gc. */
	if (gc_auto_threshold <= 0)
		return 0;

	/*
	 * Too many loose objects but not too many packs: plain
	 * "repack -d -l".  Too many packs: "repack -A -d -l".
	 * Neither: nothing to do.
	 */
	if (!too_many_packs()) {
		if (!too_many_loose_objects())
			return 0;
	} else {
		add_repack_all_option();
	}

	/* a non-zero hook status vetoes the automatic gc */
	return !run_hook(NULL, "pre-auto-gc", NULL);
}
/*
 * Take the gc lock by writing "<pid> <hostname>" into the gc.pid file.
 *
 * force:   when non-zero, do not look at an existing gc.pid at all and
 *          simply overwrite it.
 * ret_pid: when a hostname is returned, receives the pid that was read
 *          from the existing gc.pid; untouched otherwise.
 *
 * Returns NULL on success (also when this process already holds the
 * lock), else the hostname recorded in gc.pid as running the gc.  The
 * returned string lives in a static buffer.
 *
 * On success the pidfile path is remembered and its removal is arranged
 * both at exit and on the common signals.
 */
static const char *lock_repo_for_gc(int force, pid_t *ret_pid)
{
	static struct lock_file lock;
	static char locking_host[128];
	char my_host[128];
	struct strbuf sb = STRBUF_INIT;
	struct stat st;
	uintmax_t pid;
	FILE *fp;
	int fd, should_exit;

	if (pidfile)
		/* already locked */
		return NULL;

	if (gethostname(my_host, sizeof(my_host))) {
		strcpy(my_host, "unknown");
	} else {
		/*
		 * If the full hostname does not fit, POSIX does not
		 * specify whether the buffer is NUL-terminated; make
		 * sure it is before it is used as a string below.
		 */
		my_host[sizeof(my_host) - 1] = '\0';
	}

	fd = hold_lock_file_for_update(&lock, git_path("gc.pid"),
				       LOCK_DIE_ON_ERROR);
	if (!force) {
		fp = fopen(git_path("gc.pid"), "r");
		/* zero-fill so the %127c result below is always terminated */
		memset(locking_host, 0, sizeof(locking_host));
		should_exit =
			fp != NULL &&
			!fstat(fileno(fp), &st) &&
			/*
			 * 12 hour limit is very generous as gc should
			 * never take that long. On the other hand we
			 * don't really need a strict limit here,
			 * running gc --auto one day late is not a big
			 * problem. --force can be used in manual gc
			 * after the user verifies that no gc is
			 * running.
			 */
			time(NULL) - st.st_mtime <= 12 * 3600 &&
			fscanf(fp, "%"PRIuMAX" %127c", &pid, locking_host) == 2 &&
			/* be gentle to concurrent "gc" on remote hosts */
			(strcmp(locking_host, my_host) || !kill(pid, 0) || errno == EPERM);
		if (fp != NULL)
			fclose(fp);
		if (should_exit) {
			if (fd >= 0)
				rollback_lock_file(&lock);
			*ret_pid = pid;
			return locking_host;
		}
	}

	strbuf_addf(&sb, "%"PRIuMAX" %s",
		    (uintmax_t) getpid(), my_host);
	write_in_full(fd, sb.buf, sb.len);
	strbuf_release(&sb);
	commit_lock_file(&lock);

	pidfile = git_pathdup("gc.pid");
	sigchain_push_common(remove_pidfile_on_signal);
	atexit(remove_pidfile);

	return NULL;
}
/*
 * Entry point of "git gc".
 *
 * Builds the command lines for pack-refs, reflog expire, repack, prune
 * and rerere gc from the configuration and the command-line options,
 * takes the gc lock, and runs the sub-commands in that order.
 *
 * Returns 0 on success, and also when --auto finds nothing to do or
 * finds the repository locked by another gc.  Returns the result of
 * error() as soon as one sub-command fails.
 */
int cmd_gc(int argc, const char **argv, const char *prefix)
{
	int aggressive = 0;
	int auto_gc = 0;
	int quiet = 0;
	int force = 0;
	const char *lock_holder;
	pid_t lock_pid;

	struct option builtin_gc_options[] = {
		OPT__QUIET(&quiet, N_("suppress progress reporting")),
		{ OPTION_STRING, 0, "prune", &prune_expire, N_("date"),
			N_("prune unreferenced objects"),
			PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire },
		OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")),
		OPT_BOOL(0, "auto", &auto_gc, N_("enable auto-gc mode")),
		OPT_BOOL(0, "force", &force, N_("force running gc even if there may be another gc running")),
		OPT_END()
	};

	if (argc == 2 && !strcmp(argv[1], "-h"))
		usage_with_options(builtin_gc_usage, builtin_gc_options);

	/* fixed leading arguments of every sub-command */
	argv_array_pushl(&pack_refs_cmd, "pack-refs", "--all", "--prune", NULL);
	argv_array_pushl(&reflog, "reflog", "expire", "--all", NULL);
	argv_array_pushl(&repack, "repack", "-d", "-l", NULL);
	argv_array_pushl(&prune, "prune", "--expire", NULL);
	argv_array_pushl(&rerere, "rerere", "gc", NULL);

	/* configuration is read first; command-line options are parsed after it */
	git_config(gc_config, NULL);
	if (pack_refs < 0)
		pack_refs = !is_bare_repository();

	argc = parse_options(argc, argv, prefix, builtin_gc_options,
			     builtin_gc_usage, 0);
	if (argc > 0)
		usage_with_options(builtin_gc_usage, builtin_gc_options);

	if (aggressive) {
		argv_array_pushl(&repack, "-f", "--depth=250", NULL);
		if (aggressive_window > 0)
			argv_array_pushf(&repack, "--window=%d", aggressive_window);
	}
	if (quiet)
		argv_array_push(&repack, "-q");

	if (!auto_gc) {
		add_repack_all_option();
	} else {
		/*
		 * Auto-gc should be as unintrusive as possible.
		 */
		if (!need_to_gc())
			return 0;
		if (!quiet)
			fprintf(stderr,
				_("Auto packing the repository for optimum performance. You may also\n"
				"run \"git gc\" manually. See "
				"\"git help gc\" for more information.\n"));
	}

	lock_holder = lock_repo_for_gc(force, &lock_pid);
	if (lock_holder && auto_gc)
		return 0; /* be quiet on --auto */
	if (lock_holder)
		die(_("gc is already running on machine '%s' pid %"PRIuMAX" (use --force if not)"),
		    lock_holder, (uintmax_t)lock_pid);

	if (pack_refs && run_command_v_opt(pack_refs_cmd.argv, RUN_GIT_CMD))
		return error(FAILED_RUN, pack_refs_cmd.argv[0]);

	if (run_command_v_opt(reflog.argv, RUN_GIT_CMD))
		return error(FAILED_RUN, reflog.argv[0]);

	if (run_command_v_opt(repack.argv, RUN_GIT_CMD))
		return error(FAILED_RUN, repack.argv[0]);

	/* the prune step only runs when an expiry date is set */
	if (prune_expire) {
		argv_array_push(&prune, prune_expire);
		if (quiet)
			argv_array_push(&prune, "--no-progress");
		if (run_command_v_opt(prune.argv, RUN_GIT_CMD))
			return error(FAILED_RUN, prune.argv[0]);
	}

	if (run_command_v_opt(rerere.argv, RUN_GIT_CMD))
		return error(FAILED_RUN, rerere.argv[0]);

	/* re-check after the run: what is still loose was not cleaned up */
	if (auto_gc && too_many_loose_objects())
		warning(_("There are too many unreachable loose objects; "
			"run 'git prune' to remove them."));

	return 0;
}