1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-24 16:36:21 +02:00
git/builtin/gc.c
Nguyễn Thái Ngọc Duy ae4e89e549 gc: add --keep-largest-pack option
This adds a new repack mode that combines everything into a secondary
pack, leaving the largest pack alone.

This could help reduce memory pressure. On linux-2.6.git, valgrind
massif reports 1.6GB heap in "pack all" case, and 535MB in "pack
all except the base pack" case. We save roughly 1GB memory by
excluding the base pack.

This should also lower I/O because we don't have to rewrite a giant
pack every time (e.g. for linux-2.6.git that's a 1.4GB pack file)..

PS. The use of string_list here seems overkill, but we'll need it in
the next patch...

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-04-16 13:52:29 +09:00

533 lines
13 KiB
C

/*
* git gc builtin command
*
* Cleanup unreachable files and optimize the repository.
*
* Copyright (c) 2007 James Bowes
*
* Based on git-gc.sh, which is
*
* Copyright (c) 2006 Shawn O. Pearce
*/
#include "builtin.h"
#include "repository.h"
#include "config.h"
#include "tempfile.h"
#include "lockfile.h"
#include "parse-options.h"
#include "run-command.h"
#include "sigchain.h"
#include "argv-array.h"
#include "commit.h"
#include "packfile.h"
#include "object-store.h"
#define FAILED_RUN "failed to run %s"
static const char * const builtin_gc_usage[] = {
N_("git gc [<options>]"),
NULL
};
static int pack_refs = 1;
static int prune_reflogs = 1;
static int aggressive_depth = 50;
static int aggressive_window = 250;
static int gc_auto_threshold = 6700;
static int gc_auto_pack_limit = 50;
static int detach_auto = 1;
static timestamp_t gc_log_expire_time;
static const char *gc_log_expire = "1.day.ago";
static const char *prune_expire = "2.weeks.ago";
static const char *prune_worktrees_expire = "3.months.ago";
static struct argv_array pack_refs_cmd = ARGV_ARRAY_INIT;
static struct argv_array reflog = ARGV_ARRAY_INIT;
static struct argv_array repack = ARGV_ARRAY_INIT;
static struct argv_array prune = ARGV_ARRAY_INIT;
static struct argv_array prune_worktrees = ARGV_ARRAY_INIT;
static struct argv_array rerere = ARGV_ARRAY_INIT;
static struct tempfile *pidfile;
static struct lock_file log_lock;
static struct string_list pack_garbage = STRING_LIST_INIT_DUP;
static void clean_pack_garbage(void)
{
int i;
for (i = 0; i < pack_garbage.nr; i++)
unlink_or_warn(pack_garbage.items[i].string);
string_list_clear(&pack_garbage, 0);
}
static void report_pack_garbage(unsigned seen_bits, const char *path)
{
if (seen_bits == PACKDIR_FILE_IDX)
string_list_append(&pack_garbage, path);
}
static void process_log_file(void)
{
struct stat st;
if (fstat(get_lock_file_fd(&log_lock), &st)) {
/*
* Perhaps there was an i/o error or another
* unlikely situation. Try to make a note of
* this in gc.log along with any existing
* messages.
*/
int saved_errno = errno;
fprintf(stderr, _("Failed to fstat %s: %s"),
get_tempfile_path(log_lock.tempfile),
strerror(saved_errno));
fflush(stderr);
commit_lock_file(&log_lock);
errno = saved_errno;
} else if (st.st_size) {
/* There was some error recorded in the lock file */
commit_lock_file(&log_lock);
} else {
/* No error, clean up any old gc.log */
unlink(git_path("gc.log"));
rollback_lock_file(&log_lock);
}
}
static void process_log_file_at_exit(void)
{
fflush(stderr);
process_log_file();
}
static void process_log_file_on_signal(int signo)
{
process_log_file();
sigchain_pop(signo);
raise(signo);
}
static void gc_config(void)
{
const char *value;
if (!git_config_get_value("gc.packrefs", &value)) {
if (value && !strcmp(value, "notbare"))
pack_refs = -1;
else
pack_refs = git_config_bool("gc.packrefs", value);
}
git_config_get_int("gc.aggressivewindow", &aggressive_window);
git_config_get_int("gc.aggressivedepth", &aggressive_depth);
git_config_get_int("gc.auto", &gc_auto_threshold);
git_config_get_int("gc.autopacklimit", &gc_auto_pack_limit);
git_config_get_bool("gc.autodetach", &detach_auto);
git_config_get_expiry("gc.pruneexpire", &prune_expire);
git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire);
git_config_get_expiry("gc.logexpiry", &gc_log_expire);
git_config(git_default_config, NULL);
}
static int too_many_loose_objects(void)
{
/*
* Quickly check if a "gc" is needed, by estimating how
* many loose objects there are. Because SHA-1 is evenly
* distributed, we can check only one and get a reasonable
* estimate.
*/
DIR *dir;
struct dirent *ent;
int auto_threshold;
int num_loose = 0;
int needed = 0;
if (gc_auto_threshold <= 0)
return 0;
dir = opendir(git_path("objects/17"));
if (!dir)
return 0;
auto_threshold = DIV_ROUND_UP(gc_auto_threshold, 256);
while ((ent = readdir(dir)) != NULL) {
if (strspn(ent->d_name, "0123456789abcdef") != 38 ||
ent->d_name[38] != '\0')
continue;
if (++num_loose > auto_threshold) {
needed = 1;
break;
}
}
closedir(dir);
return needed;
}
static void find_base_packs(struct string_list *packs)
{
struct packed_git *p, *base = NULL;
for (p = get_packed_git(the_repository); p; p = p->next) {
if (!p->pack_local)
continue;
if (!base || base->pack_size < p->pack_size) {
base = p;
}
}
if (base)
string_list_append(packs, base->pack_name);
}
static int too_many_packs(void)
{
struct packed_git *p;
int cnt;
if (gc_auto_pack_limit <= 0)
return 0;
for (cnt = 0, p = get_packed_git(the_repository); p; p = p->next) {
if (!p->pack_local)
continue;
if (p->pack_keep)
continue;
/*
* Perhaps check the size of the pack and count only
* very small ones here?
*/
cnt++;
}
return gc_auto_pack_limit < cnt;
}
static int keep_one_pack(struct string_list_item *item, void *data)
{
argv_array_pushf(&repack, "--keep-pack=%s", basename(item->string));
return 0;
}
static void add_repack_all_option(struct string_list *keep_pack)
{
if (prune_expire && !strcmp(prune_expire, "now"))
argv_array_push(&repack, "-a");
else {
argv_array_push(&repack, "-A");
if (prune_expire)
argv_array_pushf(&repack, "--unpack-unreachable=%s", prune_expire);
}
if (keep_pack)
for_each_string_list(keep_pack, keep_one_pack, NULL);
}
static void add_repack_incremental_option(void)
{
argv_array_push(&repack, "--no-write-bitmap-index");
}
static int need_to_gc(void)
{
/*
* Setting gc.auto to 0 or negative can disable the
* automatic gc.
*/
if (gc_auto_threshold <= 0)
return 0;
/*
* If there are too many loose objects, but not too many
* packs, we run "repack -d -l". If there are too many packs,
* we run "repack -A -d -l". Otherwise we tell the caller
* there is no need.
*/
if (too_many_packs())
add_repack_all_option(NULL);
else if (too_many_loose_objects())
add_repack_incremental_option();
else
return 0;
if (run_hook_le(NULL, "pre-auto-gc", NULL))
return 0;
return 1;
}
/* return NULL on success, else hostname running the gc */
static const char *lock_repo_for_gc(int force, pid_t* ret_pid)
{
static struct lock_file lock;
char my_host[HOST_NAME_MAX + 1];
struct strbuf sb = STRBUF_INIT;
struct stat st;
uintmax_t pid;
FILE *fp;
int fd;
char *pidfile_path;
if (is_tempfile_active(pidfile))
/* already locked */
return NULL;
if (xgethostname(my_host, sizeof(my_host)))
xsnprintf(my_host, sizeof(my_host), "unknown");
pidfile_path = git_pathdup("gc.pid");
fd = hold_lock_file_for_update(&lock, pidfile_path,
LOCK_DIE_ON_ERROR);
if (!force) {
static char locking_host[HOST_NAME_MAX + 1];
static char *scan_fmt;
int should_exit;
if (!scan_fmt)
scan_fmt = xstrfmt("%s %%%ds", "%"SCNuMAX, HOST_NAME_MAX);
fp = fopen(pidfile_path, "r");
memset(locking_host, 0, sizeof(locking_host));
should_exit =
fp != NULL &&
!fstat(fileno(fp), &st) &&
/*
* 12 hour limit is very generous as gc should
* never take that long. On the other hand we
* don't really need a strict limit here,
* running gc --auto one day late is not a big
* problem. --force can be used in manual gc
* after the user verifies that no gc is
* running.
*/
time(NULL) - st.st_mtime <= 12 * 3600 &&
fscanf(fp, scan_fmt, &pid, locking_host) == 2 &&
/* be gentle to concurrent "gc" on remote hosts */
(strcmp(locking_host, my_host) || !kill(pid, 0) || errno == EPERM);
if (fp != NULL)
fclose(fp);
if (should_exit) {
if (fd >= 0)
rollback_lock_file(&lock);
*ret_pid = pid;
free(pidfile_path);
return locking_host;
}
}
strbuf_addf(&sb, "%"PRIuMAX" %s",
(uintmax_t) getpid(), my_host);
write_in_full(fd, sb.buf, sb.len);
strbuf_release(&sb);
commit_lock_file(&lock);
pidfile = register_tempfile(pidfile_path);
free(pidfile_path);
return NULL;
}
static int report_last_gc_error(void)
{
struct strbuf sb = STRBUF_INIT;
int ret = 0;
struct stat st;
char *gc_log_path = git_pathdup("gc.log");
if (stat(gc_log_path, &st)) {
if (errno == ENOENT)
goto done;
ret = error_errno(_("Can't stat %s"), gc_log_path);
goto done;
}
if (st.st_mtime < gc_log_expire_time)
goto done;
ret = strbuf_read_file(&sb, gc_log_path, 0);
if (ret > 0)
ret = error(_("The last gc run reported the following. "
"Please correct the root cause\n"
"and remove %s.\n"
"Automatic cleanup will not be performed "
"until the file is removed.\n\n"
"%s"),
gc_log_path, sb.buf);
strbuf_release(&sb);
done:
free(gc_log_path);
return ret;
}
static int gc_before_repack(void)
{
if (pack_refs && run_command_v_opt(pack_refs_cmd.argv, RUN_GIT_CMD))
return error(FAILED_RUN, pack_refs_cmd.argv[0]);
if (prune_reflogs && run_command_v_opt(reflog.argv, RUN_GIT_CMD))
return error(FAILED_RUN, reflog.argv[0]);
pack_refs = 0;
prune_reflogs = 0;
return 0;
}
int cmd_gc(int argc, const char **argv, const char *prefix)
{
int aggressive = 0;
int auto_gc = 0;
int quiet = 0;
int force = 0;
const char *name;
pid_t pid;
int daemonized = 0;
int keep_base_pack = -1;
struct option builtin_gc_options[] = {
OPT__QUIET(&quiet, N_("suppress progress reporting")),
{ OPTION_STRING, 0, "prune", &prune_expire, N_("date"),
N_("prune unreferenced objects"),
PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire },
OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")),
OPT_BOOL_F(0, "auto", &auto_gc, N_("enable auto-gc mode"),
PARSE_OPT_NOCOMPLETE),
OPT_BOOL_F(0, "force", &force,
N_("force running gc even if there may be another gc running"),
PARSE_OPT_NOCOMPLETE),
OPT_BOOL(0, "keep-largest-pack", &keep_base_pack,
N_("repack all other packs except the largest pack")),
OPT_END()
};
if (argc == 2 && !strcmp(argv[1], "-h"))
usage_with_options(builtin_gc_usage, builtin_gc_options);
argv_array_pushl(&pack_refs_cmd, "pack-refs", "--all", "--prune", NULL);
argv_array_pushl(&reflog, "reflog", "expire", "--all", NULL);
argv_array_pushl(&repack, "repack", "-d", "-l", NULL);
argv_array_pushl(&prune, "prune", "--expire", NULL);
argv_array_pushl(&prune_worktrees, "worktree", "prune", "--expire", NULL);
argv_array_pushl(&rerere, "rerere", "gc", NULL);
/* default expiry time, overwritten in gc_config */
gc_config();
if (parse_expiry_date(gc_log_expire, &gc_log_expire_time))
die(_("Failed to parse gc.logexpiry value %s"), gc_log_expire);
if (pack_refs < 0)
pack_refs = !is_bare_repository();
argc = parse_options(argc, argv, prefix, builtin_gc_options,
builtin_gc_usage, 0);
if (argc > 0)
usage_with_options(builtin_gc_usage, builtin_gc_options);
if (aggressive) {
argv_array_push(&repack, "-f");
if (aggressive_depth > 0)
argv_array_pushf(&repack, "--depth=%d", aggressive_depth);
if (aggressive_window > 0)
argv_array_pushf(&repack, "--window=%d", aggressive_window);
}
if (quiet)
argv_array_push(&repack, "-q");
if (auto_gc) {
/*
* Auto-gc should be least intrusive as possible.
*/
if (!need_to_gc())
return 0;
if (!quiet) {
if (detach_auto)
fprintf(stderr, _("Auto packing the repository in background for optimum performance.\n"));
else
fprintf(stderr, _("Auto packing the repository for optimum performance.\n"));
fprintf(stderr, _("See \"git help gc\" for manual housekeeping.\n"));
}
if (detach_auto) {
if (report_last_gc_error())
return -1;
if (lock_repo_for_gc(force, &pid))
return 0;
if (gc_before_repack())
return -1;
delete_tempfile(&pidfile);
/*
* failure to daemonize is ok, we'll continue
* in foreground
*/
daemonized = !daemonize();
}
} else {
struct string_list keep_pack = STRING_LIST_INIT_NODUP;
if (keep_base_pack != -1) {
if (keep_base_pack)
find_base_packs(&keep_pack);
}
add_repack_all_option(&keep_pack);
string_list_clear(&keep_pack, 0);
}
name = lock_repo_for_gc(force, &pid);
if (name) {
if (auto_gc)
return 0; /* be quiet on --auto */
die(_("gc is already running on machine '%s' pid %"PRIuMAX" (use --force if not)"),
name, (uintmax_t)pid);
}
if (daemonized) {
hold_lock_file_for_update(&log_lock,
git_path("gc.log"),
LOCK_DIE_ON_ERROR);
dup2(get_lock_file_fd(&log_lock), 2);
sigchain_push_common(process_log_file_on_signal);
atexit(process_log_file_at_exit);
}
if (gc_before_repack())
return -1;
if (!repository_format_precious_objects) {
if (run_command_v_opt(repack.argv, RUN_GIT_CMD))
return error(FAILED_RUN, repack.argv[0]);
if (prune_expire) {
argv_array_push(&prune, prune_expire);
if (quiet)
argv_array_push(&prune, "--no-progress");
if (repository_format_partial_clone)
argv_array_push(&prune,
"--exclude-promisor-objects");
if (run_command_v_opt(prune.argv, RUN_GIT_CMD))
return error(FAILED_RUN, prune.argv[0]);
}
}
if (prune_worktrees_expire) {
argv_array_push(&prune_worktrees, prune_worktrees_expire);
if (run_command_v_opt(prune_worktrees.argv, RUN_GIT_CMD))
return error(FAILED_RUN, prune_worktrees.argv[0]);
}
if (run_command_v_opt(rerere.argv, RUN_GIT_CMD))
return error(FAILED_RUN, rerere.argv[0]);
report_garbage = report_pack_garbage;
reprepare_packed_git(the_repository);
if (pack_garbage.nr > 0)
clean_pack_garbage();
if (auto_gc && too_many_loose_objects())
warning(_("There are too many unreachable loose objects; "
"run 'git prune' to remove them."));
if (!daemonized)
unlink(git_path("gc.log"));
return 0;
}