1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-25 19:26:38 +02:00

Merge branch 'tb/geometric-repack' into next

"git repack" so far has been only capable of repacking everything
under the sun into a single pack (or split by size).  A cleverer
strategy to reduce the cost of repacking a repository has been
introduced.

* tb/geometric-repack:
  builtin/pack-objects.c: ignore missing links with --stdin-packs
  builtin/repack.c: reword comment around pack-objects flags
  builtin/repack.c: be more conservative with unsigned overflows
  builtin/repack.c: assign pack split later
  t7703: test --geometric repack with loose objects
  builtin/repack.c: do not repack single packs with --geometric
  builtin/repack.c: add '--geometric' option
  packfile: add kept-pack cache for find_kept_pack_entry()
  builtin/pack-objects.c: rewrite honor-pack-keep logic
  p5303: measure time to repack with keep
  p5303: add missing &&-chains
  builtin/pack-objects.c: add '--stdin-packs' option
  revision: learn '--no-kept-objects'
  packfile: introduce 'find_kept_pack_entry()'
This commit is contained in:
Junio C Hamano 2021-03-22 14:50:09 -07:00
commit 8147f00207
13 changed files with 1029 additions and 60 deletions

View File

@ -85,6 +85,16 @@ base-name::
reference was included in the resulting packfile. This reference was included in the resulting packfile. This
can be useful to send new tags to native Git clients. can be useful to send new tags to native Git clients.
--stdin-packs::
Read the basenames of packfiles (e.g., `pack-1234abcd.pack`)
from the standard input, instead of object names or revision
arguments. The resulting pack contains all objects listed in the
included packs (those not beginning with `^`), excluding any
objects listed in the excluded packs (beginning with `^`).
+
Incompatible with `--revs`, or options that imply `--revs` (such as
`--all`), with the exception of `--unpacked`, which is compatible.
--window=<n>:: --window=<n>::
--depth=<n>:: --depth=<n>::
These two options affect how the objects contained in These two options affect how the objects contained in

View File

@ -165,6 +165,29 @@ depth is 4095.
Pass the `--delta-islands` option to `git-pack-objects`, see Pass the `--delta-islands` option to `git-pack-objects`, see
linkgit:git-pack-objects[1]. linkgit:git-pack-objects[1].
-g<factor>::
--geometric=<factor>::
Arrange resulting pack structure so that each successive pack
contains at least `<factor>` times the number of objects as the
next-largest pack.
+
`git repack` ensures this by determining a "cut" of packfiles that need
to be repacked into one in order to ensure a geometric progression. It
picks the smallest set of packfiles such that as many of the larger
packfiles (by count of objects contained in that pack) may be left
intact.
+
Unlike other repack modes, the set of objects to pack is determined
uniquely by the set of packs being "rolled-up"; in other words, the
packs determined to need to be combined in order to restore a geometric
progression.
+
When `--unpacked` is specified, loose objects are implicitly included in
this "roll-up", without respect to their reachability. This is subject
to change in the future. This option (implying a drastically different
repack mode) is not guaranteed to work with all other combinations of
options to `git repack`.
CONFIGURATION CONFIGURATION
------------- -------------

View File

@ -1188,7 +1188,8 @@ static int have_duplicate_entry(const struct object_id *oid,
return 1; return 1;
} }
static int want_found_object(int exclude, struct packed_git *p) static int want_found_object(const struct object_id *oid, int exclude,
struct packed_git *p)
{ {
if (exclude) if (exclude)
return 1; return 1;
@ -1204,27 +1205,82 @@ static int want_found_object(int exclude, struct packed_git *p)
* make sure no copy of this object appears in _any_ pack that makes us * make sure no copy of this object appears in _any_ pack that makes us
* to omit the object, so we need to check all the packs. * to omit the object, so we need to check all the packs.
* *
* We can however first check whether these options can possible matter; * We can however first check whether these options can possibly matter;
* if they do not matter we know we want the object in generated pack. * if they do not matter we know we want the object in generated pack.
* Otherwise, we signal "-1" at the end to tell the caller that we do * Otherwise, we signal "-1" at the end to tell the caller that we do
* not know either way, and it needs to check more packs. * not know either way, and it needs to check more packs.
*/ */
if (!ignore_packed_keep_on_disk &&
!ignore_packed_keep_in_core &&
(!local || !have_non_local_packs))
return 1;
/*
* Objects in packs borrowed from elsewhere are discarded regardless of
* if they appear in other packs that weren't borrowed.
*/
if (local && !p->pack_local) if (local && !p->pack_local)
return 0; return 0;
if (p->pack_local &&
((ignore_packed_keep_on_disk && p->pack_keep) || /*
(ignore_packed_keep_in_core && p->pack_keep_in_core))) * Then handle .keep first, as we have a fast(er) path there.
return 0; */
if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
/*
* Set the flags for the kept-pack cache to be the ones we want
* to ignore.
*
* That is, if we are ignoring objects in on-disk keep packs,
* then we want to search through the on-disk keep and ignore
* the in-core ones.
*/
unsigned flags = 0;
if (ignore_packed_keep_on_disk)
flags |= ON_DISK_KEEP_PACKS;
if (ignore_packed_keep_in_core)
flags |= IN_CORE_KEEP_PACKS;
if (ignore_packed_keep_on_disk && p->pack_keep)
return 0;
if (ignore_packed_keep_in_core && p->pack_keep_in_core)
return 0;
if (has_object_kept_pack(oid, flags))
return 0;
}
/*
* At this point we know definitively that either we don't care about
* keep-packs, or the object is not in one. Keep checking other
* conditions...
*/
if (!local || !have_non_local_packs)
return 1;
/* we don't know yet; keep looking for more packs */ /* we don't know yet; keep looking for more packs */
return -1; return -1;
} }
/*
 * Decide whether 'oid' is wanted, looking only at pack 'p'.
 *
 * If 'p' is the pack already recorded in '*found_pack', the recorded
 * '*found_offset' is reused; otherwise 'p' is searched for the object.
 * The first pack in which the object is found (and which passes
 * is_pack_valid()) is stored in '*found_pack' / '*found_offset'.
 *
 * Returns -1 when the object is not in 'p', or when 'p' would have become
 * the first found pack but is not valid; otherwise returns whatever
 * want_found_object() says for 'p'.
 */
static int want_object_in_pack_one(struct packed_git *p,
				   const struct object_id *oid,
				   int exclude,
				   struct packed_git **found_pack,
				   off_t *found_offset)
{
	off_t where = (p == *found_pack)
		? *found_offset
		: find_pack_entry_one(oid->hash, p);

	/* not present in this pack: nothing to decide here */
	if (!where)
		return -1;

	/* remember the first usable location of the object */
	if (!*found_pack) {
		if (!is_pack_valid(p))
			return -1;
		*found_offset = where;
		*found_pack = p;
	}

	return want_found_object(oid, exclude, p);
}
/* /*
* Check whether we want the object in the pack (e.g., we do not want * Check whether we want the object in the pack (e.g., we do not want
* objects found in non-local stores if the "--local" option was used). * objects found in non-local stores if the "--local" option was used).
@ -1252,7 +1308,7 @@ static int want_object_in_pack(const struct object_id *oid,
* are present we will determine the answer right now. * are present we will determine the answer right now.
*/ */
if (*found_pack) { if (*found_pack) {
want = want_found_object(exclude, *found_pack); want = want_found_object(oid, exclude, *found_pack);
if (want != -1) if (want != -1)
return want; return want;
} }
@ -1260,51 +1316,20 @@ static int want_object_in_pack(const struct object_id *oid,
for (m = get_multi_pack_index(the_repository); m; m = m->next) { for (m = get_multi_pack_index(the_repository); m; m = m->next) {
struct pack_entry e; struct pack_entry e;
if (fill_midx_entry(the_repository, oid, &e, m)) { if (fill_midx_entry(the_repository, oid, &e, m)) {
struct packed_git *p = e.p; want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset);
off_t offset; if (want != -1)
return want;
if (p == *found_pack)
offset = *found_offset;
else
offset = find_pack_entry_one(oid->hash, p);
if (offset) {
if (!*found_pack) {
if (!is_pack_valid(p))
continue;
*found_offset = offset;
*found_pack = p;
}
want = want_found_object(exclude, p);
if (want != -1)
return want;
}
} }
} }
list_for_each(pos, get_packed_git_mru(the_repository)) { list_for_each(pos, get_packed_git_mru(the_repository)) {
struct packed_git *p = list_entry(pos, struct packed_git, mru); struct packed_git *p = list_entry(pos, struct packed_git, mru);
off_t offset; want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset);
if (!exclude && want > 0)
if (p == *found_pack) list_move(&p->mru,
offset = *found_offset; get_packed_git_mru(the_repository));
else if (want != -1)
offset = find_pack_entry_one(oid->hash, p); return want;
if (offset) {
if (!*found_pack) {
if (!is_pack_valid(p))
continue;
*found_offset = offset;
*found_pack = p;
}
want = want_found_object(exclude, p);
if (!exclude && want > 0)
list_move(&p->mru,
get_packed_git_mru(the_repository));
if (want != -1)
return want;
}
} }
if (uri_protocols.nr) { if (uri_protocols.nr) {
@ -2986,6 +3011,191 @@ static int git_pack_config(const char *k, const char *v, void *cb)
return git_default_config(k, v, cb); return git_default_config(k, v, cb);
} }
/* Counters for trace2 output when in --stdin-packs mode. */
static int stdin_packs_found_nr;
static int stdin_packs_hints_nr;
static int add_object_entry_from_pack(const struct object_id *oid,
struct packed_git *p,
uint32_t pos,
void *_data)
{
struct rev_info *revs = _data;
struct object_info oi = OBJECT_INFO_INIT;
off_t ofs;
enum object_type type;
display_progress(progress_state, ++nr_seen);
if (have_duplicate_entry(oid, 0))
return 0;
ofs = nth_packed_object_offset(p, pos);
if (!want_object_in_pack(oid, 0, &p, &ofs))
return 0;
oi.typep = &type;
if (packed_object_info(the_repository, p, ofs, &oi) < 0)
die(_("could not get type of object %s in pack %s"),
oid_to_hex(oid), p->pack_name);
else if (type == OBJ_COMMIT) {
/*
* commits in included packs are used as starting points for the
* subsequent revision walk
*/
add_pending_oid(revs, NULL, oid, 0);
}
stdin_packs_found_nr++;
create_object_entry(oid, type, 0, 0, 0, p, ofs);
return 0;
}
/*
 * Commit callback for the --stdin-packs revision walk. Intentionally empty:
 * both arguments are unused.
 */
static void show_commit_pack_hint(struct commit *commit, void *_data)
{
/* nothing to do; commits don't have a namehash */
}
/*
 * Object callback for the --stdin-packs revision walk. If 'object' is
 * already in 'to_pack', record a name-hash and no_try_delta hint derived
 * from 'name' and bump the hints counter; otherwise do nothing.
 * '_data' is unused.
 */
static void show_object_pack_hint(struct object *object, const char *name,
void *_data)
{
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
/* only objects that made it into the packing list can take a hint */
if (!oe)
return;
/*
* Our 'to_pack' list was constructed by iterating all objects packed in
* included packs, and so doesn't have a non-zero hash field that you
* would typically pick up during a reachability traversal.
*
* Make a best-effort attempt to fill in the ->hash and ->no_try_delta
* here, using the name passed to this callback, in order to perhaps
* improve the delta selection process.
*
* NOTE(review): 'name' is NULL-checked before no_try_delta() but not
* before pack_name_hash(); presumably pack_name_hash() tolerates NULL --
* confirm.
*/
oe->hash = pack_name_hash(name);
oe->no_try_delta = name && no_try_delta(name);
stdin_packs_hints_nr++;
}
/*
 * QSORT comparator over 'struct string_list_item' elements whose ->util
 * points at a 'struct packed_git'. Sorts by descending mtime so that
 * objects are laid out roughly newest-to-oldest.
 */
static int pack_mtime_cmp(const void *_a, const void *_b)
{
	const struct string_list_item *lhs = _a;
	const struct string_list_item *rhs = _b;
	struct packed_git *pack_a = lhs->util;
	struct packed_git *pack_b = rhs->util;

	/* 1 when a is older, -1 when a is newer, 0 when equal */
	return (pack_a->mtime < pack_b->mtime) -
	       (pack_b->mtime < pack_a->mtime);
}
/*
 * Build the packing list for --stdin-packs mode.
 *
 * Reads pack basenames from stdin, one per line (empty lines are skipped).
 * A leading '^' marks a pack as excluded; every other line names an
 * included pack. Dies if a named pack is not among the repository's packs.
 *
 * Excluded packs are flagged pack_keep_in_core. Every object of every
 * included pack is then offered to add_object_entry_from_pack(), and a
 * revision walk seeded by that callback fills in name-hash hints. Finally
 * the found/hint counters are reported through trace2.
 */
static void read_packs_list_from_stdin(void)
{
struct strbuf buf = STRBUF_INIT;
struct string_list include_packs = STRING_LIST_INIT_DUP;
struct string_list exclude_packs = STRING_LIST_INIT_DUP;
struct string_list_item *item = NULL;
struct packed_git *p;
struct rev_info revs;
repo_init_revisions(the_repository, &revs, NULL);
/*
* Use a revision walk to fill in the namehash of objects in the include
* packs. To save time, we'll avoid traversing through objects that are
* in excluded packs.
*
* That may cause us to avoid populating all of the namehash fields of
* all included objects, but our goal is best-effort, since this is only
* an optimization during delta selection.
*/
revs.no_kept_objects = 1;
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
revs.blob_objects = 1;
revs.tree_objects = 1;
revs.tag_objects = 1;
revs.ignore_missing_links = 1;
/* split the input into included and excluded ('^'-prefixed) pack names */
while (strbuf_getline(&buf, stdin) != EOF) {
if (!buf.len)
continue;
if (*buf.buf == '^')
string_list_append(&exclude_packs, buf.buf + 1);
else
string_list_append(&include_packs, buf.buf);
strbuf_reset(&buf);
}
/* string_list_lookup() below is used on these lists, so sort them first */
string_list_sort(&include_packs);
string_list_sort(&exclude_packs);
/* attach each known pack to the list entry that names it, if any */
for (p = get_all_packs(the_repository); p; p = p->next) {
const char *pack_name = pack_basename(p);
item = string_list_lookup(&include_packs, pack_name);
if (!item)
item = string_list_lookup(&exclude_packs, pack_name);
if (item)
item->util = p;
}
/*
* First handle all of the excluded packs, marking them as kept in-core
* so that later calls to add_object_entry() discard any objects that
* are also found in excluded packs.
*/
for_each_string_list_item(item, &exclude_packs) {
struct packed_git *p = item->util;
if (!p)
die(_("could not find pack '%s'"), item->string);
p->pack_keep_in_core = 1;
}
/*
* Order packs by descending mtime (see pack_mtime_cmp()); use QSORT
* directly to access the string_list_item's ->util pointer, which
* string_list_sort() does not provide.
*/
QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);
for_each_string_list_item(item, &include_packs) {
struct packed_git *p = item->util;
if (!p)
die(_("could not find pack '%s'"), item->string);
for_each_object_in_pack(p,
add_object_entry_from_pack,
&revs,
FOR_EACH_OBJECT_PACK_ORDER);
}
/* walk from the commits queued above, only to collect name-hash hints */
if (prepare_revision_walk(&revs))
die(_("revision walk setup failed"));
traverse_commit_list(&revs,
show_commit_pack_hint,
show_object_pack_hint,
NULL);
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
stdin_packs_found_nr);
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
stdin_packs_hints_nr);
strbuf_release(&buf);
string_list_clear(&include_packs, 0);
string_list_clear(&exclude_packs, 0);
}
static void read_object_list_from_stdin(void) static void read_object_list_from_stdin(void)
{ {
char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2]; char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
@ -3489,6 +3699,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
struct strvec rp = STRVEC_INIT; struct strvec rp = STRVEC_INIT;
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
int rev_list_index = 0; int rev_list_index = 0;
int stdin_packs = 0;
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
struct option pack_objects_options[] = { struct option pack_objects_options[] = {
OPT_SET_INT('q', "quiet", &progress, OPT_SET_INT('q', "quiet", &progress,
@ -3539,6 +3750,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
OPT_SET_INT_F(0, "indexed-objects", &rev_list_index, OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
N_("include objects referred to by the index"), N_("include objects referred to by the index"),
1, PARSE_OPT_NONEG), 1, PARSE_OPT_NONEG),
OPT_BOOL(0, "stdin-packs", &stdin_packs,
N_("read packs from stdin")),
OPT_BOOL(0, "stdout", &pack_to_stdout, OPT_BOOL(0, "stdout", &pack_to_stdout,
N_("output pack to stdout")), N_("output pack to stdout")),
OPT_BOOL(0, "include-tag", &include_tag, OPT_BOOL(0, "include-tag", &include_tag,
@ -3645,7 +3858,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
use_internal_rev_list = 1; use_internal_rev_list = 1;
strvec_push(&rp, "--indexed-objects"); strvec_push(&rp, "--indexed-objects");
} }
if (rev_list_unpacked) { if (rev_list_unpacked && !stdin_packs) {
use_internal_rev_list = 1; use_internal_rev_list = 1;
strvec_push(&rp, "--unpacked"); strvec_push(&rp, "--unpacked");
} }
@ -3690,8 +3903,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (filter_options.choice) { if (filter_options.choice) {
if (!pack_to_stdout) if (!pack_to_stdout)
die(_("cannot use --filter without --stdout")); die(_("cannot use --filter without --stdout"));
if (stdin_packs)
die(_("cannot use --filter with --stdin-packs"));
} }
if (stdin_packs && use_internal_rev_list)
die(_("cannot use internal rev list with --stdin-packs"));
/* /*
* "soft" reasons not to use bitmaps - for on-disk repack by default we want * "soft" reasons not to use bitmaps - for on-disk repack by default we want
* *
@ -3750,7 +3968,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (progress) if (progress)
progress_state = start_progress(_("Enumerating objects"), 0); progress_state = start_progress(_("Enumerating objects"), 0);
if (!use_internal_rev_list) if (stdin_packs) {
/* avoids adding objects in excluded packs */
ignore_packed_keep_in_core = 1;
read_packs_list_from_stdin();
if (rev_list_unpacked)
add_unreachable_loose_objects();
} else if (!use_internal_rev_list)
read_object_list_from_stdin(); read_object_list_from_stdin();
else { else {
get_object_list(rp.nr, rp.v); get_object_list(rp.nr, rp.v);

View File

@ -297,6 +297,142 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
#define ALL_INTO_ONE 1 #define ALL_INTO_ONE 1
#define LOOSEN_UNREACHABLE 2 #define LOOSEN_UNREACHABLE 2
/*
 * State for a `--geometric` repack: the candidate packs and the point at
 * which they are split into "roll up" and "leave alone" halves.
 */
struct pack_geometry {
/* candidate packs; init_pack_geometry() sorts them by ascending object count */
struct packed_git **pack;
/* number of entries used in, and allocated for, 'pack' */
uint32_t pack_nr, pack_alloc;
/* packs at index [0, split) are rolled up; packs from 'split' on are kept */
uint32_t split;
};
/*
 * The "weight" of a pack for geometric repacking is its object count.
 * Opens the pack's index first and dies if that fails.
 */
static uint32_t geometry_pack_weight(struct packed_git *p)
{
	int failed = open_pack_index(p);

	if (failed)
		die(_("cannot open index for %s"), p->pack_name);

	return p->num_objects;
}
/*
 * QSORT comparator over 'struct packed_git *' elements: orders packs by
 * ascending geometry_pack_weight().
 */
static int geometry_cmp(const void *va, const void *vb)
{
	struct packed_git *const *lhs = va;
	struct packed_git *const *rhs = vb;
	uint32_t lhs_weight = geometry_pack_weight(*lhs);
	uint32_t rhs_weight = geometry_pack_weight(*rhs);

	/* -1, 0 or 1 without risking overflow from a subtraction */
	return (lhs_weight > rhs_weight) - (lhs_weight < rhs_weight);
}
/*
 * Allocate a zeroed pack_geometry, store it in '*geometry_p', and fill it
 * with every pack of the repository, sorted by ascending object count.
 * Packs with 'pack_keep' set are left out unless 'pack_kept_objects' is set.
 */
static void init_pack_geometry(struct pack_geometry **geometry_p)
{
	struct pack_geometry *geom = xcalloc(1, sizeof(*geom));
	struct packed_git *pack;

	*geometry_p = geom;

	for (pack = get_all_packs(the_repository); pack; pack = pack->next) {
		if (pack->pack_keep && !pack_kept_objects)
			continue;

		ALLOC_GROW(geom->pack, geom->pack_nr + 1, geom->pack_alloc);
		geom->pack[geom->pack_nr++] = pack;
	}

	QSORT(geom->pack, geom->pack_nr, geometry_cmp);
}
/*
 * Compute 'geometry->split' for the given 'factor'.
 *
 * 'geometry->pack' is walked from its last element towards its first,
 * stopping at the first adjacent pair where the later pack holds fewer than
 * 'factor' times the objects of the earlier one. Packs before the resulting
 * split are summed into an expected roll-up size, and the split is then
 * advanced past every following pack that is smaller than 'factor' times
 * that running total.
 *
 * With no packs at all the split is set to 0 (== pack_nr). Dies if any of
 * the multiplications or additions would overflow.
 *
 * NOTE(review): 'factor' is a signed int and 'total_size' an off_t, both
 * fed to the unsigned_*_overflows() checks alongside uint32_t weights; a
 * negative 'factor' does not appear to be rejected here -- confirm callers
 * validate it.
 */
static void split_pack_geometry(struct pack_geometry *geometry, int factor)
{
uint32_t i;
uint32_t split;
off_t total_size = 0;
if (!geometry->pack_nr) {
geometry->split = geometry->pack_nr;
return;
}
/*
* First, count the number of packs (in descending order of size) which
* already form a geometric progression.
*/
for (i = geometry->pack_nr - 1; i > 0; i--) {
struct packed_git *ours = geometry->pack[i];
struct packed_git *prev = geometry->pack[i - 1];
if (unsigned_mult_overflows(factor, geometry_pack_weight(prev)))
die(_("pack %s too large to consider in geometric "
"progression"),
prev->pack_name);
if (geometry_pack_weight(ours) < factor * geometry_pack_weight(prev))
break;
}
split = i;
if (split) {
/*
* Move the split one to the right, since the top element in the
* last-compared pair can't be in the progression. Only do this
* when we split in the middle of the array (otherwise if we got
* to the end, then the split is in the right place).
*/
split++;
}
/*
* Then, anything to the left of 'split' must be in a new pack. But,
* creating that new pack may cause packs in the heavy half to no longer
* form a geometric progression.
*
* Compute an expected size of the new pack, and then determine how many
* packs in the heavy half need to be joined into it (if any) to restore
* the geometric progression.
*/
for (i = 0; i < split; i++) {
struct packed_git *p = geometry->pack[i];
if (unsigned_add_overflows(total_size, geometry_pack_weight(p)))
die(_("pack %s too large to roll up"), p->pack_name);
total_size += geometry_pack_weight(p);
}
/* absorb heavier packs for as long as they break the progression */
for (i = split; i < geometry->pack_nr; i++) {
struct packed_git *ours = geometry->pack[i];
if (unsigned_mult_overflows(factor, total_size))
die(_("pack %s too large to roll up"), ours->pack_name);
if (geometry_pack_weight(ours) < factor * total_size) {
if (unsigned_add_overflows(total_size,
geometry_pack_weight(ours)))
die(_("pack %s too large to roll up"),
ours->pack_name);
split++;
total_size += geometry_pack_weight(ours);
} else
break;
}
geometry->split = split;
}
/*
 * Release the pack array owned by 'geometry' and reset its bookkeeping
 * fields to an empty state. Passing NULL is a no-op. The struct itself is
 * not freed.
 */
static void clear_pack_geometry(struct pack_geometry *geometry)
{
	if (!geometry)
		return;

	free(geometry->pack);
	/*
	 * Drop the stale pointer as well: leaving it behind next to
	 * pack_nr == 0 and pack_alloc == 0 would make a second clear (or
	 * any later reuse of the struct) operate on freed memory.
	 */
	geometry->pack = NULL;
	geometry->pack_nr = 0;
	geometry->pack_alloc = 0;
	geometry->split = 0;
}
int cmd_repack(int argc, const char **argv, const char *prefix) int cmd_repack(int argc, const char **argv, const char *prefix)
{ {
struct child_process cmd = CHILD_PROCESS_INIT; struct child_process cmd = CHILD_PROCESS_INIT;
@ -304,6 +440,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
struct string_list names = STRING_LIST_INIT_DUP; struct string_list names = STRING_LIST_INIT_DUP;
struct string_list rollback = STRING_LIST_INIT_NODUP; struct string_list rollback = STRING_LIST_INIT_NODUP;
struct string_list existing_packs = STRING_LIST_INIT_DUP; struct string_list existing_packs = STRING_LIST_INIT_DUP;
struct pack_geometry *geometry = NULL;
struct strbuf line = STRBUF_INIT; struct strbuf line = STRBUF_INIT;
int i, ext, ret; int i, ext, ret;
FILE *out; FILE *out;
@ -316,6 +453,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
int no_update_server_info = 0; int no_update_server_info = 0;
struct pack_objects_args po_args = {NULL}; struct pack_objects_args po_args = {NULL};
int geometric_factor = 0;
struct option builtin_repack_options[] = { struct option builtin_repack_options[] = {
OPT_BIT('a', NULL, &pack_everything, OPT_BIT('a', NULL, &pack_everything,
@ -356,6 +494,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
N_("repack objects in packs marked with .keep")), N_("repack objects in packs marked with .keep")),
OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
N_("do not repack this pack")), N_("do not repack this pack")),
OPT_INTEGER('g', "geometric", &geometric_factor,
N_("find a geometric progression with factor <N>")),
OPT_END() OPT_END()
}; };
@ -382,6 +522,13 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (write_bitmaps && !(pack_everything & ALL_INTO_ONE)) if (write_bitmaps && !(pack_everything & ALL_INTO_ONE))
die(_(incremental_bitmap_conflict_error)); die(_(incremental_bitmap_conflict_error));
if (geometric_factor) {
if (pack_everything)
die(_("--geometric is incompatible with -A, -a"));
init_pack_geometry(&geometry);
split_pack_geometry(geometry, geometric_factor);
}
packdir = mkpathdup("%s/pack", get_object_directory()); packdir = mkpathdup("%s/pack", get_object_directory());
packtmp = mkpathdup("%s/.tmp-%d-pack", packdir, (int)getpid()); packtmp = mkpathdup("%s/.tmp-%d-pack", packdir, (int)getpid());
@ -396,9 +543,21 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
strvec_pushf(&cmd.args, "--keep-pack=%s", strvec_pushf(&cmd.args, "--keep-pack=%s",
keep_pack_list.items[i].string); keep_pack_list.items[i].string);
strvec_push(&cmd.args, "--non-empty"); strvec_push(&cmd.args, "--non-empty");
strvec_push(&cmd.args, "--all"); if (!geometry) {
strvec_push(&cmd.args, "--reflog"); /*
strvec_push(&cmd.args, "--indexed-objects"); * We need to grab all reachable objects, including those that
* are reachable from reflogs and the index.
*
* When repacking into a geometric progression of packs,
* however, we ask 'git pack-objects --stdin-packs', and it is
* not about packing objects based on reachability but about
* repacking all the objects in specified packs and loose ones
* (indeed, --stdin-packs is incompatible with these options).
*/
strvec_push(&cmd.args, "--all");
strvec_push(&cmd.args, "--reflog");
strvec_push(&cmd.args, "--indexed-objects");
}
if (has_promisor_remote()) if (has_promisor_remote())
strvec_push(&cmd.args, "--exclude-promisor-objects"); strvec_push(&cmd.args, "--exclude-promisor-objects");
if (write_bitmaps > 0) if (write_bitmaps > 0)
@ -429,17 +588,37 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
strvec_push(&cmd.env_array, "GIT_REF_PARANOIA=1"); strvec_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
} }
} }
} else if (geometry) {
strvec_push(&cmd.args, "--stdin-packs");
strvec_push(&cmd.args, "--unpacked");
} else { } else {
strvec_push(&cmd.args, "--unpacked"); strvec_push(&cmd.args, "--unpacked");
strvec_push(&cmd.args, "--incremental"); strvec_push(&cmd.args, "--incremental");
} }
cmd.no_stdin = 1; if (geometry)
cmd.in = -1;
else
cmd.no_stdin = 1;
ret = start_command(&cmd); ret = start_command(&cmd);
if (ret) if (ret)
return ret; return ret;
if (geometry) {
FILE *in = xfdopen(cmd.in, "w");
/*
* The resulting pack should contain all objects in packs that
* are going to be rolled up, but exclude objects in packs which
* are being left alone.
*/
for (i = 0; i < geometry->split; i++)
fprintf(in, "%s\n", pack_basename(geometry->pack[i]));
for (i = geometry->split; i < geometry->pack_nr; i++)
fprintf(in, "^%s\n", pack_basename(geometry->pack[i]));
fclose(in);
}
out = xfdopen(cmd.out, "r"); out = xfdopen(cmd.out, "r");
while (strbuf_getline_lf(&line, out) != EOF) { while (strbuf_getline_lf(&line, out) != EOF) {
if (line.len != the_hash_algo->hexsz) if (line.len != the_hash_algo->hexsz)
@ -507,6 +686,25 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (!string_list_has_string(&names, sha1)) if (!string_list_has_string(&names, sha1))
remove_redundant_pack(packdir, item->string); remove_redundant_pack(packdir, item->string);
} }
if (geometry) {
struct strbuf buf = STRBUF_INIT;
uint32_t i;
for (i = 0; i < geometry->split; i++) {
struct packed_git *p = geometry->pack[i];
if (string_list_has_string(&names,
hash_to_hex(p->hash)))
continue;
strbuf_reset(&buf);
strbuf_addstr(&buf, pack_basename(p));
strbuf_strip_suffix(&buf, ".pack");
remove_redundant_pack(packdir, buf.buf);
}
strbuf_release(&buf);
}
if (!po_args.quiet && isatty(2)) if (!po_args.quiet && isatty(2))
opts |= PRUNE_PACKED_VERBOSE; opts |= PRUNE_PACKED_VERBOSE;
prune_packed_objects(opts); prune_packed_objects(opts);
@ -528,6 +726,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
string_list_clear(&names, 0); string_list_clear(&names, 0);
string_list_clear(&rollback, 0); string_list_clear(&rollback, 0);
string_list_clear(&existing_packs, 0); string_list_clear(&existing_packs, 0);
clear_pack_geometry(geometry);
strbuf_release(&line); strbuf_release(&line);
return 0; return 0;

View File

@ -153,6 +153,11 @@ struct raw_object_store {
/* A most-recently-used ordered version of the packed_git list. */ /* A most-recently-used ordered version of the packed_git list. */
struct list_head packed_git_mru; struct list_head packed_git_mru;
struct {
struct packed_git **packs;
unsigned flags;
} kept_pack_cache;
/* /*
* A map of packfiles to packed_git structs for tracking which * A map of packfiles to packed_git structs for tracking which
* packs have been loaded already. * packs have been loaded already.

View File

@ -2066,12 +2066,79 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa
return 0; return 0;
} }
/*
 * Drop the repository's kept-pack cache if it exists and was built for a
 * different set of 'flags'. A missing cache, or one built for the same
 * flags, is left untouched.
 */
static void maybe_invalidate_kept_pack_cache(struct repository *r,
					     unsigned flags)
{
	if (!r->objects->kept_pack_cache.packs ||
	    r->objects->kept_pack_cache.flags == flags)
		return;

	FREE_AND_NULL(r->objects->kept_pack_cache.packs);
	r->objects->kept_pack_cache.flags = 0;
}
/*
 * Return the NULL-terminated array of packs that count as "kept" under
 * 'flags' (ON_DISK_KEEP_PACKS and/or IN_CORE_KEEP_PACKS). The array is
 * cached on the repository and rebuilt only when absent or when it was
 * built for different flags.
 */
static struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
{
	struct packed_git **list = NULL;
	struct packed_git *pack;
	size_t count = 0, capacity = 0;

	maybe_invalidate_kept_pack_cache(r, flags);

	/* still valid for these flags: reuse it */
	if (r->objects->kept_pack_cache.packs)
		return r->objects->kept_pack_cache.packs;

	/*
	 * We want "all" packs here, because we need to cover ones that
	 * are used by a midx, as well. We need to look in every one of
	 * them (instead of the midx itself) to cover duplicates. It's
	 * possible that an object is found in two packs that the midx
	 * covers, one kept and one not kept, but the midx returns only
	 * the non-kept version.
	 */
	for (pack = get_all_packs(r); pack; pack = pack->next) {
		int kept_on_disk = pack->pack_keep &&
				   (flags & ON_DISK_KEEP_PACKS);
		int kept_in_core = pack->pack_keep_in_core &&
				   (flags & IN_CORE_KEEP_PACKS);

		if (!kept_on_disk && !kept_in_core)
			continue;

		ALLOC_GROW(list, count + 1, capacity);
		list[count++] = pack;
	}

	/* terminate the list so callers can iterate without a length */
	ALLOC_GROW(list, count + 1, capacity);
	list[count] = NULL;

	r->objects->kept_pack_cache.packs = list;
	r->objects->kept_pack_cache.flags = flags;

	return r->objects->kept_pack_cache.packs;
}
/*
 * Look for 'oid' in the packs selected by 'flags' (see kept_pack_cache()).
 * Returns 1 as soon as fill_pack_entry() succeeds for one of them, having
 * been given 'e' to fill in; returns 0 if none of them matches.
 */
int find_kept_pack_entry(struct repository *r,
			 const struct object_id *oid,
			 unsigned flags,
			 struct pack_entry *e)
{
	struct packed_git **cursor = kept_pack_cache(r, flags);

	while (*cursor) {
		if (fill_pack_entry(oid, e, *cursor))
			return 1;
		cursor++;
	}

	return 0;
}
int has_object_pack(const struct object_id *oid) int has_object_pack(const struct object_id *oid)
{ {
struct pack_entry e; struct pack_entry e;
return find_pack_entry(the_repository, oid, &e); return find_pack_entry(the_repository, oid, &e);
} }
int has_object_kept_pack(const struct object_id *oid, unsigned flags)
{
struct pack_entry e;
return find_kept_pack_entry(the_repository, oid, flags, &e);
}
int has_pack_index(const unsigned char *sha1) int has_pack_index(const unsigned char *sha1)
{ {
struct stat st; struct stat st;

View File

@ -162,13 +162,18 @@ int packed_object_info(struct repository *r,
void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1); void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1);
const struct packed_git *has_packed_and_bad(struct repository *r, const unsigned char *sha1); const struct packed_git *has_packed_and_bad(struct repository *r, const unsigned char *sha1);
#define ON_DISK_KEEP_PACKS 1
#define IN_CORE_KEEP_PACKS 2
/* /*
* Iff a pack file in the given repository contains the object named by sha1, * Iff a pack file in the given repository contains the object named by sha1,
* return true and store its location to e. * return true and store its location to e.
*/ */
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e); int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
int has_object_pack(const struct object_id *oid); int has_object_pack(const struct object_id *oid);
int has_object_kept_pack(const struct object_id *oid, unsigned flags);
int has_pack_index(const unsigned char *sha1); int has_pack_index(const unsigned char *sha1);

View File

@ -2336,6 +2336,16 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
revs->unpacked = 1; revs->unpacked = 1;
} else if (starts_with(arg, "--unpacked=")) { } else if (starts_with(arg, "--unpacked=")) {
die(_("--unpacked=<packfile> no longer supported")); die(_("--unpacked=<packfile> no longer supported"));
} else if (!strcmp(arg, "--no-kept-objects")) {
revs->no_kept_objects = 1;
revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS;
} else if (skip_prefix(arg, "--no-kept-objects=", &optarg)) {
revs->no_kept_objects = 1;
if (!strcmp(optarg, "in-core"))
revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
if (!strcmp(optarg, "on-disk"))
revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS;
} else if (!strcmp(arg, "-r")) { } else if (!strcmp(arg, "-r")) {
revs->diff = 1; revs->diff = 1;
revs->diffopt.flags.recursive = 1; revs->diffopt.flags.recursive = 1;
@ -3795,6 +3805,11 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
return commit_ignore; return commit_ignore;
if (revs->unpacked && has_object_pack(&commit->object.oid)) if (revs->unpacked && has_object_pack(&commit->object.oid))
return commit_ignore; return commit_ignore;
if (revs->no_kept_objects) {
if (has_object_kept_pack(&commit->object.oid,
revs->keep_pack_cache_flags))
return commit_ignore;
}
if (commit->object.flags & UNINTERESTING) if (commit->object.flags & UNINTERESTING)
return commit_ignore; return commit_ignore;
if (revs->line_level_traverse && !want_ancestry(revs)) { if (revs->line_level_traverse && !want_ancestry(revs)) {

View File

@ -148,6 +148,7 @@ struct rev_info {
edge_hint_aggressive:1, edge_hint_aggressive:1,
limited:1, limited:1,
unpacked:1, unpacked:1,
no_kept_objects:1,
boundary:2, boundary:2,
count:1, count:1,
left_right:1, left_right:1,
@ -317,6 +318,9 @@ struct rev_info {
* This is loaded from the commit-graph being used. * This is loaded from the commit-graph being used.
*/ */
struct bloom_filter_settings *bloom_filter_settings; struct bloom_filter_settings *bloom_filter_settings;
/* misc. flags related to '--no-kept-objects' */
unsigned keep_pack_cache_flags;
}; };
int ref_excluded(struct string_list *, const char *path); int ref_excluded(struct string_list *, const char *path);

View File

@ -28,11 +28,18 @@ repack_into_n () {
push @commits, $_ if $. % 5 == 1; push @commits, $_ if $. % 5 == 1;
} }
print reverse @commits; print reverse @commits;
' "$1" >pushes ' "$1" >pushes &&
# create base packfile # create base packfile
head -n 1 pushes | base_pack=$(
git pack-objects --delta-base-offset --revs staging/pack head -n 1 pushes |
git pack-objects --delta-base-offset --revs staging/pack
) &&
test_export base_pack &&
# create an empty packfile
empty_pack=$(git pack-objects staging/pack </dev/null) &&
test_export empty_pack &&
# and then incrementals between each pair of commits # and then incrementals between each pair of commits
last= && last= &&
@ -49,6 +56,12 @@ repack_into_n () {
last=$rev last=$rev
done <pushes && done <pushes &&
(
find staging -type f -name 'pack-*.pack' |
xargs -n 1 basename | grep -v "$base_pack" &&
printf "^pack-%s.pack\n" $base_pack
) >stdin.packs
# and install the whole thing # and install the whole thing
rm -f .git/objects/pack/* && rm -f .git/objects/pack/* &&
mv staging/* .git/objects/pack/ mv staging/* .git/objects/pack/
@ -91,6 +104,23 @@ do
--reflog --indexed-objects --delta-base-offset \ --reflog --indexed-objects --delta-base-offset \
--stdout </dev/null >/dev/null --stdout </dev/null >/dev/null
' '
# Time a full pack-objects run that honors kept packs, with the pack named
# by $empty_pack additionally marked as kept through --keep-pack. All
# output is discarded; only the run time matters.
test_perf "repack with kept ($nr_packs)" '
git pack-objects --keep-true-parents \
--keep-pack=pack-$empty_pack.pack \
--honor-pack-keep --non-empty --all \
--reflog --indexed-objects --delta-base-offset \
--stdout </dev/null >/dev/null
'
# Time pack-objects when the packs to combine are named on standard input.
# The input file stdin.packs lists pack basenames, one per line; a leading
# "^" marks a pack as excluded.
test_perf "repack with --stdin-packs ($nr_packs)" '
git pack-objects \
--keep-true-parents \
--stdin-packs \
--non-empty \
--delta-base-offset \
--stdout <stdin.packs >/dev/null
'
done done
# Measure pack loading with 10,000 packs. # Measure pack loading with 10,000 packs.

View File

@ -532,4 +532,139 @@ test_expect_success 'prefetch objects' '
test_line_count = 1 donelines test_line_count = 1 donelines
' '
# Create the "stdin-packs" repository with three commits (A, B, C) and
# write one pack per tag, named pack-A-*, pack-B-* and pack-C-*.  Each
# pack-objects call uses --incremental, which skips objects that already
# live in a pack, so every pack holds only what its commit introduced.
test_expect_success 'setup for --stdin-packs tests' '
	git init stdin-packs &&
	(
		cd stdin-packs &&

		test_commit A &&
		test_commit B &&
		test_commit C &&

		for id in A B C
		do
			# A loop only reports the status of its last iteration,
			# so bail out of the subshell as soon as one pack fails.
			git pack-objects .git/objects/pack/pack-$id \
				--incremental --revs <<-EOF || exit 1
			refs/tags/$id
			EOF
		done
	)
'
# Feed packs A and C as included and pack B as excluded (leading "^").
# The resulting pack must index exactly the objects that the indexes of
# packs A and C list.
test_expect_success '--stdin-packs with excluded packs' '
(
cd stdin-packs &&
PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" &&
PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" &&
PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" &&
git pack-objects test --stdin-packs <<-EOF &&
$PACK_A
^$PACK_B
$PACK_C
EOF
(
git show-index <$(ls .git/objects/pack/pack-A-*.idx) &&
git show-index <$(ls .git/objects/pack/pack-C-*.idx)
) >expect.raw &&
git show-index <$(ls test-*.idx) >actual.raw &&
cut -d" " -f2 <expect.raw | sort >expect &&
cut -d" " -f2 <actual.raw | sort >actual &&
test_cmp expect actual
)
'
# Combining --stdin-packs with an object filter must be rejected, and the
# error message must name the conflict.
test_expect_success '--stdin-packs is incompatible with --filter' '
(
cd stdin-packs &&
test_must_fail git pack-objects --stdin-packs --stdout \
--filter=blob:none </dev/null 2>err &&
test_i18ngrep "cannot use --filter with --stdin-packs" err
)
'
# Combining --stdin-packs with --revs must be rejected, and the error
# message must name the conflict.
test_expect_success '--stdin-packs is incompatible with --revs' '
(
cd stdin-packs &&
test_must_fail git pack-objects --stdin-packs --revs out \
</dev/null 2>err &&
test_i18ngrep "cannot use internal rev list with --stdin-packs" err
)
'
# Commit D is created after the per-tag packs were written.  With
# --unpacked, the new pack must hold the objects indexed in packs A and C
# plus everything reachable from D but not from C.
test_expect_success '--stdin-packs with loose objects' '
	(
		cd stdin-packs &&

		PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" &&
		PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" &&
		PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" &&

		test_commit D && # loose

		git pack-objects test2 --stdin-packs --unpacked <<-EOF &&
		$PACK_A
		^$PACK_B
		$PACK_C
		EOF

		(
			git show-index <$(ls .git/objects/pack/pack-A-*.idx) &&
			git show-index <$(ls .git/objects/pack/pack-C-*.idx) &&
			git rev-list --objects --no-object-names \
				refs/tags/C..refs/tags/D
		) >expect.raw &&
		git show-index <$(ls test2-*.idx) >actual.raw &&

		cut -d" " -f2 <expect.raw | sort >expect &&
		cut -d" " -f2 <actual.raw | sort >actual &&
		test_cmp expect actual
	)
'
# Pack D holds a copy of HEAD whose parent line was rewritten to the
# "test_oid zero" value, so the commit points at an object that does not
# exist.  pack-objects must still succeed when pack D is included, and the
# result must also contain the objects indexed in pack D.
test_expect_success '--stdin-packs with broken links' '
(
cd stdin-packs &&
# make an unreachable object with a bogus parent
git cat-file -p HEAD >commit &&
sed "s/$(git rev-parse HEAD^)/$(test_oid zero)/" <commit |
git hash-object -w -t commit --stdin >in &&
git pack-objects .git/objects/pack/pack-D <in &&
PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" &&
PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" &&
PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" &&
PACK_D="$(basename .git/objects/pack/pack-D-*.pack)" &&
git pack-objects test3 --stdin-packs --unpacked <<-EOF &&
$PACK_A
^$PACK_B
$PACK_C
$PACK_D
EOF
(
git show-index <$(ls .git/objects/pack/pack-A-*.idx) &&
git show-index <$(ls .git/objects/pack/pack-C-*.idx) &&
git show-index <$(ls .git/objects/pack/pack-D-*.idx) &&
git rev-list --objects --no-object-names \
refs/tags/C..refs/tags/D
) >expect.raw &&
git show-index <$(ls test3-*.idx) >actual.raw &&
cut -d" " -f2 <expect.raw | sort >expect &&
cut -d" " -f2 <actual.raw | sort >actual &&
test_cmp expect actual
)
'
test_done test_done

69
t/t6114-keep-packs.sh Executable file
View File

@ -0,0 +1,69 @@
#!/bin/sh
test_description='rev-list with .keep packs'
. ./test-lib.sh
# Create three commits (loose, packed, kept) and two packs:
#   $KEPT_PACK - what "kept" adds on top of "packed"; gets a .keep file
#   $MISC_PACK - what "packed" adds on top of "loose"
# Nothing reachable from "loose" is packed here.
test_expect_success 'setup' '
test_commit loose &&
test_commit packed &&
test_commit kept &&
KEPT_PACK=$(git pack-objects --revs .git/objects/pack/pack <<-EOF
refs/tags/kept
^refs/tags/packed
EOF
) &&
MISC_PACK=$(git pack-objects --revs .git/objects/pack/pack <<-EOF
refs/tags/packed
^refs/tags/loose
EOF
) &&
touch .git/objects/pack/pack-$KEPT_PACK.keep
'
# Run "git rev-list" with the caller's arguments, keep its raw output in
# the file "out", and print that output sorted.  If rev-list fails, its
# exit status is returned and nothing is printed.
rev_list_objects () {
	git rev-list "$@" >out || return
	sort out
}
# Print the sorted second column (the object names) of "git show-index"
# for the pack index file given as "$1".  The raw show-index output is
# left behind in the file "expect-idx".
idx_objects () {
	# Quote the redirection target: some bash versions complain about an
	# unquoted variable there, and a path with spaces would break.
	git show-index <"$1" >expect-idx &&
	cut -d" " -f2 <expect-idx | sort
}
# The lines that differ between a full object listing and one taken with
# --no-kept-objects must be exactly the objects indexed in $KEPT_PACK.
test_expect_success '--no-kept-objects excludes trees and blobs in .keep packs' '
rev_list_objects --objects --all --no-object-names >kept &&
rev_list_objects --objects --all --no-object-names --no-kept-objects >no-kept &&
idx_objects .git/objects/pack/pack-$KEPT_PACK.idx >expect &&
comm -3 kept no-kept >actual &&
test_cmp expect actual
'
# The "kept" commit is copied into an extra pack without a .keep file and
# a multi-pack-index is written.  Walking HEAD with --no-kept-objects must
# still list only the objects of $MISC_PACK plus those reachable from the
# "loose" tag.
test_expect_success '--no-kept-objects excludes kept non-MIDX object' '
test_config core.multiPackIndex true &&
# Create a pack with just the commit object in pack, and do not mark it
# as kept (even though it appears in $KEPT_PACK, which does have a .keep
# file).
MIDX_PACK=$(git pack-objects .git/objects/pack/pack <<-EOF
$(git rev-parse kept)
EOF
) &&
# Write a MIDX containing all packs, but use the version of the commit
# at "kept" in a non-kept pack by touching $MIDX_PACK.
touch .git/objects/pack/pack-$MIDX_PACK.pack &&
git multi-pack-index write &&
rev_list_objects --objects --no-object-names --no-kept-objects HEAD >actual &&
(
idx_objects .git/objects/pack/pack-$MISC_PACK.idx &&
git rev-list --objects --no-object-names refs/tags/loose
) | sort >expect &&
test_cmp expect actual
'
test_done

183
t/t7703-repack-geometric.sh Executable file
View File

@ -0,0 +1,183 @@
#!/bin/sh
test_description='git repack --geometric works correctly'
. ./test-lib.sh
# NOTE(review): presumably turns off the multi-pack-index test mode so it
# cannot interfere with the pack layout these tests inspect -- confirm.
GIT_TEST_MULTI_PACK_INDEX=0
# Paths, relative to each test repository, used by the tests below.
objdir=.git/objects
midx=$objdir/pack/multi-pack-index
# A freshly initialized repository has nothing to repack; the command must
# report "Nothing new to pack".
test_expect_success '--geometric with no packs' '
git init geometric &&
test_when_finished "rm -fr geometric" &&
(
cd geometric &&
git repack --geometric 2 >out &&
test_i18ngrep "Nothing new to pack" out
)
'
# After "git repack -d" has already packed the only commit, a geometric
# repack has nothing left to do and must report "Nothing new to pack".
test_expect_success '--geometric with one pack' '
git init geometric &&
test_when_finished "rm -fr geometric" &&
(
cd geometric &&
test_commit "base" &&
git repack -d &&
git repack --geometric 2 >out &&
test_i18ngrep "Nothing new to pack" out
)
'
# When pack sizes already double from one pack to the next, the repack
# must leave the set of pack files exactly as it was.
test_expect_success '--geometric with an intact progression' '
git init geometric &&
test_when_finished "rm -fr geometric" &&
(
cd geometric &&
# These packs already form a geometric progression.
test_commit_bulk --start=1 1 && # 3 objects
test_commit_bulk --start=2 2 && # 6 objects
test_commit_bulk --start=4 4 && # 12 objects
find $objdir/pack -name "*.pack" | sort >expect &&
git repack --geometric 2 -d &&
find $objdir/pack -name "*.pack" | sort >actual &&
test_cmp expect actual
)
'
# Two-step check: the first repack must only add one pack and remove
# none; the second repack must then leave a single pack in total.
test_expect_success '--geometric with loose objects' '
git init geometric &&
test_when_finished "rm -fr geometric" &&
(
cd geometric &&
# These packs already form a geometric progression.
test_commit_bulk --start=1 1 && # 3 objects
test_commit_bulk --start=2 2 && # 6 objects
# The loose objects are packed together, breaking the
# progression.
test_commit loose && # 3 objects
find $objdir/pack -name "*.pack" | sort >before &&
git repack --geometric 2 -d &&
find $objdir/pack -name "*.pack" | sort >after &&
comm -13 before after >new &&
comm -23 before after >removed &&
test_line_count = 1 new &&
test_must_be_empty removed &&
git repack --geometric 2 -d &&
find $objdir/pack -name "*.pack" | sort >after &&
# The progression (3, 3, 6) is combined into one new pack.
test_line_count = 1 after
)
'
# Two equally small packs plus two larger ones: after the repack three
# packs must remain, and the listing is checked against the names of the
# packs that existed before and were not among the small ones.
test_expect_success '--geometric with small-pack rollup' '
git init geometric &&
test_when_finished "rm -fr geometric" &&
(
cd geometric &&
test_commit_bulk --start=1 1 && # 3 objects
test_commit_bulk --start=2 1 && # 3 objects
find $objdir/pack -name "*.pack" | sort >small &&
test_commit_bulk --start=3 4 && # 12 objects
test_commit_bulk --start=7 8 && # 24 objects
find $objdir/pack -name "*.pack" | sort >before &&
git repack --geometric 2 -d &&
# Three packs in total; two of the existing large ones, and one
# new one.
find $objdir/pack -name "*.pack" | sort >after &&
test_line_count = 3 after &&
comm -3 small before | tr -d "\t" >large &&
grep -qFf large after
)
'
# The two small packs together outweigh half of the medium pack, so the
# medium pack has to be rolled up with them.  Only the largest pack may
# survive untouched, next to one newly written pack.
test_expect_success '--geometric with small- and large-pack rollup' '
	git init geometric &&
	test_when_finished "rm -fr geometric" &&
	(
		cd geometric &&

		# size(small1) + size(small2) > size(medium) / 2
		test_commit_bulk --start=1 1 && # 3 objects
		test_commit_bulk --start=2 1 && # 3 objects
		test_commit_bulk --start=2 3 && # 7 objects
		test_commit_bulk --start=6 9 && # 27 objects

		find $objdir/pack -name "*.pack" | sort >before &&

		git repack --geometric 2 -d &&

		find $objdir/pack -name "*.pack" | sort >after &&
		comm -12 before after >untouched &&

		# Two packs in total; the largest pack from before running "git
		# repack", and one new one.
		test_line_count = 1 untouched &&
		test_line_count = 2 after
	)
'
# A pack with a .keep file is left out of the geometric repack, so the
# pack layout stays as it is; with --pack-kept-objects the kept pack is
# repacked as well and a single pack remains.
test_expect_success '--geometric ignores kept packs' '
	git init geometric &&
	test_when_finished "rm -fr geometric" &&
	(
		cd geometric &&

		test_commit kept && # 3 objects
		test_commit pack && # 3 objects

		KEPT=$(git pack-objects --revs $objdir/pack/pack <<-EOF
		refs/tags/kept
		EOF
		) &&
		PACK=$(git pack-objects --revs $objdir/pack/pack <<-EOF
		refs/tags/pack
		^refs/tags/kept
		EOF
		) &&

		# Neither pack contains more than twice the number of objects
		# in the other, so they should be combined.  But marking one
		# with a .keep file on disk will "freeze" it, so the pack
		# structure should remain unchanged.
		touch $objdir/pack/pack-$KEPT.keep &&

		find $objdir/pack -name "*.pack" | sort >before &&
		git repack --geometric 2 -d &&
		find $objdir/pack -name "*.pack" | sort >after &&

		# both packs should still exist
		test_path_is_file $objdir/pack/pack-$KEPT.pack &&
		test_path_is_file $objdir/pack/pack-$PACK.pack &&

		# and no new packs should be created
		test_cmp before after &&

		# Passing --pack-kept-objects causes packs with a .keep file to
		# be repacked, too.
		git repack --geometric 2 -d --pack-kept-objects &&

		find $objdir/pack -name "*.pack" >after &&
		test_line_count = 1 after
	)
'
test_done