1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-26 03:36:25 +02:00
git/builtin/pack-redundant.c
Junio C Hamano c3b58472be pack-redundant: gauge the usage before proposing its removal
The subcommand is unusably slow and the reason why nobody reports it
as a performance bug is suspected to be the absence of users.  Let's
show a big message that asks the user to tell us that they still
care about the command when an attempt is made to run the command,
with an escape hatch to override it with a command line option.

In a few releases, we may turn it into an error and keep it for a
few more releases before finally removing it (during the whole time,
the plan to remove it would be interrupted by end user raising hand).

Reviewed-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-12-15 14:30:11 -08:00

667 lines
15 KiB
C

/*
*
* Copyright 2005, Lukas Sandstrom <lukass@etek.chalmers.se>
*
* This file is licensed under the GPL v2.
*
*/
#include "builtin.h"
#include "repository.h"
#include "packfile.h"
#include "object-store.h"
#define BLKSIZE 512
static const char pack_redundant_usage[] =
"git pack-redundant [--verbose] [--alt-odb] (--all | <filename.pack>...)";
static int load_all_packs, verbose, alt_odb;
/*
 * One node of a singly linked list of object IDs.  Nodes are recycled
 * through the free-node pool; recycling a node does not free the
 * object ID it points at.
 */
struct llist_item {
/* following node, or NULL at the end of the list */
struct llist_item *next;
/* object ID referenced (not copied) by this node */
const struct object_id *oid;
};
/*
 * Singly linked list of object IDs with O(1) access to both ends.
 * The sorted helpers in this file expect the nodes to be in ascending
 * oidcmp() order.
 */
static struct llist {
/* first node, or NULL when the list is empty */
struct llist_item *front;
/* last node, or NULL when the list is empty */
struct llist_item *back;
/* number of nodes currently linked into the list */
size_t size;
} *all_objects; /* all objects which must be present in local packfiles */
/*
 * Linked list entry describing one packfile under consideration.
 * local_packs holds packs of the local object store, altodb_packs
 * holds packs that are not local.
 */
static struct pack_list {
/* following entry, or NULL at the end of the list */
struct pack_list *next;
/* the packfile this entry describes */
struct packed_git *pack;
/*
 * Objects of this pack that no other compared pack contains; NULL
 * until cmp_two_packs() creates it from remaining_objects.
 */
struct llist *unique_objects;
/*
 * Starts as every object listed in the pack index and shrinks as
 * objects covered elsewhere are removed.
 */
struct llist *remaining_objects;
/* number of objects in remaining_objects when the pack was loaded */
size_t all_objects_size;
} *local_packs = NULL, *altodb_packs = NULL;
/* Head of the pool of unused list nodes, chained through their next pointers. */
static struct llist_item *free_nodes;
/*
 * Hand a node back to the free-node pool so that a later
 * llist_item_get() can reuse it.  The node becomes the new pool head.
 */
static inline void llist_item_put(struct llist_item *item)
{
	struct llist_item *old_head = free_nodes;

	free_nodes = item;
	item->next = old_head;
}
/*
 * Obtain an unused list node.
 *
 * A node is popped from the free-node pool when one is available.
 * Otherwise a block of BLKSIZE nodes is allocated: the first node of
 * the block is returned and the others are pushed onto the pool.
 * The returned node's fields are not initialized.
 */
static inline struct llist_item *llist_item_get(void)
{
	struct llist_item *node = free_nodes;
	struct llist_item *block;
	int idx;

	if (node) {
		free_nodes = node->next;
		return node;
	}

	ALLOC_ARRAY(block, BLKSIZE);
	for (idx = 1; idx < BLKSIZE; idx++)
		llist_item_put(&block[idx]);
	return &block[0];
}
/* Allocate an empty list and store its address in *list. */
static inline void llist_init(struct llist **list)
{
	struct llist *fresh = xmalloc(sizeof(struct llist));

	fresh->size = 0;
	fresh->front = NULL;
	fresh->back = NULL;
	*list = fresh;
}
/*
 * Create a new list holding the same object ID pointers as `list`, in
 * the same order.  Only the nodes are duplicated; the object IDs
 * themselves are shared with the source list.
 */
static struct llist * llist_copy(struct llist *list)
{
	struct llist *dup;
	struct llist_item *src, *tail;

	llist_init(&dup);
	dup->size = list->size;
	if (!dup->size)
		return dup;

	/* the first node is handled separately so the loop can append */
	tail = llist_item_get();
	tail->oid = list->front->oid;
	dup->front = tail;

	for (src = list->front->next; src; src = src->next) {
		struct llist_item *node = llist_item_get();

		node->oid = src->oid;
		tail->next = node;
		tail = node;
	}

	tail->next = NULL;
	dup->back = tail;
	return dup;
}
/*
 * Link a new node referencing `oid` into `list` directly behind
 * `after`, or at the front of the list when `after` is NULL.
 * Returns the new node.
 */
static inline struct llist_item *llist_insert(struct llist *list,
					      struct llist_item *after,
					      const struct object_id *oid)
{
	struct llist_item *node = llist_item_get();

	node->oid = oid;
	if (after) {
		node->next = after->next;
		after->next = node;
		if (list->back == after)
			list->back = node;
	} else {
		/* no predecessor: the node becomes the new front */
		if (list->size) {
			node->next = list->front;
		} else {
			node->next = NULL;
			list->back = node;
		}
		list->front = node;
	}
	list->size++;
	return node;
}
/* Append a node referencing `oid` to the end of `list`; returns the new node. */
static inline struct llist_item *llist_insert_back(struct llist *list,
						   const struct object_id *oid)
{
	struct llist_item *tail = list->back;

	return llist_insert(list, tail, oid);
}
/*
 * Insert `oid` into the ascending-sorted `list` unless an equal object
 * ID is already present.
 *
 * `hint`, when non-NULL, must be a node of `list`; scanning starts
 * there instead of at the front.  Passing the node returned by the
 * previous call makes a run of ascending insertions cheap.
 *
 * Returns the node that holds `oid`: the pre-existing one when the ID
 * was already listed, the newly inserted one otherwise.
 */
static inline struct llist_item *llist_insert_sorted_unique(struct llist *list,
			const struct object_id *oid, struct llist_item *hint)
{
	struct llist_item *prev = NULL, *l;

	l = (hint == NULL) ? list->front : hint;
	while (l) {
		int cmp = oidcmp(l->oid, oid);

		if (cmp > 0) { /* we insert before this entry */
			if (prev == NULL && l != list->front) {
				/*
				 * The hint itself already sorts after oid and
				 * we don't know its predecessor.  Inserting
				 * with prev == NULL would put the node at the
				 * front and break the ordering, so rescan
				 * from the real front instead.
				 */
				l = list->front;
				continue;
			}
			return llist_insert(list, prev, oid);
		}
		if (!cmp) /* already exists */
			return l;
		prev = l;
		l = l->next;
	}
	/* insert at the end */
	return llist_insert_back(list, oid);
}
/*
 * Remove the node whose object ID equals `oid` from the sorted `list`,
 * if there is one, and recycle it through the free-node pool.
 *
 * `hint`, when non-NULL, is the node to start scanning from instead of
 * the front.  If the match turns out to be the hint itself and the
 * hint is not the front, its predecessor is unknown, so the scan is
 * repeated from the front.
 *
 * Returns the last node visited that sorts before `oid` (NULL when no
 * such node was seen), which is suitable as the hint for the next,
 * larger, object ID.
 */
static inline struct llist_item * llist_sorted_remove(struct llist *list, const struct object_id *oid, struct llist_item *hint)
{
	for (;;) {
		struct llist_item *before = NULL;
		struct llist_item *cur = hint ? hint : list->front;
		int restart = 0;

		for (; cur; before = cur, cur = cur->next) {
			const int order = oidcmp(cur->oid, oid);

			if (order > 0)
				return before; /* not in list, since sorted */
			if (order < 0)
				continue;

			/* found it: unlink */
			if (before) {
				before->next = cur->next;
			} else if (hint && hint != list->front) {
				/* we don't know the previous element */
				hint = NULL;
				restart = 1;
				break;
			} else {
				list->front = cur->next;
			}
			if (list->back == cur)
				list->back = before;
			llist_item_put(cur);
			list->size--;
			return before;
		}

		if (!restart)
			return before;
	}
}
/*
 * Compute A \ B in place: every object ID listed in B is removed from
 * A.  The position returned by each removal is fed back as the hint
 * for the next one.
 */
static void llist_sorted_difference_inplace(struct llist *A,
					    struct llist *B)
{
	struct llist_item *cursor = NULL;
	struct llist_item *item;

	for (item = B->front; item; item = item->next)
		cursor = llist_sorted_remove(A, item->oid, cursor);
}
/*
 * Push a heap-allocated copy of `entry` onto the front of the list
 * `*pl`.  The copy is shallow: it shares the pack and object lists of
 * `entry`.  Returns the new list head.
 */
static inline struct pack_list * pack_list_insert(struct pack_list **pl,
						  struct pack_list *entry)
{
	struct pack_list *head = xmalloc(sizeof(struct pack_list));

	memcpy(head, entry, sizeof(struct pack_list));
	head->next = *pl;
	*pl = head;
	return head;
}
/* Count the entries of the pack list starting at `pl` (0 for NULL). */
static inline size_t pack_list_size(struct pack_list *pl)
{
	size_t count;

	for (count = 0; pl; pl = pl->next)
		count++;
	return count;
}
/*
 * Build a new list with shallow copies of the entries of A whose pack
 * does not appear in B.  Entries keep the order they have in A.
 * Returns NULL when nothing is left.
 */
static struct pack_list * pack_list_difference(const struct pack_list *A,
					       const struct pack_list *B)
{
	struct pack_list *head = NULL;
	struct pack_list **tail = &head;
	const struct pack_list *a, *b;

	for (a = A; a; a = a->next) {
		struct pack_list *copy;

		for (b = B; b; b = b->next)
			if (a->pack == b->pack)
				break;
		if (b)
			continue; /* also in B: drop it */

		copy = xmalloc(sizeof(struct pack_list));
		memcpy(copy, a, sizeof(struct pack_list));
		copy->next = NULL;
		*tail = copy;
		tail = &copy->next;
	}
	return head;
}
/*
 * Walk the sorted object names in the indexes of two packs in lockstep
 * and drop every object found in both from each pack's unique_objects
 * list.  A pack's unique_objects list is created on first use as a
 * copy of its remaining_objects.
 */
static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2)
{
	const unsigned int hashsz = the_hash_algo->rawsz;
	struct llist_item *p1_hint = NULL, *p2_hint = NULL;
	const unsigned char *p1_base, *p2_base;
	size_t p1_off = 0, p2_off = 0;
	size_t p1_step, p2_step, p1_end, p2_end;

	if (!p1->unique_objects)
		p1->unique_objects = llist_copy(p1->remaining_objects);
	if (!p2->unique_objects)
		p2->unique_objects = llist_copy(p2->remaining_objects);

	/*
	 * Locate the first object name behind the 256-entry fan-out
	 * table.  For index versions below 2 another 4 bytes are skipped
	 * and each step covers 4 extra bytes; later versions skip 8
	 * bytes and step by the bare hash size.
	 */
	p1_base = p1->pack->index_data;
	p1_base += 256 * 4;
	p2_base = p2->pack->index_data;
	p2_base += 256 * 4;
	if (p1->pack->index_version < 2) {
		p1_base += 4;
		p1_step = hashsz + 4;
	} else {
		p1_base += 8;
		p1_step = hashsz + 0;
	}
	if (p2->pack->index_version < 2) {
		p2_base += 4;
		p2_step = hashsz + 4;
	} else {
		p2_base += 8;
		p2_step = hashsz + 0;
	}

	p1_end = p1->pack->num_objects * p1_step;
	p2_end = p2->pack->num_objects * p2_step;
	while (p1_off < p1_end && p2_off < p2_end) {
		const unsigned char *shared = p1_base + p1_off;
		/* cmp ~ p1 - p2 */
		const int cmp = hashcmp(shared, p2_base + p2_off);

		if (cmp < 0) {
			/* p1 has the object, p2 doesn't */
			p1_off += p1_step;
		} else if (cmp > 0) {
			/* p2 has the object, p1 doesn't */
			p2_off += p2_step;
		} else {
			/* both have it: unique to neither */
			p1_hint = llist_sorted_remove(p1->unique_objects,
					(const struct object_id *)shared,
					p1_hint);
			p2_hint = llist_sorted_remove(p2->unique_objects,
					(const struct object_id *)shared,
					p2_hint);
			p1_off += p1_step;
			p2_off += p2_step;
		}
	}
}
/*
 * Count the objects that appear in the indexes of both packs by
 * walking the two sorted name tables in lockstep.  Despite the name,
 * the value returned is the number of shared objects, not the size of
 * the union.
 */
static size_t sizeof_union(struct packed_git *p1, struct packed_git *p2)
{
	const unsigned int hashsz = the_hash_algo->rawsz;
	const unsigned char *p1_base, *p2_base;
	size_t p1_off = 0, p2_off = 0;
	size_t p1_step, p2_step, p1_end, p2_end;
	size_t shared = 0;

	/* position both cursors on the first object name of each index */
	p1_base = p1->index_data;
	p1_base += 256 * 4;
	p2_base = p2->index_data;
	p2_base += 256 * 4;
	if (p1->index_version < 2) {
		p1_base += 4;
		p1_step = hashsz + 4;
	} else {
		p1_base += 8;
		p1_step = hashsz + 0;
	}
	if (p2->index_version < 2) {
		p2_base += 4;
		p2_step = hashsz + 4;
	} else {
		p2_base += 8;
		p2_step = hashsz + 0;
	}

	p1_end = p1->num_objects * p1_step;
	p2_end = p2->num_objects * p2_step;
	while (p1_off < p1_end && p2_off < p2_end) {
		/* cmp ~ p1 - p2 */
		int cmp = hashcmp(p1_base + p1_off, p2_base + p2_off);

		if (cmp < 0) {
			/* p1 has the object, p2 doesn't */
			p1_off += p1_step;
		} else if (cmp > 0) {
			/* p2 has the object, p1 doesn't */
			p2_off += p2_step;
		} else {
			shared++;
			p1_off += p1_step;
			p2_off += p2_step;
		}
	}
	return shared;
}
/*
 * Sum, over every unordered pair of packs in `pl`, the number of
 * objects the two packs have in common.  Quadratic in the number of
 * packs.
 */
static size_t get_pack_redundancy(struct pack_list *pl)
{
	struct pack_list *outer, *inner;
	size_t total = 0;

	for (outer = pl; outer && outer->next; outer = outer->next)
		for (inner = outer->next; inner; inner = inner->next)
			total += sizeof_union(outer->pack, inner->pack);
	return total;
}
/* Add up the pack size and index size of every pack in `pl`. */
static inline off_t pack_set_bytecount(struct pack_list *pl)
{
	off_t bytes = 0;
	struct pack_list *entry;

	for (entry = pl; entry; entry = entry->next) {
		bytes += entry->pack->pack_size;
		bytes += entry->pack->index_size;
	}
	return bytes;
}
/*
 * Sort comparator over pointers to pack_list entries.  Entries with
 * more remaining objects sort first; ties are broken by placing the
 * entry with the larger all_objects_size first.
 */
static int cmp_remaining_objects(const void *a, const void *b)
{
	struct pack_list *lhs = *((struct pack_list **)a);
	struct pack_list *rhs = *((struct pack_list **)b);
	size_t lhs_left = lhs->remaining_objects->size;
	size_t rhs_left = rhs->remaining_objects->size;

	/* sort by remaining objects, more objects first */
	if (lhs_left != rhs_left)
		return lhs_left < rhs_left ? 1 : -1;

	/* have the same remaining_objects, big pack first */
	if (lhs->all_objects_size != rhs->all_objects_size)
		return lhs->all_objects_size < rhs->all_objects_size ? 1 : -1;

	return 0;
}
/*
 * Sort the pack list `*pl` in place, greater size of remaining_objects
 * first (see cmp_remaining_objects() for the tie-break).  `*pl` is
 * updated to point at the new head.  Lists with fewer than two entries
 * are left untouched.
 */
static void sort_pack_list(struct pack_list **pl)
{
	struct pack_list **ary, *p;
	size_t i; /* same type as n: avoids a signed/unsigned comparison */
	size_t n = pack_list_size(*pl);

	if (n < 2)
		return;

	/* prepare an array of packed_list for easier sorting */
	ary = xcalloc(n, sizeof(struct pack_list *));
	for (n = 0, p = *pl; p; p = p->next)
		ary[n++] = p;

	QSORT(ary, n, cmp_remaining_objects);

	/* link them back again */
	for (i = 0; i < n - 1; i++)
		ary[i]->next = ary[i + 1];
	ary[n - 1]->next = NULL;
	*pl = ary[0];

	free(ary);
}
/*
 * Pick a set of local packs that together cover all_objects and store
 * it in `*min`.
 *
 * Packs with a non-empty unique_objects list are always kept.  If they
 * do not cover everything, the other packs are added greedily: the
 * pack with the most still-uncovered objects is taken, its objects are
 * subtracted from the packs not yet taken, and the step repeats until
 * no pack contributes anything.
 *
 * Requires unique_objects to be set on every entry of local_packs.
 * Note that remaining_objects of the non-unique packs is modified.
 */
static void minimize(struct pack_list **min)
{
	struct pack_list *pl, *unique = NULL, *non_unique = NULL;
	struct llist *missing, *unique_pack_objects;

	/* split the local packs by whether they hold unique objects */
	for (pl = local_packs; pl; pl = pl->next) {
		if (pl->unique_objects->size)
			pack_list_insert(&unique, pl);
		else
			pack_list_insert(&non_unique, pl);
	}

	/* find out which objects are missing from the set of unique packs */
	missing = llist_copy(all_objects);
	for (pl = unique; pl; pl = pl->next)
		llist_sorted_difference_inplace(missing, pl->remaining_objects);

	*min = unique;

	/* return if there are no objects missing from the unique set */
	if (missing->size == 0) {
		free(missing);
		return;
	}

	unique_pack_objects = llist_copy(all_objects);
	llist_sorted_difference_inplace(unique_pack_objects, missing);
	/* last use of `missing`: release its header like the early return does */
	free(missing);

	/* remove unique pack objects from the non_unique packs */
	for (pl = non_unique; pl; pl = pl->next)
		llist_sorted_difference_inplace(pl->remaining_objects, unique_pack_objects);
	/* last use of `unique_pack_objects` */
	free(unique_pack_objects);

	while (non_unique) {
		/* sort the non_unique packs, greater size of remaining_objects first */
		sort_pack_list(&non_unique);
		if (non_unique->remaining_objects->size == 0)
			break;

		pack_list_insert(min, non_unique);

		/* what the chosen pack covers no longer counts for the rest */
		for (pl = non_unique->next; pl && pl->remaining_objects->size > 0; pl = pl->next)
			llist_sorted_difference_inplace(pl->remaining_objects, non_unique->remaining_objects);

		non_unique = non_unique->next;
	}
}
/*
 * Build all_objects: the sorted, duplicate-free set of objects found
 * in the local packs, minus every object that is also available from
 * a pack in altodb_packs.
 */
static void load_all_objects(void)
{
	struct pack_list *pack;

	llist_init(&all_objects);

	for (pack = local_packs; pack; pack = pack->next) {
		/* each pack is scanned from the front of all_objects again */
		struct llist_item *cursor = NULL;
		struct llist_item *item;

		for (item = pack->remaining_objects->front; item; item = item->next)
			cursor = llist_insert_sorted_unique(all_objects,
							    item->oid, cursor);
	}

	/* remove objects present in remote packs */
	for (pack = altodb_packs; pack; pack = pack->next)
		llist_sorted_difference_inplace(all_objects, pack->remaining_objects);
}
/*
 * Compare every pair of local packs so that each pack's unique_objects
 * list ends up holding only objects no other local pack contains.
 * This scales like O(n^2) in the number of packs.
 *
 * unique_objects is created lazily by cmp_two_packs(), which never
 * runs when there is just one pack.  In that case the pack is given an
 * empty unique_objects list so that callers which dereference the
 * field (minimize() does) do not hit a NULL pointer.
 */
static void cmp_local_packs(void)
{
	struct pack_list *subset, *pl = local_packs;

	/* only one packfile: nothing to compare against */
	if (pl && !pl->next) {
		if (!pl->unique_objects)
			llist_init(&pl->unique_objects);
		return;
	}

	while ((subset = pl)) {
		while ((subset = subset->next))
			cmp_two_packs(pl, subset);
		pl = pl->next;
	}
}
/*
 * Remove from every local pack's remaining_objects the objects that
 * are also listed in a pack of altodb_packs.
 */
static void scan_alt_odb_packs(void)
{
	struct pack_list *alt, *local;

	for (alt = altodb_packs; alt; alt = alt->next)
		for (local = local_packs; local; local = local->next)
			llist_sorted_difference_inplace(local->remaining_objects,
							alt->remaining_objects);
}
/*
 * Register pack `p` for analysis.
 *
 * The object names in the pack index are loaded, in index order, into
 * a fresh remaining_objects list, and the entry is pushed onto
 * local_packs or altodb_packs depending on p->pack_local.  Non-local
 * packs are skipped unless --alt-odb or --verbose was given.
 *
 * Returns the new list entry, or NULL when the pack was skipped or its
 * index could not be opened.
 */
static struct pack_list * add_pack(struct packed_git *p)
{
	struct pack_list l;
	size_t off = 0, step;
	const unsigned char *base;

	if (!p->pack_local && !(alt_odb || verbose))
		return NULL;

	/*
	 * Open the index before allocating the object list, so that a
	 * failure here does not leak the list.
	 */
	if (open_pack_index(p))
		return NULL;

	l.pack = p;
	llist_init(&l.remaining_objects);

	/* skip the 256-entry fan-out table and position on the first name */
	base = p->index_data;
	base += 256 * 4 + ((p->index_version < 2) ? 4 : 8);
	step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0);
	while (off < p->num_objects * step) {
		/* nodes point straight into the index data, nothing is copied */
		llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off));
		off += step;
	}
	l.all_objects_size = l.remaining_objects->size;
	l.unique_objects = NULL; /* created on demand by cmp_two_packs() */

	if (p->pack_local)
		return pack_list_insert(&local_packs, &l);
	else
		return pack_list_insert(&altodb_packs, &l);
}
/*
 * Register the first known pack whose name contains `filename` as a
 * substring.  Dies when `filename` is shorter than 40 characters or
 * when no pack matches.  Returns whatever add_pack() returns for the
 * matching pack.
 */
static struct pack_list * add_pack_file(const char *filename)
{
	struct packed_git *candidate = get_all_packs(the_repository);

	if (strlen(filename) < 40)
		die("Bad pack filename: %s", filename);

	for (; candidate; candidate = candidate->next)
		if (strstr(candidate->pack_name, filename))
			return add_pack(candidate);

	die("Filename %s not found in packed_git", filename);
}
/* Hand every pack returned by get_all_packs() to add_pack(). */
static void load_all(void)
{
	struct packed_git *pack;

	for (pack = get_all_packs(the_repository); pack; pack = pack->next)
		add_pack(pack);
}
/*
 * Entry point of "git pack-redundant".
 *
 * Parses the options, loads either all packs (--all) or the packs
 * named on the command line, reads object IDs to ignore from stdin
 * when stdin is not a terminal, computes a covering set of packs with
 * minimize() and prints the index and pack file names of every local
 * pack outside that set to stdout.  With --verbose, statistics go to
 * stderr.  Unless --i-still-use-this is given, a notice that the
 * command is nominated for removal is printed to stderr.
 *
 * Dies when no local pack was found or stdin carries a malformed
 * object ID.  Returns 0 otherwise.  `prefix` is unused.
 */
int cmd_pack_redundant(int argc, const char **argv, const char *prefix)
{
	char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
	struct pack_list *min = NULL, *red, *pl;
	struct llist *ignore;
	struct object_id *oid;
	int i_still_use_this = 0;
	int i;

	if (argc == 2 && !strcmp(argv[1], "-h"))
		usage(pack_redundant_usage);

	/* options come first; stop at "--" or at the first non-option */
	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (!strcmp(arg, "--")) {
			i++;
			break;
		} else if (!strcmp(arg, "--all")) {
			load_all_packs = 1;
		} else if (!strcmp(arg, "--verbose")) {
			verbose = 1;
		} else if (!strcmp(arg, "--alt-odb")) {
			alt_odb = 1;
		} else if (!strcmp(arg, "--i-still-use-this")) {
			i_still_use_this = 1;
		} else if (*arg == '-') {
			usage(pack_redundant_usage);
		} else {
			break;
		}
	}

	if (!i_still_use_this) {
		fputs(_("'git pack-redundant' is nominated for removal.\n"
			"If you still use this command, please add an extra\n"
			"option, '--i-still-use-this', on the command line\n"
			"and let us know you still use it by sending an e-mail\n"
			"to <git@vger.kernel.org>. Thanks.\n"), stderr);
	}

	if (!load_all_packs) {
		/* the remaining arguments name the packs to look at */
		for (; argv[i] != NULL; i++)
			add_pack_file(argv[i]);
	} else {
		load_all();
	}

	if (local_packs == NULL)
		die("Zero packs found!");

	load_all_objects();

	if (alt_odb)
		scan_alt_odb_packs();

	/* ignore objects given on stdin */
	llist_init(&ignore);
	if (!isatty(0)) {
		while (fgets(buf, sizeof(buf), stdin)) {
			oid = xmalloc(sizeof(*oid));
			if (get_oid_hex(buf, oid))
				die("Bad object ID on stdin: %s", buf);
			llist_insert_sorted_unique(ignore, oid, NULL);
		}
	}
	llist_sorted_difference_inplace(all_objects, ignore);
	for (pl = local_packs; pl; pl = pl->next)
		llist_sorted_difference_inplace(pl->remaining_objects, ignore);

	cmp_local_packs();

	minimize(&min);

	if (verbose) {
		fprintf(stderr, "There are %lu packs available in alt-odbs.\n",
			(unsigned long)pack_list_size(altodb_packs));
		fprintf(stderr, "The smallest (bytewise) set of packs is:\n");
		for (pl = min; pl; pl = pl->next)
			fprintf(stderr, "\t%s\n", pl->pack->pack_name);
		fprintf(stderr, "containing %lu duplicate objects "
				"with a total size of %lukb.\n",
			(unsigned long)get_pack_redundancy(min),
			(unsigned long)pack_set_bytecount(min)/1024);
		fprintf(stderr, "A total of %lu unique objects were considered.\n",
			(unsigned long)all_objects->size);
		fprintf(stderr, "Redundant packs (with indexes):\n");
	}

	/* every local pack that is not part of the chosen set is redundant */
	red = pack_list_difference(local_packs, min);
	for (pl = red; pl; pl = pl->next)
		printf("%s\n%s\n",
		       sha1_pack_index_name(pl->pack->hash),
		       pl->pack->pack_name);

	if (verbose)
		fprintf(stderr, "%luMB of redundant packs in total.\n",
			(unsigned long)pack_set_bytecount(red)/(1024*1024));

	return 0;
}