1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-28 05:46:38 +02:00

Added automatic index generation to fast-import.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
This commit is contained in:
Shawn O. Pearce 2006-08-06 13:51:39 -04:00
parent db5e523fdd
commit 8bcce30126

View File

@ -8,11 +8,75 @@
/* NOTE(review): presumably the maximum delta chain depth; its use is not visible in this view -- confirm. */
static int max_depth = 10;
/* Number of distinct blobs written to the pack; stored in the pack header when it is fixed up. */
static unsigned long object_count;
/* Number of input blobs whose SHA-1 had already been written and were therefore skipped. */
static unsigned long duplicate_count;
/* Running byte offset into the pack file; advanced after every write and recorded as each new object's offset. */
static unsigned long packoff;
/* Number of object entries allocated individually because the preallocated pool was full. */
static unsigned long overflow_count;
/* File descriptor of the pack file being written. */
static int packfd;
/* Reset to 0 whenever a blob is written as a full (non-delta) object; other updates are outside this view. */
static int current_depth;
/* Data, length and SHA-1 of the most recently written blob; the SHA-1 is emitted as the base of a delta object. */
static void *lastdat;
static unsigned long lastdatlen;
static unsigned char lastsha1[20];
/* SHA-1 checksum of the finished pack; appended to both the pack and the index. */
static unsigned char packsha1[20];
/*
 * One object known to the importer.  Entries live either in the
 * preallocated pool or inside an overflow_object_entry, and are
 * chained per hash bucket in table[].
 */
struct object_entry
{
/* Next entry in the same hash bucket, or 0 at the end of the chain. */
struct object_entry *next;
/* Offset of the object in the pack file; 0 means it has not been written yet. */
unsigned long offset;
/* SHA-1 of the object. */
unsigned char sha1[20];
};
/*
 * Individually allocated object entry, used once the preallocated
 * pool has been exhausted.
 */
struct overflow_object_entry
{
/* Next overflow entry; new entries are pushed onto the head of the list. */
struct overflow_object_entry *next;
/* The embedded entry that is handed out to callers and linked into table[]. */
struct object_entry oe;
};
/* First entry of the preallocated pool, sized in main() from the estimated object count. */
struct object_entry *pool_start;
/* Next unused pool entry; equals pool_end once the pool is full. */
struct object_entry *pool_next;
/* One past the last pool entry. */
struct object_entry *pool_end;
/* Head of the singly linked list of overflow entries. */
struct overflow_object_entry *overflow;
/* Hash table of bucket chains, indexed by the first two bytes of the SHA-1. */
struct object_entry *table[1 << 16];
/*
 * Obtain storage for a new object entry and store a copy of sha1 in it.
 *
 * The entry comes from the preallocated pool while slots remain there;
 * after that each entry is allocated on its own, pushed onto the
 * overflow list and counted in overflow_count.
 *
 * Only the sha1 field is filled in here; next and offset are left for
 * the caller to initialize.
 */
static struct object_entry *new_object(unsigned char *sha1)
{
	struct overflow_object_entry *spill;

	if (pool_next != pool_end) {
		struct object_entry *slot = pool_next;

		pool_next++;
		memcpy(slot->sha1, sha1, sizeof(slot->sha1));
		return slot;
	}

	/* Pool exhausted: fall back to a separately allocated entry. */
	spill = xmalloc(sizeof(struct overflow_object_entry));
	spill->next = overflow;
	memcpy(spill->oe.sha1, sha1, sizeof(spill->oe.sha1));
	overflow = spill;
	overflow_count++;
	return &spill->oe;
}
/*
 * Look up sha1 in the object hash table, adding it if it is missing.
 *
 * The bucket is chosen from the first two bytes of the SHA-1.  If an
 * entry with the same SHA-1 already exists it is returned unchanged;
 * otherwise a new entry with offset 0 is appended to the end of the
 * bucket's chain and returned.
 */
static struct object_entry *insert_object(unsigned char *sha1)
{
	unsigned int bucket = sha1[0] << 8 | sha1[1];
	/* Points at the link that must be updated to append a new entry. */
	struct object_entry **link = &table[bucket];
	struct object_entry *cur;

	for (cur = *link; cur; cur = cur->next) {
		if (!memcmp(sha1, cur->sha1, sizeof(cur->sha1)))
			return cur;
		link = &cur->next;
	}

	cur = new_object(sha1);
	cur->next = NULL;
	cur->offset = 0;
	*link = cur;
	return cur;
}
static ssize_t yread(int fd, void *buffer, size_t length)
{
@ -66,7 +130,7 @@ static unsigned long encode_header(enum object_type type, unsigned long size, un
return n;
}
static void write_blob (void *dat, unsigned long datlen)
static void write_blob(void *dat, unsigned long datlen)
{
z_stream s;
void *out, *delta;
@ -92,6 +156,7 @@ static void write_blob (void *dat, unsigned long datlen)
die("Can't write object header: %s", strerror(errno));
if (ywrite(packfd, lastsha1, sizeof(lastsha1)) != sizeof(lastsha1))
die("Can't write object base: %s", strerror(errno));
packoff += hdrlen + sizeof(lastsha1);
} else {
current_depth = 0;
s.next_in = dat;
@ -99,6 +164,7 @@ static void write_blob (void *dat, unsigned long datlen)
hdrlen = encode_header(OBJ_BLOB, datlen, hdr);
if (ywrite(packfd, hdr, hdrlen) != hdrlen)
die("Can't write object header: %s", strerror(errno));
packoff += hdrlen;
}
s.avail_out = deflateBound(&s, s.avail_in);
@ -109,13 +175,14 @@ static void write_blob (void *dat, unsigned long datlen)
if (ywrite(packfd, out, s.total_out) != s.total_out)
die("Failed writing compressed data %s", strerror(errno));
packoff += s.total_out;
free(out);
if (delta)
free(delta);
}
static void init_pack_header ()
static void init_pack_header()
{
const char* magic = "PACK";
unsigned long version = 2;
@ -129,13 +196,13 @@ static void init_pack_header ()
die("Can't write pack version: %s", strerror(errno));
if (ywrite(packfd, &zero, 4) != 4)
die("Can't write 0 object count: %s", strerror(errno));
packoff = 4 * 3;
}
static void fixup_header_footer ()
static void fixup_header_footer()
{
SHA_CTX c;
char hdr[8];
unsigned char sha1[20];
unsigned long cnt;
char *buf;
size_t n;
@ -148,7 +215,6 @@ static void fixup_header_footer ()
die("Failed reading header: %s", strerror(errno));
SHA1_Update(&c, hdr, 8);
fprintf(stderr, "%lu objects\n", object_count);
cnt = htonl(object_count);
SHA1_Update(&c, &cnt, 4);
if (ywrite(packfd, &cnt, 4) != 4)
@ -163,16 +229,81 @@ fprintf(stderr, "%lu objects\n", object_count);
}
free(buf);
SHA1_Final(sha1, &c);
if (ywrite(packfd, sha1, sizeof(sha1)) != sizeof(sha1))
SHA1_Final(packsha1, &c);
if (ywrite(packfd, packsha1, sizeof(packsha1)) != sizeof(packsha1))
die("Failed writing pack checksum: %s", strerror(errno));
}
int main (int argc, const char **argv)
/*
 * qsort() comparator for an array of struct object_entry pointers.
 * Each argument points at an array element (a pointer to an entry);
 * entries are ordered by memcmp() of their SHA-1 bytes.
 */
static int oecmp (const void *_a, const void *_b)
{
/* NOTE(review): the next line looks like a removed line of the old main() left over from the diff rendering, not part of oecmp -- confirm. */
packfd = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0666);
struct object_entry *a = *((struct object_entry**)_a);
struct object_entry *b = *((struct object_entry**)_b);
return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
}
/*
 * Write the index file idx_name for all objects recorded so far.
 *
 * Every entry from the pool and from the overflow list is collected,
 * sorted by SHA-1, and written as:
 *   - a 256-entry fan-out table, where slot i holds (in network byte
 *     order) the number of objects whose first SHA-1 byte is <= i;
 *   - one record per object: 4-byte network-order pack offset followed
 *     by the 20-byte SHA-1;
 *   - the pack checksum held in packsha1.
 *
 * Dies if any object offset cannot be represented in the 4-byte offset
 * field, rather than silently writing a truncated offset.
 */
static void write_index(const char *idx_name)
{
	struct sha1file *f;
	struct object_entry **idx, **c, **last;
	struct object_entry *e;
	struct overflow_object_entry *o;
	unsigned int array[256];
	int i;

	/* Build the sorted table of object IDs. */
	idx = xmalloc(object_count * sizeof(struct object_entry*));
	c = idx;
	for (e = pool_start; e != pool_next; e++)
		*c++ = e;
	for (o = overflow; o; o = o->next)
		*c++ = &o->oe;
	last = idx + object_count;
	qsort(idx, object_count, sizeof(struct object_entry*), oecmp);

	/*
	 * Each record stores the offset in 4 bytes.  Check before the
	 * index file is created so a failure leaves no partial index.
	 */
	for (c = idx; c != last; c++) {
		if ((*c)->offset > 0xffffffffUL)
			die("Object offset %lu is too large for the pack index",
			    (*c)->offset);
	}

	/* Generate the fan-out array. */
	c = idx;
	for (i = 0; i < 256; i++) {
		struct object_entry **next = c;
		while (next < last) {
			if ((*next)->sha1[0] != i)
				break;
			next++;
		}
		array[i] = htonl(next - idx);
		c = next;
	}

	f = sha1create("%s", idx_name);
	sha1write(f, array, 256 * sizeof(int));
	for (c = idx; c != last; c++) {
		unsigned int offset = htonl((*c)->offset);
		sha1write(f, &offset, 4);
		sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
	}
	sha1write(f, packsha1, sizeof(packsha1));
	/* NOTE(review): presumably finalizes and closes the index file; sha1close's argument semantics are not visible here -- confirm. */
	sha1close(f, NULL, 1);
	free(idx);
}
int main(int argc, const char **argv)
{
const char *base_name = argv[1];
int est_obj_cnt = atoi(argv[2]);
char *pack_name;
char *idx_name;
pack_name = xmalloc(strlen(base_name) + 6);
sprintf(pack_name, "%s.pack", base_name);
idx_name = xmalloc(strlen(base_name) + 5);
sprintf(idx_name, "%s.idx", base_name);
packfd = open(pack_name, O_RDWR|O_CREAT|O_TRUNC, 0666);
if (packfd < 0)
die("Can't create pack file %s: %s", argv[1], strerror(errno));
die("Can't create pack file %s: %s", pack_name, strerror(errno));
pool_start = xmalloc(est_obj_cnt * sizeof(struct object_entry));
pool_next = pool_start;
pool_end = pool_start + est_obj_cnt;
init_pack_header();
for (;;) {
@ -182,8 +313,10 @@ int main (int argc, const char **argv)
char hdr[128];
unsigned char sha1[20];
SHA_CTX c;
struct object_entry *e;
if (yread(0, &datlen, 4) != 4)
break;
dat = xmalloc(datlen);
@ -196,19 +329,30 @@ int main (int argc, const char **argv)
SHA1_Update(&c, dat, datlen);
SHA1_Final(sha1, &c);
write_blob(dat, datlen);
object_count++;
printf("%s\n", sha1_to_hex(sha1));
fflush(stdout);
e = insert_object(sha1);
if (!e->offset) {
e->offset = packoff;
write_blob(dat, datlen);
object_count++;
printf("%s\n", sha1_to_hex(sha1));
fflush(stdout);
if (lastdat)
free(lastdat);
lastdat = dat;
lastdatlen = datlen;
memcpy(lastsha1, sha1, sizeof(sha1));
if (lastdat)
free(lastdat);
lastdat = dat;
lastdatlen = datlen;
memcpy(lastsha1, sha1, sizeof(sha1));
} else {
duplicate_count++;
free(dat);
}
}
fixup_header_footer();
close(packfd);
write_index(idx_name);
fprintf(stderr, "%lu objects, %lu duplicates, %lu pool overflow\n",
object_count, duplicate_count, overflow_count);
return 0;
}