1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-10 11:56:09 +02:00
git/builtin-unpack-objects.c
Linus Torvalds 48fb7deb5b Fix big left-shifts of unsigned char
Shifting 'unsigned char' or 'unsigned short' left can result in sign
extension errors, since the C integer promotion rules means that the
unsigned char/short will get implicitly promoted to a signed 'int' due to
the shift (or due to other operations).

This normally doesn't matter, but if you shift things up sufficiently, it
will now set the sign bit in 'int', and a subsequent cast to a bigger type
(eg 'long' or 'unsigned long') will now sign-extend the value despite the
original expression being unsigned.

One example of this would be something like

	unsigned long size;
	unsigned char c;

	size += c << 24;

where despite all the variables being unsigned, 'c << 24' ends up being a
signed entity, and will get sign-extended when then doing the addition in
an 'unsigned long' type.

Since git uses 'unsigned char' pointers extensively, we actually have this
bug in a couple of places.

I may have missed some, but this is the result of looking at

	git grep '[^0-9 	][ 	]*<<[ 	][a-z]' -- '*.c' '*.h'
	git grep '<<[   ]*24'

which catches at least the common byte cases (shifting variables by a
variable amount, and shifting by 24 bits).

I also grepped for just 'unsigned char' variables in general, and
converted the ones that most obviously ended up getting implicitly cast
immediately anyway (eg hash_name(), encode_85()).

In addition to just avoiding 'unsigned char', this patch also tries to use
a common idiom for the delta header size thing. We had three different
variations on it: "& 0x7fUL" in one place (getting the sign extension
right), and "& ~0x80" and "& 0x7f" in two other places (not getting it
right). Apart from making them all just avoid using "unsigned char" at
all, I also unified them to then use a simple "& 0x7f".

I considered making a sparse extension which warns about doing implicit
casts from unsigned types to signed types, but it gets rather complex very
quickly, so this is just a hack.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-06-18 09:22:46 -07:00

565 lines
13 KiB
C

#include "builtin.h"
#include "cache.h"
#include "object.h"
#include "delta.h"
#include "pack.h"
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
#include "tree-walk.h"
#include "progress.h"
#include "decorate.h"
#include "fsck.h"
static int dry_run, quiet, recover, has_errors, strict;
static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict] < pack-file";
/* We always read in 4kB chunks. */
static unsigned char buffer[4096];
static unsigned int offset, len;
static off_t consumed_bytes;
static git_SHA_CTX ctx;
/*
* When running under --strict mode, objects whose reachability are
* suspect are kept in core without getting written in the object
* store.
*/
struct obj_buffer {
char *buffer;
unsigned long size;
};
static struct decoration obj_decorate;
static struct obj_buffer *lookup_object_buffer(struct object *base)
{
return lookup_decoration(&obj_decorate, base);
}
static void add_object_buffer(struct object *object, char *buffer, unsigned long size)
{
struct obj_buffer *obj;
obj = xcalloc(1, sizeof(struct obj_buffer));
obj->buffer = buffer;
obj->size = size;
if (add_decoration(&obj_decorate, object, obj))
die("object %s tried to add buffer twice!", sha1_to_hex(object->sha1));
}
/*
* Make sure at least "min" bytes are available in the buffer, and
* return the pointer to the buffer.
*/
static void *fill(int min)
{
if (min <= len)
return buffer + offset;
if (min > sizeof(buffer))
die("cannot fill %d bytes", min);
if (offset) {
git_SHA1_Update(&ctx, buffer, offset);
memmove(buffer, buffer + offset, len);
offset = 0;
}
do {
ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len);
if (ret <= 0) {
if (!ret)
die("early EOF");
die("read error on input: %s", strerror(errno));
}
len += ret;
} while (len < min);
return buffer;
}
static void use(int bytes)
{
if (bytes > len)
die("used more bytes than were available");
len -= bytes;
offset += bytes;
/* make sure off_t is sufficiently large not to wrap */
if (consumed_bytes > consumed_bytes + bytes)
die("pack too large for current definition of off_t");
consumed_bytes += bytes;
}
static void *get_data(unsigned long size)
{
z_stream stream;
void *buf = xmalloc(size);
memset(&stream, 0, sizeof(stream));
stream.next_out = buf;
stream.avail_out = size;
stream.next_in = fill(1);
stream.avail_in = len;
git_inflate_init(&stream);
for (;;) {
int ret = git_inflate(&stream, 0);
use(len - stream.avail_in);
if (stream.total_out == size && ret == Z_STREAM_END)
break;
if (ret != Z_OK) {
error("inflate returned %d\n", ret);
free(buf);
buf = NULL;
if (!recover)
exit(1);
has_errors = 1;
break;
}
stream.next_in = fill(1);
stream.avail_in = len;
}
git_inflate_end(&stream);
return buf;
}
struct delta_info {
unsigned char base_sha1[20];
unsigned nr;
off_t base_offset;
unsigned long size;
void *delta;
struct delta_info *next;
};
static struct delta_info *delta_list;
static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
off_t base_offset,
void *delta, unsigned long size)
{
struct delta_info *info = xmalloc(sizeof(*info));
hashcpy(info->base_sha1, base_sha1);
info->base_offset = base_offset;
info->size = size;
info->delta = delta;
info->nr = nr;
info->next = delta_list;
delta_list = info;
}
struct obj_info {
off_t offset;
unsigned char sha1[20];
struct object *obj;
};
#define FLAG_OPEN (1u<<20)
#define FLAG_WRITTEN (1u<<21)
static struct obj_info *obj_list;
unsigned nr_objects;
/*
* Called only from check_object() after it verified this object
* is Ok.
*/
static void write_cached_object(struct object *obj)
{
unsigned char sha1[20];
struct obj_buffer *obj_buf = lookup_object_buffer(obj);
if (write_sha1_file(obj_buf->buffer, obj_buf->size, typename(obj->type), sha1) < 0)
die("failed to write object %s", sha1_to_hex(obj->sha1));
obj->flags |= FLAG_WRITTEN;
}
/*
* At the very end of the processing, write_rest() scans the objects
* that have reachability requirements and calls this function.
* Verify its reachability and validity recursively and write it out.
*/
static int check_object(struct object *obj, int type, void *data)
{
if (!obj)
return 0;
if (obj->flags & FLAG_WRITTEN)
return 1;
if (type != OBJ_ANY && obj->type != type)
die("object type mismatch");
if (!(obj->flags & FLAG_OPEN)) {
unsigned long size;
int type = sha1_object_info(obj->sha1, &size);
if (type != obj->type || type <= 0)
die("object of unexpected type");
obj->flags |= FLAG_WRITTEN;
return 1;
}
if (fsck_object(obj, 1, fsck_error_function))
die("Error in object");
if (!fsck_walk(obj, check_object, 0))
die("Error on reachable objects of %s", sha1_to_hex(obj->sha1));
write_cached_object(obj);
return 1;
}
static void write_rest(void)
{
unsigned i;
for (i = 0; i < nr_objects; i++)
check_object(obj_list[i].obj, OBJ_ANY, 0);
}
static void added_object(unsigned nr, enum object_type type,
void *data, unsigned long size);
/*
* Write out nr-th object from the list, now we know the contents
* of it. Under --strict, this buffers structured objects in-core,
* to be checked at the end.
*/
static void write_object(unsigned nr, enum object_type type,
void *buf, unsigned long size)
{
if (!strict) {
if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
die("failed to write object");
added_object(nr, type, buf, size);
free(buf);
obj_list[nr].obj = NULL;
} else if (type == OBJ_BLOB) {
struct blob *blob;
if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
die("failed to write object");
added_object(nr, type, buf, size);
free(buf);
blob = lookup_blob(obj_list[nr].sha1);
if (blob)
blob->object.flags |= FLAG_WRITTEN;
else
die("invalid blob object");
obj_list[nr].obj = NULL;
} else {
struct object *obj;
int eaten;
hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1);
added_object(nr, type, buf, size);
obj = parse_object_buffer(obj_list[nr].sha1, type, size, buf, &eaten);
if (!obj)
die("invalid %s", typename(type));
add_object_buffer(obj, buf, size);
obj->flags |= FLAG_OPEN;
obj_list[nr].obj = obj;
}
}
static void resolve_delta(unsigned nr, enum object_type type,
void *base, unsigned long base_size,
void *delta, unsigned long delta_size)
{
void *result;
unsigned long result_size;
result = patch_delta(base, base_size,
delta, delta_size,
&result_size);
if (!result)
die("failed to apply delta");
free(delta);
write_object(nr, type, result, result_size);
}
/*
* We now know the contents of an object (which is nr-th in the pack);
* resolve all the deltified objects that are based on it.
*/
static void added_object(unsigned nr, enum object_type type,
void *data, unsigned long size)
{
struct delta_info **p = &delta_list;
struct delta_info *info;
while ((info = *p) != NULL) {
if (!hashcmp(info->base_sha1, obj_list[nr].sha1) ||
info->base_offset == obj_list[nr].offset) {
*p = info->next;
p = &delta_list;
resolve_delta(info->nr, type, data, size,
info->delta, info->size);
free(info);
continue;
}
p = &info->next;
}
}
static void unpack_non_delta_entry(enum object_type type, unsigned long size,
unsigned nr)
{
void *buf = get_data(size);
if (!dry_run && buf)
write_object(nr, type, buf, size);
else
free(buf);
}
static int resolve_against_held(unsigned nr, const unsigned char *base,
void *delta_data, unsigned long delta_size)
{
struct object *obj;
struct obj_buffer *obj_buffer;
obj = lookup_object(base);
if (!obj)
return 0;
obj_buffer = lookup_object_buffer(obj);
if (!obj_buffer)
return 0;
resolve_delta(nr, obj->type, obj_buffer->buffer,
obj_buffer->size, delta_data, delta_size);
return 1;
}
static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
unsigned nr)
{
void *delta_data, *base;
unsigned long base_size;
unsigned char base_sha1[20];
if (type == OBJ_REF_DELTA) {
hashcpy(base_sha1, fill(20));
use(20);
delta_data = get_data(delta_size);
if (dry_run || !delta_data) {
free(delta_data);
return;
}
if (has_sha1_file(base_sha1))
; /* Ok we have this one */
else if (resolve_against_held(nr, base_sha1,
delta_data, delta_size))
return; /* we are done */
else {
/* cannot resolve yet --- queue it */
hashcpy(obj_list[nr].sha1, null_sha1);
add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
return;
}
} else {
unsigned base_found = 0;
unsigned char *pack, c;
off_t base_offset;
unsigned lo, mid, hi;
pack = fill(1);
c = *pack;
use(1);
base_offset = c & 127;
while (c & 128) {
base_offset += 1;
if (!base_offset || MSB(base_offset, 7))
die("offset value overflow for delta base object");
pack = fill(1);
c = *pack;
use(1);
base_offset = (base_offset << 7) + (c & 127);
}
base_offset = obj_list[nr].offset - base_offset;
if (base_offset <= 0 || base_offset >= obj_list[nr].offset)
die("offset value out of bound for delta base object");
delta_data = get_data(delta_size);
if (dry_run || !delta_data) {
free(delta_data);
return;
}
lo = 0;
hi = nr;
while (lo < hi) {
mid = (lo + hi)/2;
if (base_offset < obj_list[mid].offset) {
hi = mid;
} else if (base_offset > obj_list[mid].offset) {
lo = mid + 1;
} else {
hashcpy(base_sha1, obj_list[mid].sha1);
base_found = !is_null_sha1(base_sha1);
break;
}
}
if (!base_found) {
/*
* The delta base object is itself a delta that
* has not been resolved yet.
*/
hashcpy(obj_list[nr].sha1, null_sha1);
add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
return;
}
}
if (resolve_against_held(nr, base_sha1, delta_data, delta_size))
return;
base = read_sha1_file(base_sha1, &type, &base_size);
if (!base) {
error("failed to read delta-pack base object %s",
sha1_to_hex(base_sha1));
if (!recover)
exit(1);
has_errors = 1;
return;
}
resolve_delta(nr, type, base, base_size, delta_data, delta_size);
free(base);
}
static void unpack_one(unsigned nr)
{
unsigned shift;
unsigned char *pack;
unsigned long size, c;
enum object_type type;
obj_list[nr].offset = consumed_bytes;
pack = fill(1);
c = *pack;
use(1);
type = (c >> 4) & 7;
size = (c & 15);
shift = 4;
while (c & 0x80) {
pack = fill(1);
c = *pack;
use(1);
size += (c & 0x7f) << shift;
shift += 7;
}
switch (type) {
case OBJ_COMMIT:
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
unpack_non_delta_entry(type, size, nr);
return;
case OBJ_REF_DELTA:
case OBJ_OFS_DELTA:
unpack_delta_entry(type, size, nr);
return;
default:
error("bad object type %d", type);
has_errors = 1;
if (recover)
return;
exit(1);
}
}
static void unpack_all(void)
{
int i;
struct progress *progress = NULL;
struct pack_header *hdr = fill(sizeof(struct pack_header));
nr_objects = ntohl(hdr->hdr_entries);
if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
die("bad pack file");
if (!pack_version_ok(hdr->hdr_version))
die("unknown pack file version %"PRIu32,
ntohl(hdr->hdr_version));
use(sizeof(struct pack_header));
if (!quiet)
progress = start_progress("Unpacking objects", nr_objects);
obj_list = xcalloc(nr_objects, sizeof(*obj_list));
for (i = 0; i < nr_objects; i++) {
unpack_one(i);
display_progress(progress, i + 1);
}
stop_progress(&progress);
if (delta_list)
die("unresolved deltas left after unpacking");
}
int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
{
int i;
unsigned char sha1[20];
git_config(git_default_config, NULL);
quiet = !isatty(2);
for (i = 1 ; i < argc; i++) {
const char *arg = argv[i];
if (*arg == '-') {
if (!strcmp(arg, "-n")) {
dry_run = 1;
continue;
}
if (!strcmp(arg, "-q")) {
quiet = 1;
continue;
}
if (!strcmp(arg, "-r")) {
recover = 1;
continue;
}
if (!strcmp(arg, "--strict")) {
strict = 1;
continue;
}
if (!prefixcmp(arg, "--pack_header=")) {
struct pack_header *hdr;
char *c;
hdr = (struct pack_header *)buffer;
hdr->hdr_signature = htonl(PACK_SIGNATURE);
hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
if (*c != ',')
die("bad %s", arg);
hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
if (*c)
die("bad %s", arg);
len = sizeof(*hdr);
continue;
}
usage(unpack_usage);
}
/* We don't take any non-flag arguments now.. Maybe some day */
usage(unpack_usage);
}
git_SHA1_Init(&ctx);
unpack_all();
git_SHA1_Update(&ctx, buffer, offset);
git_SHA1_Final(sha1, &ctx);
if (strict)
write_rest();
if (hashcmp(fill(20), sha1))
die("final sha1 did not match");
use(20);
/* Write the last part of the buffer to stdout */
while (len) {
int ret = xwrite(1, buffer + offset, len);
if (ret <= 0)
break;
len -= ret;
offset += ret;
}
/* All done */
return has_errors;
}