1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-28 15:06:11 +02:00

unpack-objects: low memory footprint for get_data() in dry_run mode

As the name implies, "get_data(size)" will allocate and return a given
amount of memory. Allocating memory for a large blob object may cause the
system to run out of memory. In preparation for replacing the calls to
"get_data()" that unpack large blob objects in later commits, refactor
"get_data()" to reduce its memory footprint in dry_run mode.

Because in dry_run mode, "get_data()" is only used to check the
integrity of data, and the returned buffer is not used at all, we can
allocate a smaller buffer and use it as zstream output. Make the function
return NULL in the dry-run mode, as no callers use the returned buffer.

The "find [...]objects/?? -type f | wc -l" test idiom being used here
is adapted from the same "find" use added to another test in
d9545c7f46 (fast-import: implement unpack limit, 2016-04-25).

Suggested-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Han Xin <chiyutianyi@gmail.com>
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Han Xin 2022-06-11 10:44:16 +08:00 committed by Junio C Hamano
parent ab336e8f1c
commit a1bf5ca29f
2 changed files with 67 additions and 11 deletions

View File

@ -97,15 +97,27 @@ static void use(int bytes)
display_throughput(progress, consumed_bytes);
}
/*
* Decompress zstream from the standard input into a newly
* allocated buffer of specified size and return the buffer.
* The caller is responsible for freeing the returned buffer.
*
* But for dry_run mode, "get_data()" is only used to check the
* integrity of data, and the returned buffer is not used at all.
* Therefore, in dry_run mode, "get_data()" will release the small
* allocated buffer which is reused to hold temporary zstream output
* and return NULL instead of returning garbage data.
*/
static void *get_data(unsigned long size)
{
git_zstream stream;
void *buf = xmallocz(size);
unsigned long bufsize = dry_run && size > 8192 ? 8192 : size;
void *buf = xmallocz(bufsize);
memset(&stream, 0, sizeof(stream));
stream.next_out = buf;
stream.avail_out = size;
stream.avail_out = bufsize;
stream.next_in = fill(1);
stream.avail_in = len;
git_inflate_init(&stream);
@ -125,8 +137,17 @@ static void *get_data(unsigned long size)
}
stream.next_in = fill(1);
stream.avail_in = len;
if (dry_run) {
/* reuse the buffer in dry_run mode */
stream.next_out = buf;
stream.avail_out = bufsize > size - stream.total_out ?
size - stream.total_out :
bufsize;
}
}
git_inflate_end(&stream);
if (dry_run)
FREE_AND_NULL(buf);
return buf;
}
@ -326,10 +347,8 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
{
void *buf = get_data(size);
if (!dry_run && buf)
if (buf)
write_object(nr, type, buf, size);
else
free(buf);
}
static int resolve_against_held(unsigned nr, const struct object_id *base,
@ -359,10 +378,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
oidread(&base_oid, fill(the_hash_algo->rawsz));
use(the_hash_algo->rawsz);
delta_data = get_data(delta_size);
if (dry_run || !delta_data) {
free(delta_data);
if (!delta_data)
return;
}
if (has_object_file(&base_oid))
; /* Ok we have this one */
else if (resolve_against_held(nr, &base_oid,
@ -398,10 +415,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
die("offset value out of bound for delta base object");
delta_data = get_data(delta_size);
if (dry_run || !delta_data) {
free(delta_data);
if (!delta_data)
return;
}
lo = 0;
hi = nr;
while (lo < hi) {

41
t/t5351-unpack-large-objects.sh Executable file
View File

@ -0,0 +1,41 @@
#!/bin/sh
#
# Copyright (c) 2022 Han Xin
#
# Exercises "git unpack-objects" on a pack built from two commits of a
# 1.5 MB file while GIT_ALLOC_LIMIT is set to 1m:
#   - a normal unpack is expected to fail with an allocation error;
#   - a dry run (-n) is expected to succeed and to leave no files under
#     the destination repository's objects directory.
#
# The tests are order-dependent: later tests rely on $PACK and on the
# exported GIT_ALLOC_LIMIT that earlier tests set up.
test_description='git unpack-objects with large objects'
. ./test-lib.sh
# Create a fresh bare repository "dest.git" and register "rm -rf dest.git"
# with test_when_finished so the calling test cleans it up.
prepare_dest () {
test_when_finished "rm -rf dest.git" &&
git init --bare dest.git
}
# Fixture: write genrandom output to big-blob twice ("foo" and "bar" are
# presumably the generator seeds -- confirm against test-tool), commit each
# version, then pack what is reachable from HEAD. The output of
# pack-objects is kept in $PACK and later used to form the file name
# pack-$PACK.pack.
test_expect_success "create large objects (1.5 MB) and PACK" '
test-tool genrandom foo 1500000 >big-blob &&
test_commit --append foo big-blob &&
test-tool genrandom bar 1500000 >big-blob &&
test_commit --append bar big-blob &&
PACK=$(echo HEAD | git pack-objects --revs pack)
'
# Export the limit from inside a test body so that it applies to the git
# commands run by the tests that follow.
test_expect_success 'set memory limitation to 1MB' '
GIT_ALLOC_LIMIT=1m &&
export GIT_ALLOC_LIMIT
'
# Without -n the unpack must fail, and stderr must contain the
# "attempting to allocate" fatal message.
test_expect_success 'unpack-objects failed under memory limitation' '
prepare_dest &&
test_must_fail git -C dest.git unpack-objects <pack-$PACK.pack 2>err &&
grep "fatal: attempting to allocate" err
'
# With -n (dry run) the same pack must be processed successfully under the
# same limit, and nothing may be written: no regular files anywhere under
# dest.git/objects, and an empty dest.git/objects/pack directory.
test_expect_success 'unpack-objects works with memory limitation in dry-run mode' '
prepare_dest &&
git -C dest.git unpack-objects -n <pack-$PACK.pack &&
test_stdout_line_count = 0 find dest.git/objects -type f &&
test_dir_is_empty dest.git/objects/pack
'
test_done