From a2bc523e1ea2ef9b59eb0c26331b6e7d9dc5a812 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 31 May 2024 08:00:34 -0400 Subject: [PATCH 1/2] dir.c: skip .gitignore, etc larger than INT_MAX We use add_patterns() to read .gitignore, .git/info/exclude, etc, as well as other pattern-like files like sparse-checkout. The parser for these uses an "int" as an index, meaning that files over 2GB will generally cause signed integer overflow and out-of-bounds access. This is unlikely to happen in any real files, but we do read .gitignore files from the tree. A malicious tree could cause an out-of-bounds read and segfault (we also write NULs over newlines, so in theory it could be an out-of-bounds write, too, but as we go char-by-char, the first thing that happens is trying to read a negative 2GB offset). We could fix the most obvious issue by replacing one "int" with a "size_t". But there are tons of "int" sprinkled throughout this code for things like pattern lengths, number of patterns, and so on. Since nobody would actually want a 2GB .gitignore file, an easy defensive measure is to just refuse to parse them. The "int" in question is in add_patterns_from_buffer(), so we could catch it there. But by putting the checks in its two callers, we can produce more useful error messages. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- dir.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/dir.c b/dir.c index 20ebe4cba2..6913003164 100644 --- a/dir.c +++ b/dir.c @@ -30,6 +30,7 @@ #include "symlinks.h" #include "trace2.h" #include "tree.h" +#include "hex.h" /* * Tells read_directory_recursive how a file or directory should be treated. @@ -1136,6 +1137,12 @@ static int add_patterns(const char *fname, const char *base, int baselen, } } + if (size > INT_MAX) { + warning("ignoring excessively large pattern file: %s", fname); + free(buf); + return -1; + } + add_patterns_from_buffer(buf, size, base, baselen, pl); return 0; } @@ -1192,6 +1199,13 @@ int add_patterns_from_blob_to_list( if (r != 1) return r; + if (size > INT_MAX) { + warning("ignoring excessively large pattern blob: %s", + oid_to_hex(oid)); + free(buf); + return -1; + } + add_patterns_from_buffer(buf, size, base, baselen, pl); return 0; } From e7c3d1ddba0b3eb9d52780588636833055830c6f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 5 Jun 2024 04:03:08 -0400 Subject: [PATCH 2/2] dir.c: reduce max pattern file size to 100MB In a2bc523e1e (dir.c: skip .gitignore, etc larger than INT_MAX, 2024-05-31) we put capped the size of some files whose parsing code and data structures used ints. Setting the limit to INT_MAX was a natural spot, since we know the parsing code would misbehave above that. But it also leaves the possibility of overflow errors when we multiply that limit to allocate memory. For instance, a file consisting only of "a\na\n..." could have INT_MAX/2 entries. Allocating an array of pointers for each would need INT_MAX*4 bytes on a 64-bit system, enough to overflow a 32-bit int. So let's give ourselves a bit more safety margin by giving a much smaller limit. The size 100MB is somewhat arbitrary, but is based on the similar value for attribute files added by 3c50032ff5 (attr: ignore overly large gitattributes files, 2022-12-01). There's no particular reason these have to be the same, but the idea is that they are in the ballpark of "so huge that nobody would care, but small enough to avoid malicious overflow". So lacking a better guess, it makes sense to use the same value. The implementation here doesn't share the same constant, but we could change that later (or even give it a runtime config knob, though nobody has complained yet about the attribute limit). And likewise, let's add a few tests that exercise the limits, based on the attr ones. In this case, though, we never read .gitignore from the index; the blob code is exercised only for sparse filters. So we'll trigger it that way. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- dir.c | 10 ++++++++-- t/t0008-ignores.sh | 8 ++++++++ t/t6112-rev-list-filters-objects.sh | 12 ++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/dir.c b/dir.c index 6913003164..d477875206 100644 --- a/dir.c +++ b/dir.c @@ -32,6 +32,12 @@ #include "tree.h" #include "hex.h" + /* + * The maximum size of a pattern/exclude file. If the file exceeds this size + * we will ignore it. + */ +#define PATTERN_MAX_FILE_SIZE (100 * 1024 * 1024) + /* * Tells read_directory_recursive how a file or directory should be treated. * Values are ordered by significance, e.g. if a directory contains both @@ -1137,7 +1143,7 @@ static int add_patterns(const char *fname, const char *base, int baselen, } } - if (size > INT_MAX) { + if (size > PATTERN_MAX_FILE_SIZE) { warning("ignoring excessively large pattern file: %s", fname); free(buf); return -1; @@ -1199,7 +1205,7 @@ int add_patterns_from_blob_to_list( if (r != 1) return r; - if (size > INT_MAX) { + if (size > PATTERN_MAX_FILE_SIZE) { warning("ignoring excessively large pattern blob: %s", oid_to_hex(oid)); free(buf); diff --git a/t/t0008-ignores.sh b/t/t0008-ignores.sh index 361446b2f4..02a18d4fdb 100755 --- a/t/t0008-ignores.sh +++ b/t/t0008-ignores.sh @@ -945,4 +945,12 @@ test_expect_success SYMLINKS 'symlinks not respected in-tree' ' test_grep "unable to access.*gitignore" err ' +test_expect_success EXPENSIVE 'large exclude file ignored in tree' ' + test_when_finished "rm .gitignore" && + dd if=/dev/zero of=.gitignore bs=101M count=1 && + git ls-files -o --exclude-standard 2>err && + echo "warning: ignoring excessively large pattern file: .gitignore" >expect && + test_cmp expect err +' + test_done diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh index 43e1afd44c..0387f35a32 100755 --- a/t/t6112-rev-list-filters-objects.sh +++ b/t/t6112-rev-list-filters-objects.sh @@ -701,4 +701,16 @@ test_expect_success 'expand blob limit in protocol' ' grep "blob:limit=1024" trace ' +test_expect_success EXPENSIVE 'large sparse filter file ignored' ' + blob=$(dd if=/dev/zero bs=101M count=1 | + git hash-object -w --stdin) && + test_must_fail \ + git rev-list --all --objects --filter=sparse:oid=$blob 2>err && + cat >expect <<-EOF && + warning: ignoring excessively large pattern blob: $blob + fatal: unable to parse sparse filter data in $blob + EOF + test_cmp expect err +' + test_done