From ec18b10bf20574fc6d60c966412a11c81f9c17e0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 10 Aug 2022 17:01:17 -0400 Subject: [PATCH 1/3] tree-walk: add a mechanism for getting non-canonicalized modes When using init_tree_desc() and tree_entry() to iterate over a tree, we always canonicalize the modes coming out of the tree. This is a good thing to prevent bugs or oddities in normal code paths, but it's counter-productive for tools like fsck that want to see the exact contents. We can address this by adding an option to avoid the extra canonicalization. A few notes on the implementation: - I've attached the new option to the tree_desc struct itself. The actual code change is in decode_tree_entry(), which is in turn called by the public update_tree_entry(), tree_entry(), and init_tree_desc() functions, plus their "gently" counterparts. By letting it ride along in the struct, we can avoid changing the signature of those functions, which are called many times. Plus it's conceptually simpler: you really want a particular iteration of a tree to be "raw" or not, rather than individual calls. - We still have to set the new option somewhere. The struct is initialized by init_tree_desc(). I added the new flags field only to the "gently" version. That avoids disturbing the much more numerous non-gentle callers, and it makes sense that anybody being careful about looking at raw modes would also be careful about bogus trees (i.e., the caller will be something like fsck in the first place). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- fsck.c | 4 ++-- packfile.c | 2 +- tree-walk.c | 14 +++++++++----- tree-walk.h | 8 +++++++- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/fsck.c b/fsck.c index dd4822ba1b..5acc982a7c 100644 --- a/fsck.c +++ b/fsck.c @@ -308,7 +308,7 @@ static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *op return -1; name = fsck_get_object_name(options, &tree->object.oid); - if (init_tree_desc_gently(&desc, tree->buffer, tree->size)) + if (init_tree_desc_gently(&desc, tree->buffer, tree->size, 0)) return -1; while (tree_entry_gently(&desc, &entry)) { struct object *obj; @@ -578,7 +578,7 @@ static int fsck_tree(const struct object_id *tree_oid, const char *o_name; struct name_stack df_dup_candidates = { NULL }; - if (init_tree_desc_gently(&desc, buffer, size)) { + if (init_tree_desc_gently(&desc, buffer, size, 0)) { retval += report(options, tree_oid, OBJ_TREE, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); diff --git a/packfile.c b/packfile.c index 8e812a84a3..e5b1d0ed76 100644 --- a/packfile.c +++ b/packfile.c @@ -2243,7 +2243,7 @@ static int add_promisor_object(const struct object_id *oid, struct tree *tree = (struct tree *)obj; struct tree_desc desc; struct name_entry entry; - if (init_tree_desc_gently(&desc, tree->buffer, tree->size)) + if (init_tree_desc_gently(&desc, tree->buffer, tree->size, 0)) /* * Error messages are given when packs are * verified, so do not print any here. diff --git a/tree-walk.c b/tree-walk.c index 506234b4b8..74f4d710e8 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -47,17 +47,20 @@ static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned l /* Initialize the descriptor entry */ desc->entry.path = path; - desc->entry.mode = canon_mode(mode); + desc->entry.mode = (desc->flags & TREE_DESC_RAW_MODES) ? mode : canon_mode(mode); desc->entry.pathlen = len - 1; oidread(&desc->entry.oid, (const unsigned char *)path + len); return 0; } -static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, unsigned long size, struct strbuf *err) +static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, + unsigned long size, struct strbuf *err, + enum tree_desc_flags flags) { desc->buffer = buffer; desc->size = size; + desc->flags = flags; if (size) return decode_tree_entry(desc, buffer, size, err); return 0; @@ -66,15 +69,16 @@ static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, u void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size) { struct strbuf err = STRBUF_INIT; - if (init_tree_desc_internal(desc, buffer, size, &err)) + if (init_tree_desc_internal(desc, buffer, size, &err, 0)) die("%s", err.buf); strbuf_release(&err); } -int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, unsigned long size) +int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, unsigned long size, + enum tree_desc_flags flags) { struct strbuf err = STRBUF_INIT; - int result = init_tree_desc_internal(desc, buffer, size, &err); + int result = init_tree_desc_internal(desc, buffer, size, &err, flags); if (result) error("%s", err.buf); strbuf_release(&err); diff --git a/tree-walk.h b/tree-walk.h index a5058469e9..6305d53150 100644 --- a/tree-walk.h +++ b/tree-walk.h @@ -34,6 +34,11 @@ struct tree_desc { /* counts the number of bytes left in the `buffer`. */ unsigned int size; + + /* option flags passed via init_tree_desc_gently() */ + enum tree_desc_flags { + TREE_DESC_RAW_MODES = (1 << 0), + } flags; }; /** @@ -79,7 +84,8 @@ int update_tree_entry_gently(struct tree_desc *); */ void init_tree_desc(struct tree_desc *desc, const void *buf, unsigned long size); -int init_tree_desc_gently(struct tree_desc *desc, const void *buf, unsigned long size); +int init_tree_desc_gently(struct tree_desc *desc, const void *buf, unsigned long size, + enum tree_desc_flags flags); /* * Visit the next entry in a tree. Returns 1 when there are more entries From 53602a937dc9eacd67b6afcd781f7b15bb02682f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 10 Aug 2022 17:02:45 -0400 Subject: [PATCH 2/3] fsck: actually detect bad file modes in trees We use the normal tree_desc code to iterate over trees in fsck, meaning we only see the canonicalized modes it returns. And hence we'd never see anything unexpected, since it will coerce literally any garbage into one of our normal and accepted modes. We can use the new RAW_MODES flag to see the real modes, and then use the existing code to actually analyze them. The existing code is written as allow-known-good, so there's not much point in testing a variety of breakages. The one tested here should be S_IFREG but with nonsense permissions. Do note that the error-reporting here isn't great. We don't mention the specific bad mode, but just that the tree has one or more broken modes. But when you go to look at it with "git ls-tree", we'll report the canonicalized mode! This isn't ideal, but given that this should come up rarely, and that any number of other tree corruptions might force you into looking at the binary bytes via "cat-file", it's not the end of the world. And it's something we can improve on top later if we choose. Reported-by: Xavier Morel Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- fsck.c | 2 +- t/t1450-fsck.sh | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/fsck.c b/fsck.c index 5acc982a7c..b3da1d68c0 100644 --- a/fsck.c +++ b/fsck.c @@ -578,7 +578,7 @@ static int fsck_tree(const struct object_id *tree_oid, const char *o_name; struct name_stack df_dup_candidates = { NULL }; - if (init_tree_desc_gently(&desc, buffer, size, 0)) { + if (init_tree_desc_gently(&desc, buffer, size, TREE_DESC_RAW_MODES)) { retval += report(options, tree_oid, OBJ_TREE, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index ab7f31f1dc..53c2aa10b7 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -364,6 +364,20 @@ test_expect_success 'tree entry with type mismatch' ' test_i18ngrep ! "dangling blob" out ' +test_expect_success 'tree entry with bogus mode' ' + test_when_finished "remove_object \$blob" && + test_when_finished "remove_object \$tree" && + blob=$(echo blob | git hash-object -w --stdin) && + blob_oct=$(echo $blob | hex2oct) && + tree=$(printf "100000 foo\0${blob_oct}" | + git hash-object -t tree --stdin -w --literally) && + git fsck 2>err && + cat >expect <<-EOF && + warning in tree $tree: badFilemode: contains bad file modes + EOF + test_cmp expect err +' + test_expect_success 'tag pointing to nonexistent' ' badoid=$(test_oid deadbeef) && cat >invalid-tag <<-EOF && From 4dd3b045f528b8d9cbbb4a50e371affb0543f37d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 10 Aug 2022 17:04:07 -0400 Subject: [PATCH 3/3] fsck: downgrade tree badFilemode to "info" The previous commit un-broke the "badFileMode" check; before then it was literally testing nothing. And as far as I can tell, it has been so since the very initial version of fsck. The current severity of "badFileMode" is just "warning". But in the --strict mode used by transfer.fsckObjects, that is elevated to an error. This will potentially cause hassle for users, because historical objects with bad modes will suddenly start causing pushes to many server operators to be rejected. At the same time, these bogus modes aren't actually a big risk. Because we canonicalize them everywhere besides fsck, they can't cause too much mischief in the real world. The worst thing you can do is end up with two almost-identical trees that have different hashes but are interpreted the same. That will generally cause things to be inefficient rather than wrong, and is a bug somebody working on a Git implementation would want to fix, but probably not worth inconveniencing users by refusing to push or fetch. So let's downgrade this to "info" by default, which is our setting for "mention this when fscking, but don't ever reject, even under strict mode". If somebody really wants to be paranoid, they can still adjust the level using config. Suggested-by: Xavier Morel Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- fsck.h | 2 +- t/t5504-fetch-receive-strict.sh | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/fsck.h b/fsck.h index d07f7a2459..6f801e53b1 100644 --- a/fsck.h +++ b/fsck.h @@ -56,7 +56,6 @@ enum fsck_msg_type { FUNC(GITMODULES_PATH, ERROR) \ FUNC(GITMODULES_UPDATE, ERROR) \ /* warnings */ \ - FUNC(BAD_FILEMODE, WARN) \ FUNC(EMPTY_NAME, WARN) \ FUNC(FULL_PATHNAME, WARN) \ FUNC(HAS_DOT, WARN) \ @@ -66,6 +65,7 @@ enum fsck_msg_type { FUNC(ZERO_PADDED_FILEMODE, WARN) \ FUNC(NUL_IN_COMMIT, WARN) \ /* infos (reported as warnings, but ignored by default) */ \ + FUNC(BAD_FILEMODE, INFO) \ FUNC(GITMODULES_PARSE, INFO) \ FUNC(GITIGNORE_SYMLINK, INFO) \ FUNC(GITATTRIBUTES_SYMLINK, INFO) \ diff --git a/t/t5504-fetch-receive-strict.sh b/t/t5504-fetch-receive-strict.sh index b0b795aca9..ac4099ca89 100755 --- a/t/t5504-fetch-receive-strict.sh +++ b/t/t5504-fetch-receive-strict.sh @@ -352,4 +352,21 @@ test_expect_success \ grep "Cannot demote unterminatedheader" act ' +test_expect_success 'badFilemode is not a strict error' ' + git init --bare badmode.git && + tree=$( + cd badmode.git && + blob=$(echo blob | git hash-object -w --stdin | hex2oct) && + printf "123456 foo\0${blob}" | + git hash-object -t tree --stdin -w --literally + ) && + + rm -rf dst.git && + git init --bare dst.git && + git -C dst.git config transfer.fsckObjects true && + + git -C badmode.git push ../dst.git $tree:refs/tags/tree 2>err && + grep "$tree: badFilemode" err +' + test_done