From 8840069a37e69129ed3c2792ddb172557f98e205 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 24 Feb 2023 03:07:32 -0500 Subject: [PATCH 1/4] fsck: factor out index fsck The code to fsck an index operates directly on the_index. Let's move it into its own function in preparation for handling the index files from other worktrees. Since we now have only a single reference to the_index, let's drop our USE_THE_INDEX_VARIABLE definition and just use the_repository.index directly. That's a minor cleanup, but also ensures that we didn't miss any references when moving the code into fsck_index(). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fsck.c | 54 ++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index d207bd909b..fa101e0db2 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -1,4 +1,3 @@ -#define USE_THE_INDEX_VARIABLE #include "builtin.h" #include "cache.h" #include "repository.h" @@ -796,6 +795,35 @@ static int fsck_resolve_undo(struct index_state *istate) return 0; } +static void fsck_index(struct index_state *istate) +{ + unsigned int i; + + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(istate); + for (i = 0; i < istate->cache_nr; i++) { + unsigned int mode; + struct blob *blob; + struct object *obj; + + mode = istate->cache[i]->ce_mode; + if (S_ISGITLINK(mode)) + continue; + blob = lookup_blob(the_repository, + &istate->cache[i]->oid); + if (!blob) + continue; + obj = &blob->object; + obj->flags |= USED; + fsck_put_object_name(&fsck_walk_options, &obj->oid, + ":%s", istate->cache[i]->name); + mark_object_reachable(obj); + } + if (istate->cache_tree) + fsck_cache_tree(istate->cache_tree); + fsck_resolve_undo(istate); +} + static void mark_object_for_connectivity(const struct object_id *oid) { struct object *obj = lookup_unknown_object(the_repository, oid); @@ -959,29 +987,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) verify_index_checksum = 1; verify_ce_order = 1; repo_read_index(the_repository); - /* TODO: audit for interaction with sparse-index. */ - ensure_full_index(&the_index); - for (i = 0; i < the_index.cache_nr; i++) { - unsigned int mode; - struct blob *blob; - struct object *obj; - - mode = the_index.cache[i]->ce_mode; - if (S_ISGITLINK(mode)) - continue; - blob = lookup_blob(the_repository, - &the_index.cache[i]->oid); - if (!blob) - continue; - obj = &blob->object; - obj->flags |= USED; - fsck_put_object_name(&fsck_walk_options, &obj->oid, - ":%s", the_index.cache[i]->name); - mark_object_reachable(obj); - } - if (the_index.cache_tree) - fsck_cache_tree(the_index.cache_tree); - fsck_resolve_undo(&the_index); + fsck_index(the_repository->index); } check_connectivity(); From fb64ca526a7c695aa137c2d2577585ddea5cce28 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 24 Feb 2023 03:09:57 -0500 Subject: [PATCH 2/4] fsck: check index files in all worktrees We check the index file for the main worktree, but completely ignore the index files in other worktrees. These should be checked, too, as they are part of the repository state (and in particular, errors in those index files may cause repo-wide operations like "git gc" to complain). Reported-by: Johannes Sixt Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fsck.c | 18 ++++++++++++++++-- t/t1450-fsck.sh | 12 ++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index fa101e0db2..c11cb2a95f 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -984,10 +984,24 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) } if (keep_cache_objects) { + struct worktree **worktrees, **p; + verify_index_checksum = 1; verify_ce_order = 1; - repo_read_index(the_repository); - fsck_index(the_repository->index); + + worktrees = get_worktrees(); + for (p = worktrees; *p; p++) { + struct worktree *wt = *p; + struct index_state istate = + INDEX_STATE_INIT(the_repository); + + if (read_index_from(&istate, + worktree_git_path(wt, "index"), + get_worktree_git_dir(wt)) > 0) + fsck_index(&istate); + discard_index(&istate); + } + free_worktrees(worktrees); } check_connectivity(); diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index de0f6d5e7f..e01a519a69 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -1023,4 +1023,16 @@ test_expect_success 'fsck error on gitattributes with excessive size' ' test_cmp expected actual ' +test_expect_success 'fsck detects problems in worktree index' ' + test_when_finished "git worktree remove -f wt" && + git worktree add wt && + + echo "this will be removed to break the worktree index" >wt/file && + git -C wt add file && + blob=$(git -C wt rev-parse :file) && + remove_object $blob && + + test_must_fail git fsck +' + test_done From 592ec63b38ca7e2fb069ce1bf41b47a6f5a4ef8a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 24 Feb 2023 03:12:11 -0500 Subject: [PATCH 3/4] fsck: mention file path for index errors If we encounter an error in an index file, we may say something like: error: 1234abcd: invalid sha1 pointer in resolve-undo But if you have multiple worktrees, each with its own index, it can be very helpful to know which file had the problem. So let's pass that path down through the various index-fsck functions and use it where appropriate. After this patch you should get something like: error: 1234abcd: invalid sha1 pointer in resolve-undo of .git/worktrees/wt/index That's a bit verbose, but since the point is that you shouldn't see this normally, we're better to err on the side of more details. I've also added the index filename to the name used by "fsck --name-objects", which will show up if we find the object to be missing, etc. This is bending the rules a little there, as the option claims to write names that can be fed to rev-parse. But there is no revision syntax to access the index of another worktree, so the best we can do is make up something that a human will probably understand. I did take care to retain the existing ":file" syntax for the current worktree. So the uglier output should kick in only when it's actually necessary. See the included tests for examples of both forms. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fsck.c | 42 +++++++++++++++++++++++++++--------------- t/t1450-fsck.sh | 20 +++++++++++++++++++- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index c11cb2a95f..1b032eebb1 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -731,19 +731,19 @@ static int fsck_head_link(const char *head_ref_name, return 0; } -static int fsck_cache_tree(struct cache_tree *it) +static int fsck_cache_tree(struct cache_tree *it, const char *index_path) { int i; int err = 0; if (verbose) - fprintf_ln(stderr, _("Checking cache tree")); + fprintf_ln(stderr, _("Checking cache tree of %s"), index_path); if (0 <= it->entry_count) { struct object *obj = parse_object(the_repository, &it->oid); if (!obj) { - error(_("%s: invalid sha1 pointer in cache-tree"), - oid_to_hex(&it->oid)); + error(_("%s: invalid sha1 pointer in cache-tree of %s"), + oid_to_hex(&it->oid), index_path); errors_found |= ERROR_REFS; return 1; } @@ -754,11 +754,12 @@ static int fsck_cache_tree(struct cache_tree *it) err |= objerror(obj, _("non-tree in cache-tree")); } for (i = 0; i < it->subtree_nr; i++) - err |= fsck_cache_tree(it->down[i]->cache_tree); + err |= fsck_cache_tree(it->down[i]->cache_tree, index_path); return err; } -static int fsck_resolve_undo(struct index_state *istate) +static int fsck_resolve_undo(struct index_state *istate, + const char *index_path) { struct string_list_item *item; struct string_list *resolve_undo = istate->resolve_undo; @@ -781,8 +782,9 @@ static int fsck_resolve_undo(struct index_state *istate) obj = parse_object(the_repository, &ru->oid[i]); if (!obj) { - error(_("%s: invalid sha1 pointer in resolve-undo"), - oid_to_hex(&ru->oid[i])); + error(_("%s: invalid sha1 pointer in resolve-undo of %s"), + oid_to_hex(&ru->oid[i]), + index_path); errors_found |= ERROR_REFS; continue; } @@ -795,7 +797,8 @@ static int fsck_resolve_undo(struct index_state *istate) return 0; } -static void fsck_index(struct index_state *istate) +static void fsck_index(struct index_state *istate, const char *index_path, + int is_main_index) { unsigned int i; @@ -816,12 +819,14 @@ static void fsck_index(struct index_state *istate) obj = &blob->object; obj->flags |= USED; fsck_put_object_name(&fsck_walk_options, &obj->oid, - ":%s", istate->cache[i]->name); + "%s:%s", + is_main_index ? "" : index_path, + istate->cache[i]->name); mark_object_reachable(obj); } if (istate->cache_tree) - fsck_cache_tree(istate->cache_tree); - fsck_resolve_undo(istate); + fsck_cache_tree(istate->cache_tree, index_path); + fsck_resolve_undo(istate, index_path); } static void mark_object_for_connectivity(const struct object_id *oid) @@ -994,12 +999,19 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) struct worktree *wt = *p; struct index_state istate = INDEX_STATE_INIT(the_repository); + char *path; - if (read_index_from(&istate, - worktree_git_path(wt, "index"), + /* + * Make a copy since the buffer is reusable + * and may get overwritten by other calls + * while we're examining the index. + */ + path = xstrdup(worktree_git_path(wt, "index")); + if (read_index_from(&istate, path, get_worktree_git_dir(wt)) > 0) - fsck_index(&istate); + fsck_index(&istate, path, wt->is_current); discard_index(&istate); + free(path); } free_worktrees(worktrees); } diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index e01a519a69..82c92de7ca 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -1032,7 +1032,25 @@ test_expect_success 'fsck detects problems in worktree index' ' blob=$(git -C wt rev-parse :file) && remove_object $blob && - test_must_fail git fsck + test_must_fail git fsck --name-objects >actual 2>&1 && + cat >expect <<-EOF && + missing blob $blob (.git/worktrees/wt/index:file) + EOF + test_cmp expect actual +' + +test_expect_success 'fsck reports problems in main index without filename' ' + test_when_finished "rm -f .git/index && git read-tree HEAD" && + echo "this object will be removed to break the main index" >file && + git add file && + blob=$(git rev-parse :file) && + remove_object $blob && + + test_must_fail git fsck --name-objects >actual 2>&1 && + cat >expect <<-EOF && + missing blob $blob (:file) + EOF + test_cmp expect actual ' test_done From 8d3e7eac529b42319622692028b45670bdff8835 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 26 Feb 2023 17:29:43 -0500 Subject: [PATCH 4/4] fsck: check even zero-entry index files In fb64ca526a (fsck: check index files in all worktrees, 2023-02-24), we swapped out a call to vanilla repo_read_index() for a series of read_index_from() calls, one per worktree. The code for the latter was copied from add_index_objects_to_pending(), which checks for a positive return value from the index reading function, and we do the same here in fsck now. But this is probably the wrong thing. I had interpreted the check as "don't operate on the index struct if there was an error". But in reality, if there is an error then the index-reading code will simply die (which admittedly is not great for fsck, but that is not a new problem). The return value here is actually the number of entries read. So it makes sense for add_index_objects_to_pending() to ignore a zero-entry index (there is nothing to add). But for fsck, we would still want to check any extensions, etc (though presumably it is unlikely to have them in an empty index, I don't think it's impossible). So we should ignore the return value from read_index_from() entirely. This matches the behavior before fb64ca526a, when we ignored the return value from repo_read_index(). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/fsck.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/builtin/fsck.c b/builtin/fsck.c index 1b032eebb1..64614b43b2 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -1007,9 +1007,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) * while we're examining the index. */ path = xstrdup(worktree_git_path(wt, "index")); - if (read_index_from(&istate, path, - get_worktree_git_dir(wt)) > 0) - fsck_index(&istate, path, wt->is_current); + read_index_from(&istate, path, get_worktree_git_dir(wt)); + fsck_index(&istate, path, wt->is_current); discard_index(&istate); free(path); }