diff --git a/Documentation/technical/sparse-index.txt b/Documentation/technical/sparse-index.txt index 8d3d8080460..3b24c1a219f 100644 --- a/Documentation/technical/sparse-index.txt +++ b/Documentation/technical/sparse-index.txt @@ -85,8 +85,41 @@ index, as well. Next, consumers of the index will be guarded against operating on a sparse-index by inserting calls to `ensure_full_index()` or -`expand_index_to_path()`. After these guards are in place, we can begin -leaving sparse-directory entries in the in-memory index structure. +`expand_index_to_path()`. If a specific path is requested, then those will +be protected from within the `index_file_exists()` and `index_name_pos()` +API calls: they will call `ensure_full_index()` if necessary. The +intention here is to preserve existing behavior when interacting with a +sparse-checkout. We don't want a change to happen by accident, without +tests. Many of these locations may not need any change before removing the +guards, but we should not do so without tests to ensure the expected +behavior happens. + +It may be desirable to _change_ the behavior of some commands in the +presence of a sparse index or more generally in any sparse-checkout +scenario. In such cases, these should be carefully communicated and +tested. No such behavior changes are intended during this phase. + +During a scan of the codebase, not every iteration of the cache entries +needs an `ensure_full_index()` check. The basic reasons include: + +1. The loop is scanning for entries with non-zero stage. These entries + are not collapsed into a sparse-directory entry. + +2. The loop is scanning for submodules. These entries are not collapsed + into a sparse-directory entry. + +3. The loop is part of the index API, especially around reading or + writing the format. + +4. The loop is checking for correct order of cache entries and that is + correct if and only if the sparse-directory entries are in the correct + location. + +5. The loop ignores entries with the `SKIP_WORKTREE` bit set, or is + otherwise already aware of sparse directory entries. + +6. The sparse-index is disabled at this point when using the split-index + feature, so no effort is made to protect the split-index API. Even after inserting these guards, we will keep expanding sparse-indexes for most Git commands using the `command_requires_full_index` repository diff --git a/attr.c b/attr.c index ac8ec7ce51e..9e897e43f53 100644 --- a/attr.c +++ b/attr.c @@ -733,7 +733,7 @@ static struct attr_stack *read_attr_from_file(const char *path, unsigned flags) return res; } -static struct attr_stack *read_attr_from_index(const struct index_state *istate, +static struct attr_stack *read_attr_from_index(struct index_state *istate, const char *path, unsigned flags) { @@ -763,7 +763,7 @@ static struct attr_stack *read_attr_from_index(const struct index_state *istate, return res; } -static struct attr_stack *read_attr(const struct index_state *istate, +static struct attr_stack *read_attr(struct index_state *istate, const char *path, unsigned flags) { struct attr_stack *res = NULL; @@ -855,7 +855,7 @@ static void push_stack(struct attr_stack **attr_stack_p, } } -static void bootstrap_attr_stack(const struct index_state *istate, +static void bootstrap_attr_stack(struct index_state *istate, struct attr_stack **stack) { struct attr_stack *e; @@ -894,7 +894,7 @@ static void bootstrap_attr_stack(const struct index_state *istate, push_stack(stack, e, NULL, 0); } -static void prepare_attr_stack(const struct index_state *istate, +static void prepare_attr_stack(struct index_state *istate, const char *path, int dirlen, struct attr_stack **stack) { @@ -1094,7 +1094,7 @@ static void determine_macros(struct all_attrs_item *all_attrs, * If check->check_nr is non-zero, only attributes in check[] are collected. * Otherwise all attributes are collected. */ -static void collect_some_attrs(const struct index_state *istate, +static void collect_some_attrs(struct index_state *istate, const char *path, struct attr_check *check) { @@ -1123,7 +1123,7 @@ static void collect_some_attrs(const struct index_state *istate, fill(path, pathlen, basename_offset, check->stack, check->all_attrs, rem); } -void git_check_attr(const struct index_state *istate, +void git_check_attr(struct index_state *istate, const char *path, struct attr_check *check) { @@ -1140,7 +1140,7 @@ void git_check_attr(const struct index_state *istate, } } -void git_all_attrs(const struct index_state *istate, +void git_all_attrs(struct index_state *istate, const char *path, struct attr_check *check) { int i; diff --git a/attr.h b/attr.h index 404548f028a..3732505edae 100644 --- a/attr.h +++ b/attr.h @@ -190,14 +190,14 @@ void attr_check_free(struct attr_check *check); */ const char *git_attr_name(const struct git_attr *); -void git_check_attr(const struct index_state *istate, +void git_check_attr(struct index_state *istate, const char *path, struct attr_check *check); /* * Retrieve all attributes that apply to the specified path. * check holds the attributes and their values. */ -void git_all_attrs(const struct index_state *istate, +void git_all_attrs(struct index_state *istate, const char *path, struct attr_check *check); enum git_attr_direction { diff --git a/builtin/add.c b/builtin/add.c index ea762a41e3a..afccf2fd554 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -141,6 +141,8 @@ static int renormalize_tracked_files(const struct pathspec *pathspec, int flags) { int i, retval = 0; + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&the_index); for (i = 0; i < active_nr; i++) { struct cache_entry *ce = active_cache[i]; diff --git a/builtin/checkout-index.c b/builtin/checkout-index.c index c0bf4ac1b2d..c9a3c71914b 100644 --- a/builtin/checkout-index.c +++ b/builtin/checkout-index.c @@ -120,6 +120,8 @@ static void checkout_all(const char *prefix, int prefix_length) int i, errs = 0; struct cache_entry *last_ce = NULL; + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&the_index); for (i = 0; i < active_nr ; i++) { struct cache_entry *ce = active_cache[i]; if (ce_stage(ce) != checkout_stage diff --git a/builtin/checkout.c b/builtin/checkout.c index 4c696ef4805..5bd9128d1a3 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -369,6 +369,9 @@ static int checkout_worktree(const struct checkout_opts *opts, NULL); enable_delayed_checkout(&state); + + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&the_index); for (pos = 0; pos < active_nr; pos++) { struct cache_entry *ce = active_cache[pos]; if (ce->ce_flags & CE_MATCHED) { @@ -513,6 +516,8 @@ static int checkout_paths(const struct checkout_opts *opts, * Make sure all pathspecs participated in locating the paths * to be checked out. */ + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&the_index); for (pos = 0; pos < active_nr; pos++) if (opts->overlay_mode) mark_ce_for_checkout_overlay(active_cache[pos], diff --git a/builtin/commit.c b/builtin/commit.c index 55d50a8891b..190d215d43b 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -261,6 +261,8 @@ static int list_paths(struct string_list *list, const char *with_tree, free(max_prefix); } + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&the_index); for (i = 0; i < active_nr; i++) { const struct cache_entry *ce = active_cache[i]; struct string_list_item *item; @@ -976,6 +978,8 @@ static int prepare_to_commit(const char *index_file, const char *prefix, if (get_oid(parent, &oid)) { int i, ita_nr = 0; + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&the_index); for (i = 0; i < active_nr; i++) if (ce_intent_to_add(active_cache[i])) ita_nr++; diff --git a/builtin/difftool.c b/builtin/difftool.c index ef25729d496..0202a430520 100644 --- a/builtin/difftool.c +++ b/builtin/difftool.c @@ -585,6 +585,9 @@ static int run_dir_diff(const char *extcmd, int symlinks, const char *prefix, setenv("GIT_DIFFTOOL_DIRDIFF", "true", 1); rc = run_command_v_opt(helper_argv, flags); + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&wtindex); + /* * If the diff includes working copy files and those * files were modified during the diff, then the changes diff --git a/builtin/fsck.c b/builtin/fsck.c index e6a80e54042..87a99b0108e 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -881,6 +881,8 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) verify_index_checksum = 1; verify_ce_order = 1; read_cache(); + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&the_index); for (i = 0; i < active_nr; i++) { unsigned int mode; struct blob *blob; diff --git a/builtin/grep.c b/builtin/grep.c index 5de725f9044..b71b4a2de65 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -504,6 +504,8 @@ static int grep_cache(struct grep_opt *opt, if (repo_read_index(repo) < 0) die(_("index file corrupt")); + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(repo->index); for (nr = 0; nr < repo->index->cache_nr; nr++) { const struct cache_entry *ce = repo->index->cache[nr]; diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 60a2913a01e..a0b4e54d114 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -57,7 +57,7 @@ static const char *tag_modified = ""; static const char *tag_skip_worktree = ""; static const char *tag_resolve_undo = ""; -static void write_eolinfo(const struct index_state *istate, +static void write_eolinfo(struct index_state *istate, const struct cache_entry *ce, const char *path) { if (show_eol) { @@ -122,7 +122,7 @@ static void print_debug(const struct cache_entry *ce) } } -static void show_dir_entry(const struct index_state *istate, +static void show_dir_entry(struct index_state *istate, const char *tag, struct dir_entry *ent) { int len = max_prefix_len; @@ -139,7 +139,7 @@ static void show_dir_entry(const struct index_state *istate, write_name(ent->name); } -static void show_other_files(const struct index_state *istate, +static void show_other_files(struct index_state *istate, const struct dir_struct *dir) { int i; @@ -152,7 +152,7 @@ static void show_other_files(const struct index_state *istate, } } -static void show_killed_files(const struct index_state *istate, +static void show_killed_files(struct index_state *istate, const struct dir_struct *dir) { int i; @@ -254,7 +254,7 @@ static void show_ce(struct repository *repo, struct dir_struct *dir, } } -static void show_ru_info(const struct index_state *istate) +static void show_ru_info(struct index_state *istate) { struct string_list_item *item; @@ -317,6 +317,8 @@ static void show_files(struct repository *repo, struct dir_struct *dir) if (!(show_cached || show_stage || show_deleted || show_modified)) return; + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(repo->index); for (i = 0; i < repo->index->cache_nr; i++) { const struct cache_entry *ce = repo->index->cache[i]; struct stat st; @@ -494,6 +496,8 @@ void overlay_tree_on_index(struct index_state *istate, die("bad tree-ish %s", tree_name); /* Hoist the unmerged entries up to stage #3 to make room */ + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(istate); for (i = 0; i < istate->cache_nr; i++) { struct cache_entry *ce = istate->cache[i]; if (!ce_stage(ce)) diff --git a/builtin/merge-index.c b/builtin/merge-index.c index 38ea6ad6ca2..c0383fe9df9 100644 --- a/builtin/merge-index.c +++ b/builtin/merge-index.c @@ -58,6 +58,8 @@ static void merge_one_path(const char *path) static void merge_all(void) { int i; + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&the_index); for (i = 0; i < active_nr; i++) { const struct cache_entry *ce = active_cache[i]; if (!ce_stage(ce)) @@ -80,6 +82,9 @@ int cmd_merge_index(int argc, const char **argv, const char *prefix) read_cache(); + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&the_index); + i = 1; if (!strcmp(argv[i], "-o")) { one_shot = 1; diff --git a/builtin/rm.c b/builtin/rm.c index 4858631e0f0..5559a0b453a 100644 --- a/builtin/rm.c +++ b/builtin/rm.c @@ -293,6 +293,8 @@ int cmd_rm(int argc, const char **argv, const char *prefix) seen = xcalloc(pathspec.nr, 1); + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&the_index); for (i = 0; i < active_nr; i++) { const struct cache_entry *ce = active_cache[i]; if (!ce_path_match(&the_index, ce, &pathspec, seen)) diff --git a/builtin/stash.c b/builtin/stash.c index c56fed33546..d68ed784d2a 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -1412,6 +1412,8 @@ static int do_push_stash(const struct pathspec *ps, const char *stash_msg, int q int i; char *ps_matched = xcalloc(ps->nr, 1); + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&the_index); for (i = 0; i < active_nr; i++) ce_path_match(&the_index, active_cache[i], ps, ps_matched); diff --git a/builtin/update-index.c b/builtin/update-index.c index 79087bccea4..f1f16f2de52 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -745,6 +745,8 @@ static int do_reupdate(int ac, const char **av, */ has_head = 0; redo: + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(&the_index); for (pos = 0; pos < active_nr; pos++) { const struct cache_entry *ce = active_cache[pos]; struct cache_entry *old = NULL; diff --git a/cache.h b/cache.h index d81582e6ecf..b785ffb3831 100644 --- a/cache.h +++ b/cache.h @@ -350,6 +350,7 @@ void add_name_hash(struct index_state *istate, struct cache_entry *ce); void remove_name_hash(struct index_state *istate, struct cache_entry *ce); void free_name_hash(struct index_state *istate); +void ensure_full_index(struct index_state *istate); /* Cache entry creation and cleanup */ @@ -800,7 +801,7 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na * index_name_pos(&index, "f", 1) -> -3 * index_name_pos(&index, "g", 1) -> -5 */ -int index_name_pos(const struct index_state *, const char *name, int namelen); +int index_name_pos(struct index_state *, const char *name, int namelen); /* * Some functions return the negative complement of an insert position when a @@ -850,8 +851,8 @@ int add_file_to_index(struct index_state *, const char *path, int flags); int chmod_index_entry(struct index_state *, struct cache_entry *ce, char flip); int ce_same_name(const struct cache_entry *a, const struct cache_entry *b); void set_object_name_for_intent_to_add_entry(struct cache_entry *ce); -int index_name_is_other(const struct index_state *, const char *, int); -void *read_blob_data_from_index(const struct index_state *, const char *, unsigned long *); +int index_name_is_other(struct index_state *, const char *, int); +void *read_blob_data_from_index(struct index_state *, const char *, unsigned long *); /* do stat comparison even if CE_VALID is true */ #define CE_MATCH_IGNORE_VALID 01 diff --git a/convert.c b/convert.c index 45ac75f80c8..fd9c84b0257 100644 --- a/convert.c +++ b/convert.c @@ -127,7 +127,7 @@ static const char *gather_convert_stats_ascii(const char *data, unsigned long si } } -const char *get_cached_convert_stats_ascii(const struct index_state *istate, +const char *get_cached_convert_stats_ascii(struct index_state *istate, const char *path) { const char *ret; @@ -211,7 +211,7 @@ static void check_global_conv_flags_eol(const char *path, } } -static int has_crlf_in_index(const struct index_state *istate, const char *path) +static int has_crlf_in_index(struct index_state *istate, const char *path) { unsigned long sz; void *data; @@ -485,7 +485,7 @@ static int encode_to_worktree(const char *path, const char *src, size_t src_len, return 1; } -static int crlf_to_git(const struct index_state *istate, +static int crlf_to_git(struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *buf, enum convert_crlf_action crlf_action, int conv_flags) @@ -1293,7 +1293,7 @@ static int git_path_check_ident(struct attr_check_item *check) static struct attr_check *check; -void convert_attrs(const struct index_state *istate, +void convert_attrs(struct index_state *istate, struct conv_attrs *ca, const char *path) { struct attr_check_item *ccheck = NULL; @@ -1355,7 +1355,7 @@ void reset_parsed_attributes(void) user_convert_tail = NULL; } -int would_convert_to_git_filter_fd(const struct index_state *istate, const char *path) +int would_convert_to_git_filter_fd(struct index_state *istate, const char *path) { struct conv_attrs ca; @@ -1374,7 +1374,7 @@ int would_convert_to_git_filter_fd(const struct index_state *istate, const char return apply_filter(path, NULL, 0, -1, NULL, ca.drv, CAP_CLEAN, NULL, NULL); } -const char *get_convert_attr_ascii(const struct index_state *istate, const char *path) +const char *get_convert_attr_ascii(struct index_state *istate, const char *path) { struct conv_attrs ca; @@ -1400,7 +1400,7 @@ const char *get_convert_attr_ascii(const struct index_state *istate, const char return ""; } -int convert_to_git(const struct index_state *istate, +int convert_to_git(struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *dst, int conv_flags) { @@ -1434,7 +1434,7 @@ int convert_to_git(const struct index_state *istate, return ret | ident_to_git(src, len, dst, ca.ident); } -void convert_to_git_filter_fd(const struct index_state *istate, +void convert_to_git_filter_fd(struct index_state *istate, const char *path, int fd, struct strbuf *dst, int conv_flags) { @@ -1511,7 +1511,7 @@ int convert_to_working_tree_ca(const struct conv_attrs *ca, meta, NULL); } -int renormalize_buffer(const struct index_state *istate, const char *path, +int renormalize_buffer(struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *dst) { struct conv_attrs ca; @@ -1972,7 +1972,7 @@ struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca, return filter; } -struct stream_filter *get_stream_filter(const struct index_state *istate, +struct stream_filter *get_stream_filter(struct index_state *istate, const char *path, const struct object_id *oid) { diff --git a/convert.h b/convert.h index 43e567a59b0..5ee1c322058 100644 --- a/convert.h +++ b/convert.h @@ -84,19 +84,19 @@ struct conv_attrs { const char *working_tree_encoding; /* Supported encoding or default encoding if NULL */ }; -void convert_attrs(const struct index_state *istate, +void convert_attrs(struct index_state *istate, struct conv_attrs *ca, const char *path); extern enum eol core_eol; extern char *check_roundtrip_encoding; -const char *get_cached_convert_stats_ascii(const struct index_state *istate, +const char *get_cached_convert_stats_ascii(struct index_state *istate, const char *path); const char *get_wt_convert_stats_ascii(const char *path); -const char *get_convert_attr_ascii(const struct index_state *istate, +const char *get_convert_attr_ascii(struct index_state *istate, const char *path); /* returns 1 if *dst was used */ -int convert_to_git(const struct index_state *istate, +int convert_to_git(struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *dst, int conv_flags); int convert_to_working_tree_ca(const struct conv_attrs *ca, @@ -108,7 +108,7 @@ int async_convert_to_working_tree_ca(const struct conv_attrs *ca, size_t len, struct strbuf *dst, const struct checkout_metadata *meta, void *dco); -static inline int convert_to_working_tree(const struct index_state *istate, +static inline int convert_to_working_tree(struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *dst, const struct checkout_metadata *meta) @@ -117,7 +117,7 @@ static inline int convert_to_working_tree(const struct index_state *istate, convert_attrs(istate, &ca, path); return convert_to_working_tree_ca(&ca, path, src, len, dst, meta); } -static inline int async_convert_to_working_tree(const struct index_state *istate, +static inline int async_convert_to_working_tree(struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *dst, const struct checkout_metadata *meta, @@ -129,20 +129,20 @@ static inline int async_convert_to_working_tree(const struct index_state *istate } int async_query_available_blobs(const char *cmd, struct string_list *available_paths); -int renormalize_buffer(const struct index_state *istate, +int renormalize_buffer(struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *dst); -static inline int would_convert_to_git(const struct index_state *istate, +static inline int would_convert_to_git(struct index_state *istate, const char *path) { return convert_to_git(istate, path, NULL, 0, NULL, 0); } /* Precondition: would_convert_to_git_filter_fd(path) == true */ -void convert_to_git_filter_fd(const struct index_state *istate, +void convert_to_git_filter_fd(struct index_state *istate, const char *path, int fd, struct strbuf *dst, int conv_flags); -int would_convert_to_git_filter_fd(const struct index_state *istate, +int would_convert_to_git_filter_fd(struct index_state *istate, const char *path); /* @@ -176,7 +176,7 @@ void reset_parsed_attributes(void); struct stream_filter; /* opaque */ -struct stream_filter *get_stream_filter(const struct index_state *istate, +struct stream_filter *get_stream_filter(struct index_state *istate, const char *path, const struct object_id *); struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca, diff --git a/dir.c b/dir.c index 3474e67e8f3..d81b5df8030 100644 --- a/dir.c +++ b/dir.c @@ -306,7 +306,7 @@ static int do_read_blob(const struct object_id *oid, struct oid_stat *oid_stat, * [1] Only if DO_MATCH_DIRECTORY is passed; otherwise, this is NOT a match. * [2] Only if DO_MATCH_LEADING_PATHSPEC is passed; otherwise, not a match. */ -static int match_pathspec_item(const struct index_state *istate, +static int match_pathspec_item(struct index_state *istate, const struct pathspec_item *item, int prefix, const char *name, int namelen, unsigned flags) { @@ -429,7 +429,7 @@ static int match_pathspec_item(const struct index_state *istate, * pathspec did not match any names, which could indicate that the * user mistyped the nth pathspec. */ -static int do_match_pathspec(const struct index_state *istate, +static int do_match_pathspec(struct index_state *istate, const struct pathspec *ps, const char *name, int namelen, int prefix, char *seen, @@ -500,7 +500,7 @@ static int do_match_pathspec(const struct index_state *istate, return retval; } -static int match_pathspec_with_flags(const struct index_state *istate, +static int match_pathspec_with_flags(struct index_state *istate, const struct pathspec *ps, const char *name, int namelen, int prefix, char *seen, unsigned flags) @@ -516,7 +516,7 @@ static int match_pathspec_with_flags(const struct index_state *istate, return negative ? 0 : positive; } -int match_pathspec(const struct index_state *istate, +int match_pathspec(struct index_state *istate, const struct pathspec *ps, const char *name, int namelen, int prefix, char *seen, int is_dir) @@ -529,7 +529,7 @@ int match_pathspec(const struct index_state *istate, /** * Check if a submodule is a superset of the pathspec */ -int submodule_path_match(const struct index_state *istate, +int submodule_path_match(struct index_state *istate, const struct pathspec *ps, const char *submodule_name, char *seen) @@ -892,7 +892,7 @@ void add_pattern(const char *string, const char *base, add_pattern_to_hashsets(pl, pattern); } -static int read_skip_worktree_file_from_index(const struct index_state *istate, +static int read_skip_worktree_file_from_index(struct index_state *istate, const char *path, size_t *size_out, char **data_out, struct oid_stat *oid_stat) @@ -3542,6 +3542,8 @@ static void connect_wt_gitdir_in_nested(const char *sub_worktree, if (repo_read_index(&subrepo) < 0) die(_("index file corrupt in repo %s"), subrepo.gitdir); + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(subrepo.index); for (i = 0; i < subrepo.index->cache_nr; i++) { const struct cache_entry *ce = subrepo.index->cache[i]; diff --git a/dir.h b/dir.h index 04d886cfce7..35963f9e2ce 100644 --- a/dir.h +++ b/dir.h @@ -354,7 +354,7 @@ int count_slashes(const char *s); int simple_length(const char *match); int no_wildcard(const char *string); char *common_prefix(const struct pathspec *pathspec); -int match_pathspec(const struct index_state *istate, +int match_pathspec(struct index_state *istate, const struct pathspec *pathspec, const char *name, int namelen, int prefix, char *seen, int is_dir); @@ -493,12 +493,12 @@ int git_fnmatch(const struct pathspec_item *item, const char *pattern, const char *string, int prefix); -int submodule_path_match(const struct index_state *istate, +int submodule_path_match(struct index_state *istate, const struct pathspec *ps, const char *submodule_name, char *seen); -static inline int ce_path_match(const struct index_state *istate, +static inline int ce_path_match(struct index_state *istate, const struct cache_entry *ce, const struct pathspec *pathspec, char *seen) @@ -507,7 +507,7 @@ static inline int ce_path_match(const struct index_state *istate, S_ISDIR(ce->ce_mode) || S_ISGITLINK(ce->ce_mode)); } -static inline int dir_path_match(const struct index_state *istate, +static inline int dir_path_match(struct index_state *istate, const struct dir_entry *ent, const struct pathspec *pathspec, int prefix, char *seen) diff --git a/entry.c b/entry.c index 2dc94ba5cc2..9fdc843135f 100644 --- a/entry.c +++ b/entry.c @@ -423,6 +423,8 @@ static void mark_colliding_entries(const struct checkout *state, ce->ce_flags |= CE_MATCHED; + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(state->istate); for (i = 0; i < state->istate->cache_nr; i++) { struct cache_entry *dup = state->istate->cache[i]; diff --git a/merge-ort.c b/merge-ort.c index b1795d838ea..6c2792b10e1 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -2564,7 +2564,7 @@ static int blob_unchanged(struct merge_options *opt, struct strbuf basebuf = STRBUF_INIT; struct strbuf sidebuf = STRBUF_INIT; int ret = 0; /* assume changed for safety */ - const struct index_state *idx = &opt->priv->attr_index; + struct index_state *idx = &opt->priv->attr_index; if (!idx->initialized) initialize_attr_index(opt); diff --git a/merge-recursive.c b/merge-recursive.c index 7618303f7b4..27b222ae490 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -522,6 +522,8 @@ static struct string_list *get_unmerged(struct index_state *istate) unmerged->strdup_strings = 1; + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(istate); for (i = 0; i < istate->cache_nr; i++) { struct string_list_item *item; struct stage_data *e; @@ -3014,7 +3016,7 @@ static int blob_unchanged(struct merge_options *opt, struct strbuf obuf = STRBUF_INIT; struct strbuf abuf = STRBUF_INIT; int ret = 0; /* assume changed for safety */ - const struct index_state *idx = opt->repo->index; + struct index_state *idx = opt->repo->index; if (a->mode != o->mode) return 0; diff --git a/name-hash.c b/name-hash.c index ce28f3f0708..7487d331240 100644 --- a/name-hash.c +++ b/name-hash.c @@ -8,6 +8,7 @@ #include "cache.h" #include "thread-utils.h" #include "trace2.h" +#include "sparse-index.h" struct dir_entry { struct hashmap_entry ent; @@ -109,8 +110,11 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) if (ce->ce_flags & CE_HASHED) return; ce->ce_flags |= CE_HASHED; - hashmap_entry_init(&ce->ent, memihash(ce->name, ce_namelen(ce))); - hashmap_add(&istate->name_hash, &ce->ent); + + if (!S_ISSPARSEDIR(ce->ce_mode)) { + hashmap_entry_init(&ce->ent, memihash(ce->name, ce_namelen(ce))); + hashmap_add(&istate->name_hash, &ce->ent); + } if (ignore_case) add_dir_entry(istate, ce); @@ -680,6 +684,7 @@ int index_dir_exists(struct index_state *istate, const char *name, int namelen) struct dir_entry *dir; lazy_init_name_hash(istate); + expand_to_path(istate, name, namelen, 0); dir = find_dir_entry(istate, name, namelen); return dir && dir->nr; } @@ -690,6 +695,7 @@ void adjust_dirname_case(struct index_state *istate, char *name) const char *ptr = startPtr; lazy_init_name_hash(istate); + expand_to_path(istate, name, strlen(name), 0); while (*ptr) { while (*ptr && *ptr != '/') ptr++; @@ -713,6 +719,7 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na unsigned int hash = memihash(name, namelen); lazy_init_name_hash(istate); + expand_to_path(istate, name, namelen, icase); ce = hashmap_get_entry_from_hash(&istate->name_hash, hash, NULL, struct cache_entry, ent); diff --git a/pathspec.c b/pathspec.c index 18b3be362ae..14c7e9fbe33 100644 --- a/pathspec.c +++ b/pathspec.c @@ -20,7 +20,7 @@ * to use find_pathspecs_matching_against_index() instead. */ void add_pathspec_matches_against_index(const struct pathspec *pathspec, - const struct index_state *istate, + struct index_state *istate, char *seen) { int num_unmatched = 0, i; @@ -36,6 +36,8 @@ void add_pathspec_matches_against_index(const struct pathspec *pathspec, num_unmatched++; if (!num_unmatched) return; + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(istate); for (i = 0; i < istate->cache_nr; i++) { const struct cache_entry *ce = istate->cache[i]; ce_path_match(istate, ce, pathspec, seen); @@ -51,7 +53,7 @@ void add_pathspec_matches_against_index(const struct pathspec *pathspec, * given pathspecs achieves against all items in the index. */ char *find_pathspecs_matching_against_index(const struct pathspec *pathspec, - const struct index_state *istate) + struct index_state *istate) { char *seen = xcalloc(pathspec->nr, 1); add_pathspec_matches_against_index(pathspec, istate, seen); @@ -702,7 +704,7 @@ void clear_pathspec(struct pathspec *pathspec) pathspec->nr = 0; } -int match_pathspec_attrs(const struct index_state *istate, +int match_pathspec_attrs(struct index_state *istate, const char *name, int namelen, const struct pathspec_item *item) { diff --git a/pathspec.h b/pathspec.h index 454ce364fac..2ccc8080b6c 100644 --- a/pathspec.h +++ b/pathspec.h @@ -150,11 +150,11 @@ static inline int ps_strcmp(const struct pathspec_item *item, } void add_pathspec_matches_against_index(const struct pathspec *pathspec, - const struct index_state *istate, + struct index_state *istate, char *seen); char *find_pathspecs_matching_against_index(const struct pathspec *pathspec, - const struct index_state *istate); -int match_pathspec_attrs(const struct index_state *istate, + struct index_state *istate); +int match_pathspec_attrs(struct index_state *istate, const char *name, int namelen, const struct pathspec_item *item); diff --git a/read-cache.c b/read-cache.c index d68e7479af4..72a1d339c97 100644 --- a/read-cache.c +++ b/read-cache.c @@ -549,7 +549,7 @@ int cache_name_stage_compare(const char *name1, int len1, int stage1, const char return 0; } -static int index_name_stage_pos(const struct index_state *istate, const char *name, int namelen, int stage) +static int index_name_stage_pos(struct index_state *istate, const char *name, int namelen, int stage) { int first, last; @@ -567,10 +567,31 @@ static int index_name_stage_pos(const struct index_state *istate, const char *na } first = next+1; } + + if (istate->sparse_index && + first > 0) { + /* Note: first <= istate->cache_nr */ + struct cache_entry *ce = istate->cache[first - 1]; + + /* + * If we are in a sparse-index _and_ the entry before the + * insertion position is a sparse-directory entry that is + * an ancestor of 'name', then we need to expand the index + * and search again. This will only trigger once, because + * thereafter the index is fully expanded. + */ + if (S_ISSPARSEDIR(ce->ce_mode) && + ce_namelen(ce) < namelen && + !strncmp(name, ce->name, ce_namelen(ce))) { + ensure_full_index(istate); + return index_name_stage_pos(istate, name, namelen, stage); + } + } + return -first-1; } -int index_name_pos(const struct index_state *istate, const char *name, int namelen) +int index_name_pos(struct index_state *istate, const char *name, int namelen) { return index_name_stage_pos(istate, name, namelen, 0); } @@ -1556,6 +1577,8 @@ int refresh_index(struct index_state *istate, unsigned int flags, */ preload_index(istate, pathspec, 0); trace2_region_enter("index", "refresh", NULL); + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(istate); for (i = 0; i < istate->cache_nr; i++) { struct cache_entry *ce, *new_entry; int cache_errno = 0; @@ -2478,6 +2501,8 @@ int repo_index_has_changes(struct repository *repo, diff_flush(&opt); return opt.flags.has_changes != 0; } else { + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(istate); for (i = 0; sb && i < istate->cache_nr; i++) { if (i) strbuf_addch(sb, ' '); @@ -3390,8 +3415,8 @@ int repo_read_index_unmerged(struct repository *repo) * We helpfully remove a trailing "/" from directories so that * the output of read_directory can be used as-is. */ -int index_name_is_other(const struct index_state *istate, const char *name, - int namelen) +int index_name_is_other(struct index_state *istate, const char *name, + int namelen) { int pos; if (namelen && name[namelen - 1] == '/') @@ -3409,7 +3434,7 @@ int index_name_is_other(const struct index_state *istate, const char *name, return 1; } -void *read_blob_data_from_index(const struct index_state *istate, +void *read_blob_data_from_index(struct index_state *istate, const char *path, unsigned long *size) { int pos, len; diff --git a/resolve-undo.c b/resolve-undo.c index bbd2e57fe41..e81096e2d45 100644 --- a/resolve-undo.c +++ b/resolve-undo.c @@ -172,6 +172,8 @@ void unmerge_marked_index(struct index_state *istate) if (!istate->resolve_undo) return; + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(istate); for (i = 0; i < istate->cache_nr; i++) { const struct cache_entry *ce = istate->cache[i]; if (ce->ce_flags & CE_MATCHED) @@ -186,6 +188,8 @@ void unmerge_index(struct index_state *istate, const struct pathspec *pathspec) if (!istate->resolve_undo) return; + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(istate); for (i = 0; i < istate->cache_nr; i++) { const struct cache_entry *ce = istate->cache[i]; if (!ce_path_match(istate, ce, pathspec, NULL)) diff --git a/revision.c b/revision.c index 7e73dafd96e..4853c85d0b7 100644 --- a/revision.c +++ b/revision.c @@ -1680,6 +1680,8 @@ static void do_add_index_objects_to_pending(struct rev_info *revs, { int i; + /* TODO: audit for interaction with sparse-index. */ + ensure_full_index(istate); for (i = 0; i < istate->cache_nr; i++) { struct cache_entry *ce = istate->cache[i]; struct blob *blob; diff --git a/sparse-index.c b/sparse-index.c index 95ea17174da..6f21397e2ee 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -283,3 +283,76 @@ void ensure_full_index(struct index_state *istate) trace2_region_leave("index", "ensure_full_index", istate->repo); } + +/* + * This static global helps avoid infinite recursion between + * expand_to_path() and index_file_exists(). + */ +static int in_expand_to_path = 0; + +void expand_to_path(struct index_state *istate, + const char *path, size_t pathlen, int icase) +{ + struct strbuf path_mutable = STRBUF_INIT; + size_t substr_len; + + /* prevent extra recursion */ + if (in_expand_to_path) + return; + + if (!istate || !istate->sparse_index) + return; + + if (!istate->repo) + istate->repo = the_repository; + + in_expand_to_path = 1; + + /* + * We only need to actually expand a region if the + * following are both true: + * + * 1. 'path' is not already in the index. + * 2. Some parent directory of 'path' is a sparse directory. + */ + + if (index_file_exists(istate, path, pathlen, icase)) + goto cleanup; + + strbuf_add(&path_mutable, path, pathlen); + strbuf_addch(&path_mutable, '/'); + + /* Check the name hash for all parent directories */ + substr_len = 0; + while (substr_len < pathlen) { + char temp; + char *replace = strchr(path_mutable.buf + substr_len, '/'); + + if (!replace) + break; + + /* replace the character _after_ the slash */ + replace++; + temp = *replace; + *replace = '\0'; + if (index_file_exists(istate, path_mutable.buf, + path_mutable.len, icase)) { + /* + * We found a parent directory in the name-hash + * hashtable, because only sparse directory entries + * have a trailing '/' character. Since "path" wasn't + * in the index, perhaps it exists within this + * sparse-directory. Expand accordingly. + */ + ensure_full_index(istate); + break; + } + + *replace = temp; + substr_len = replace - path_mutable.buf; + } + +cleanup: + strbuf_release(&path_mutable); + in_expand_to_path = 0; +} diff --git a/sparse-index.h b/sparse-index.h index 39dcc859735..1115a0d7dd9 100644 --- a/sparse-index.h +++ b/sparse-index.h @@ -2,9 +2,21 @@ #define SPARSE_INDEX_H__ struct index_state; -void ensure_full_index(struct index_state *istate); int convert_to_sparse(struct index_state *istate); +/* + * Some places in the codebase expect to search for a specific path. + * This path might be outside of the sparse-checkout definition, in + * which case a sparse-index may not contain a path for that index. + * + * Given an index and a path, check to see if a leading directory for + * 'path' exists in the index as a sparse directory. In that case, + * expand that sparse directory to a full range of cache entries and + * populate the index accordingly. + */ +void expand_to_path(struct index_state *istate, + const char *path, size_t pathlen, int icase); + struct repository; int set_sparse_index_config(struct repository *repo, int enable); diff --git a/submodule.c b/submodule.c index 9767ba9893c..83809a4f7bd 100644 --- a/submodule.c +++ b/submodule.c @@ -33,7 +33,7 @@ static struct oid_array ref_tips_after_fetch; * will be disabled because we can't guess what might be configured in * .gitmodules unless the user resolves the conflict. */ -int is_gitmodules_unmerged(const struct index_state *istate) +int is_gitmodules_unmerged(struct index_state *istate) { int pos = index_name_pos(istate, GITMODULES_FILE, strlen(GITMODULES_FILE)); if (pos < 0) { /* .gitmodules not found or isn't merged */ @@ -301,7 +301,7 @@ int is_submodule_populated_gently(const char *path, int *return_error_code) /* * Dies if the provided 'prefix' corresponds to an unpopulated submodule */ -void die_in_unpopulated_submodule(const struct index_state *istate, +void die_in_unpopulated_submodule(struct index_state *istate, const char *prefix) { int i, prefixlen; @@ -331,7 +331,7 @@ void die_in_unpopulated_submodule(const struct index_state *istate, /* * Dies if any paths in the provided pathspec descends into a submodule */ -void die_path_inside_submodule(const struct index_state *istate, +void die_path_inside_submodule(struct index_state *istate, const struct pathspec *ps) { int i, j; diff --git a/submodule.h b/submodule.h index 4ac6e31cf1f..84640c49c11 100644 --- a/submodule.h +++ b/submodule.h @@ -39,7 +39,7 @@ struct submodule_update_strategy { }; #define SUBMODULE_UPDATE_STRATEGY_INIT {SM_UPDATE_UNSPECIFIED, NULL} -int is_gitmodules_unmerged(const struct index_state *istate); +int is_gitmodules_unmerged(struct index_state *istate); int is_writing_gitmodules_ok(void); int is_staging_gitmodules_ok(struct index_state *istate); int update_path_in_gitmodules(const char *oldpath, const char *newpath); @@ -60,9 +60,9 @@ int is_submodule_active(struct repository *repo, const char *path); * Otherwise the return error code is the same as of resolve_gitdir_gently. */ int is_submodule_populated_gently(const char *path, int *return_error_code); -void die_in_unpopulated_submodule(const struct index_state *istate, +void die_in_unpopulated_submodule(struct index_state *istate, const char *prefix); -void die_path_inside_submodule(const struct index_state *istate, +void die_path_inside_submodule(struct index_state *istate, const struct pathspec *ps); enum submodule_update_type parse_submodule_update_type(const char *value); int parse_submodule_update_strategy(const char *value,