From 38e95844e8f9bfefb34443650e99f740581a394b Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 16 Dec 2020 11:50:30 -0300 Subject: [PATCH 1/9] convert: make convert_attrs() and convert structs public Move convert_attrs() declaration from convert.c to convert.h, together with the conv_attrs struct and the crlf_action enum. This function and the data structures will be used outside convert.c in the upcoming parallel checkout implementation. Note that crlf_action is renamed to convert_crlf_action, which is more appropriate for the global namespace. Signed-off-by: Jeff Hostetler Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- convert.c | 35 ++++++++--------------------------- convert.h | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/convert.c b/convert.c index ee360c2f07..f13b001273 100644 --- a/convert.c +++ b/convert.c @@ -24,17 +24,6 @@ #define CONVERT_STAT_BITS_TXT_CRLF 0x2 #define CONVERT_STAT_BITS_BIN 0x4 -enum crlf_action { - CRLF_UNDEFINED, - CRLF_BINARY, - CRLF_TEXT, - CRLF_TEXT_INPUT, - CRLF_TEXT_CRLF, - CRLF_AUTO, - CRLF_AUTO_INPUT, - CRLF_AUTO_CRLF -}; - struct text_stat { /* NUL, CR, LF and CRLF counts */ unsigned nul, lonecr, lonelf, crlf; @@ -172,7 +161,7 @@ static int text_eol_is_crlf(void) return 0; } -static enum eol output_eol(enum crlf_action crlf_action) +static enum eol output_eol(enum convert_crlf_action crlf_action) { switch (crlf_action) { case CRLF_BINARY: @@ -246,7 +235,7 @@ static int has_crlf_in_index(const struct index_state *istate, const char *path) } static int will_convert_lf_to_crlf(struct text_stat *stats, - enum crlf_action crlf_action) + enum convert_crlf_action crlf_action) { if (output_eol(crlf_action) != EOL_CRLF) return 0; @@ -499,7 +488,7 @@ static int encode_to_worktree(const char *path, const char *src, size_t src_len, static int crlf_to_git(const struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *buf, - enum crlf_action crlf_action, int conv_flags) + enum convert_crlf_action crlf_action, int conv_flags) { struct text_stat stats; char *dst; @@ -585,8 +574,8 @@ static int crlf_to_git(const struct index_state *istate, return 1; } -static int crlf_to_worktree(const char *src, size_t len, - struct strbuf *buf, enum crlf_action crlf_action) +static int crlf_to_worktree(const char *src, size_t len, struct strbuf *buf, + enum convert_crlf_action crlf_action) { char *to_free = NULL; struct text_stat stats; @@ -1247,7 +1236,7 @@ static const char *git_path_check_encoding(struct attr_check_item *check) return value; } -static enum crlf_action git_path_check_crlf(struct attr_check_item *check) +static enum convert_crlf_action git_path_check_crlf(struct attr_check_item *check) { const char *value = check->value; @@ -1297,18 +1286,10 @@ static int git_path_check_ident(struct attr_check_item *check) return !!ATTR_TRUE(value); } -struct conv_attrs { - struct convert_driver *drv; - enum crlf_action attr_action; /* What attr says */ - enum crlf_action crlf_action; /* When no attr is set, use core.autocrlf */ - int ident; - const char *working_tree_encoding; /* Supported encoding or default encoding if NULL */ -}; - static struct attr_check *check; -static void convert_attrs(const struct index_state *istate, - struct conv_attrs *ca, const char *path) +void convert_attrs(const struct index_state *istate, + struct conv_attrs *ca, const char *path) { struct attr_check_item *ccheck = NULL; diff --git a/convert.h b/convert.h index e29d1026a6..5678e99922 100644 --- a/convert.h +++ b/convert.h @@ -63,6 +63,30 @@ struct checkout_metadata { struct object_id blob; }; +enum convert_crlf_action { + CRLF_UNDEFINED, + CRLF_BINARY, + CRLF_TEXT, + CRLF_TEXT_INPUT, + CRLF_TEXT_CRLF, + CRLF_AUTO, + CRLF_AUTO_INPUT, + CRLF_AUTO_CRLF +}; + +struct convert_driver; + +struct conv_attrs { + struct convert_driver *drv; + enum convert_crlf_action attr_action; /* What attr says */ + enum convert_crlf_action crlf_action; /* When no attr is set, use core.autocrlf */ + int ident; + const char *working_tree_encoding; /* Supported encoding or default encoding if NULL */ +}; + +void convert_attrs(const struct index_state *istate, + struct conv_attrs *ca, const char *path); + extern enum eol core_eol; extern char *check_roundtrip_encoding; const char *get_cached_convert_stats_ascii(const struct index_state *istate, From 55b4ad0eada6924e5e59600266b5d50cbf22733d Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 16 Dec 2020 11:50:31 -0300 Subject: [PATCH 2/9] convert: add [async_]convert_to_working_tree_ca() variants Separate the attribute gathering from the actual conversion by adding _ca() variants of the conversion functions. These variants receive a precomputed 'struct conv_attrs', not relying, thus, on an index state. They will be used in a future patch adding parallel checkout support, for two reasons: - We will already load the conversion attributes in checkout_entry(), before conversion, to decide whether a path is eligible for parallel checkout. Therefore, it would be wasteful to load them again later, for the actual conversion. - The parallel workers will be responsible for reading, converting and writing blobs to the working tree. They won't have access to the main process' index state, so they cannot load the attributes. Instead, they will receive the preloaded ones and call the _ca() variant of the conversion functions. Furthermore, the attributes machinery is optimized to handle paths in sequential order, so it's better to leave it for the main process, anyway. Signed-off-by: Jeff Hostetler Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- convert.c | 60 +++++++++++++++++++++++++++++-------------------------- convert.h | 37 +++++++++++++++++++++++++--------- 2 files changed, 60 insertions(+), 37 deletions(-) diff --git a/convert.c b/convert.c index f13b001273..0307374241 100644 --- a/convert.c +++ b/convert.c @@ -1447,19 +1447,16 @@ void convert_to_git_filter_fd(const struct index_state *istate, ident_to_git(dst->buf, dst->len, dst, ca.ident); } -static int convert_to_working_tree_internal(const struct index_state *istate, - const char *path, const char *src, - size_t len, struct strbuf *dst, - int normalizing, - const struct checkout_metadata *meta, - struct delayed_checkout *dco) +static int convert_to_working_tree_ca_internal(const struct conv_attrs *ca, + const char *path, const char *src, + size_t len, struct strbuf *dst, + int normalizing, + const struct checkout_metadata *meta, + struct delayed_checkout *dco) { int ret = 0, ret_filter = 0; - struct conv_attrs ca; - convert_attrs(istate, &ca, path); - - ret |= ident_to_worktree(src, len, dst, ca.ident); + ret |= ident_to_worktree(src, len, dst, ca->ident); if (ret) { src = dst->buf; len = dst->len; @@ -1469,49 +1466,56 @@ static int convert_to_working_tree_internal(const struct index_state *istate, * is a smudge or process filter (even if the process filter doesn't * support smudge). The filters might expect CRLFs. */ - if ((ca.drv && (ca.drv->smudge || ca.drv->process)) || !normalizing) { - ret |= crlf_to_worktree(src, len, dst, ca.crlf_action); + if ((ca->drv && (ca->drv->smudge || ca->drv->process)) || !normalizing) { + ret |= crlf_to_worktree(src, len, dst, ca->crlf_action); if (ret) { src = dst->buf; len = dst->len; } } - ret |= encode_to_worktree(path, src, len, dst, ca.working_tree_encoding); + ret |= encode_to_worktree(path, src, len, dst, ca->working_tree_encoding); if (ret) { src = dst->buf; len = dst->len; } ret_filter = apply_filter( - path, src, len, -1, dst, ca.drv, CAP_SMUDGE, meta, dco); - if (!ret_filter && ca.drv && ca.drv->required) - die(_("%s: smudge filter %s failed"), path, ca.drv->name); + path, src, len, -1, dst, ca->drv, CAP_SMUDGE, meta, dco); + if (!ret_filter && ca->drv && ca->drv->required) + die(_("%s: smudge filter %s failed"), path, ca->drv->name); return ret | ret_filter; } -int async_convert_to_working_tree(const struct index_state *istate, - const char *path, const char *src, - size_t len, struct strbuf *dst, - const struct checkout_metadata *meta, - void *dco) +int async_convert_to_working_tree_ca(const struct conv_attrs *ca, + const char *path, const char *src, + size_t len, struct strbuf *dst, + const struct checkout_metadata *meta, + void *dco) { - return convert_to_working_tree_internal(istate, path, src, len, dst, 0, meta, dco); + return convert_to_working_tree_ca_internal(ca, path, src, len, dst, 0, + meta, dco); } -int convert_to_working_tree(const struct index_state *istate, - const char *path, const char *src, - size_t len, struct strbuf *dst, - const struct checkout_metadata *meta) +int convert_to_working_tree_ca(const struct conv_attrs *ca, + const char *path, const char *src, + size_t len, struct strbuf *dst, + const struct checkout_metadata *meta) { - return convert_to_working_tree_internal(istate, path, src, len, dst, 0, meta, NULL); + return convert_to_working_tree_ca_internal(ca, path, src, len, dst, 0, + meta, NULL); } int renormalize_buffer(const struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *dst) { - int ret = convert_to_working_tree_internal(istate, path, src, len, dst, 1, NULL, NULL); + struct conv_attrs ca; + int ret; + + convert_attrs(istate, &ca, path); + ret = convert_to_working_tree_ca_internal(&ca, path, src, len, dst, 1, + NULL, NULL); if (ret) { src = dst->buf; len = dst->len; diff --git a/convert.h b/convert.h index 5678e99922..a4838b5e5c 100644 --- a/convert.h +++ b/convert.h @@ -99,15 +99,34 @@ const char *get_convert_attr_ascii(const struct index_state *istate, int convert_to_git(const struct index_state *istate, const char *path, const char *src, size_t len, struct strbuf *dst, int conv_flags); -int convert_to_working_tree(const struct index_state *istate, - const char *path, const char *src, - size_t len, struct strbuf *dst, - const struct checkout_metadata *meta); -int async_convert_to_working_tree(const struct index_state *istate, - const char *path, const char *src, - size_t len, struct strbuf *dst, - const struct checkout_metadata *meta, - void *dco); +int convert_to_working_tree_ca(const struct conv_attrs *ca, + const char *path, const char *src, + size_t len, struct strbuf *dst, + const struct checkout_metadata *meta); +int async_convert_to_working_tree_ca(const struct conv_attrs *ca, + const char *path, const char *src, + size_t len, struct strbuf *dst, + const struct checkout_metadata *meta, + void *dco); +static inline int convert_to_working_tree(const struct index_state *istate, + const char *path, const char *src, + size_t len, struct strbuf *dst, + const struct checkout_metadata *meta) +{ + struct conv_attrs ca; + convert_attrs(istate, &ca, path); + return convert_to_working_tree_ca(&ca, path, src, len, dst, meta); +} +static inline int async_convert_to_working_tree(const struct index_state *istate, + const char *path, const char *src, + size_t len, struct strbuf *dst, + const struct checkout_metadata *meta, + void *dco) +{ + struct conv_attrs ca; + convert_attrs(istate, &ca, path); + return async_convert_to_working_tree_ca(&ca, path, src, len, dst, meta, dco); +} int async_query_available_blobs(const char *cmd, struct string_list *available_paths); int renormalize_buffer(const struct index_state *istate, From 3e9e82c0d897814d77cc755d25acb9df6e8bf48f Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 16 Dec 2020 11:50:32 -0300 Subject: [PATCH 3/9] convert: add get_stream_filter_ca() variant Like the previous patch, we will also need to call get_stream_filter() with a precomputed `struct conv_attrs`, when we add support for parallel checkout workers. So add the _ca() variant which takes the conversion attributes struct as a parameter. Signed-off-by: Jeff Hostetler Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- convert.c | 28 +++++++++++++++++----------- convert.h | 2 ++ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/convert.c b/convert.c index 0307374241..9af6aafc5a 100644 --- a/convert.c +++ b/convert.c @@ -1942,34 +1942,31 @@ static struct stream_filter *ident_filter(const struct object_id *oid) } /* - * Return an appropriately constructed filter for the path, or NULL if + * Return an appropriately constructed filter for the given ca, or NULL if * the contents cannot be filtered without reading the whole thing * in-core. * * Note that you would be crazy to set CRLF, smudge/clean or ident to a * large binary blob you would want us not to slurp into the memory! */ -struct stream_filter *get_stream_filter(const struct index_state *istate, - const char *path, - const struct object_id *oid) +struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca, + const struct object_id *oid) { - struct conv_attrs ca; struct stream_filter *filter = NULL; - convert_attrs(istate, &ca, path); - if (ca.drv && (ca.drv->process || ca.drv->smudge || ca.drv->clean)) + if (ca->drv && (ca->drv->process || ca->drv->smudge || ca->drv->clean)) return NULL; - if (ca.working_tree_encoding) + if (ca->working_tree_encoding) return NULL; - if (ca.crlf_action == CRLF_AUTO || ca.crlf_action == CRLF_AUTO_CRLF) + if (ca->crlf_action == CRLF_AUTO || ca->crlf_action == CRLF_AUTO_CRLF) return NULL; - if (ca.ident) + if (ca->ident) filter = ident_filter(oid); - if (output_eol(ca.crlf_action) == EOL_CRLF) + if (output_eol(ca->crlf_action) == EOL_CRLF) filter = cascade_filter(filter, lf_to_crlf_filter()); else filter = cascade_filter(filter, &null_filter_singleton); @@ -1977,6 +1974,15 @@ struct stream_filter *get_stream_filter(const struct index_state *istate, return filter; } +struct stream_filter *get_stream_filter(const struct index_state *istate, + const char *path, + const struct object_id *oid) +{ + struct conv_attrs ca; + convert_attrs(istate, &ca, path); + return get_stream_filter_ca(&ca, oid); +} + void free_stream_filter(struct stream_filter *filter) { filter->vtbl->free(filter); diff --git a/convert.h b/convert.h index a4838b5e5c..484b50965d 100644 --- a/convert.h +++ b/convert.h @@ -179,6 +179,8 @@ struct stream_filter; /* opaque */ struct stream_filter *get_stream_filter(const struct index_state *istate, const char *path, const struct object_id *); +struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca, + const struct object_id *oid); void free_stream_filter(struct stream_filter *); int is_null_stream_filter(struct stream_filter *); From f59d15bb420b649a6ee3163a3418aac6b813d331 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Wed, 16 Dec 2020 11:50:33 -0300 Subject: [PATCH 4/9] convert: add classification for conv_attrs struct Create `enum conv_attrs_classification` to express the different ways that attributes are handled for a blob during checkout. This will be used in a later commit when deciding whether to add a file to the parallel or delayed queue during checkout. For now, we can also use it in get_stream_filter_ca() to simplify the function (as the classifying logic is the same). Signed-off-by: Jeff Hostetler Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- convert.c | 26 +++++++++++++++++++------- convert.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/convert.c b/convert.c index 9af6aafc5a..b9d25d9a47 100644 --- a/convert.c +++ b/convert.c @@ -1954,13 +1954,7 @@ struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca, { struct stream_filter *filter = NULL; - if (ca->drv && (ca->drv->process || ca->drv->smudge || ca->drv->clean)) - return NULL; - - if (ca->working_tree_encoding) - return NULL; - - if (ca->crlf_action == CRLF_AUTO || ca->crlf_action == CRLF_AUTO_CRLF) + if (classify_conv_attrs(ca) != CA_CLASS_STREAMABLE) return NULL; if (ca->ident) @@ -2016,3 +2010,21 @@ void clone_checkout_metadata(struct checkout_metadata *dst, if (blob) oidcpy(&dst->blob, blob); } + +enum conv_attrs_classification classify_conv_attrs(const struct conv_attrs *ca) +{ + if (ca->drv) { + if (ca->drv->process) + return CA_CLASS_INCORE_PROCESS; + if (ca->drv->smudge || ca->drv->clean) + return CA_CLASS_INCORE_FILTER; + } + + if (ca->working_tree_encoding) + return CA_CLASS_INCORE; + + if (ca->crlf_action == CRLF_AUTO || ca->crlf_action == CRLF_AUTO_CRLF) + return CA_CLASS_INCORE; + + return CA_CLASS_STREAMABLE; +} diff --git a/convert.h b/convert.h index 484b50965d..43e567a59b 100644 --- a/convert.h +++ b/convert.h @@ -200,4 +200,37 @@ int stream_filter(struct stream_filter *, const char *input, size_t *isize_p, char *output, size_t *osize_p); +enum conv_attrs_classification { + /* + * The blob must be loaded into a buffer before it can be + * smudged. All smudging is done in-proc. + */ + CA_CLASS_INCORE, + + /* + * The blob must be loaded into a buffer, but uses a + * single-file driver filter, such as rot13. + */ + CA_CLASS_INCORE_FILTER, + + /* + * The blob must be loaded into a buffer, but uses a + * long-running driver process, such as LFS. This might or + * might not use delayed operations. (The important thing is + * that there is a single subordinate long-running process + * handling all associated blobs and in case of delayed + * operations, may hold per-blob state.) + */ + CA_CLASS_INCORE_PROCESS, + + /* + * The blob can be streamed and smudged without needing to + * completely read it into a buffer. + */ + CA_CLASS_STREAMABLE, +}; + +enum conv_attrs_classification classify_conv_attrs( + const struct conv_attrs *ca); + #endif /* CONVERT_H */ From d052cc038270d1231a245d7ac6d60559123464d3 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Tue, 23 Mar 2021 11:19:32 -0300 Subject: [PATCH 5/9] entry: extract a header file for entry.c functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The declarations of entry.c's public functions and structures currently reside in cache.h. Although not many, they contribute to the size of cache.h and, when changed, cause the unnecessary recompilation of modules that don't really use these functions. So let's move them to a new entry.h header. While at it let's also move a comment related to checkout_entry() from entry.c to entry.h as it's more useful to describe the function there. Original-patch-by: Nguyễn Thái Ngọc Duy Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- apply.c | 1 + builtin/checkout-index.c | 1 + builtin/checkout.c | 1 + builtin/difftool.c | 1 + builtin/stash.c | 1 + cache.h | 24 ----------------------- entry.c | 9 +-------- entry.h | 42 ++++++++++++++++++++++++++++++++++++++++ unpack-trees.c | 1 + 9 files changed, 49 insertions(+), 32 deletions(-) create mode 100644 entry.h diff --git a/apply.c b/apply.c index 668b16e989..ea92a2455d 100644 --- a/apply.c +++ b/apply.c @@ -21,6 +21,7 @@ #include "quote.h" #include "rerere.h" #include "apply.h" +#include "entry.h" struct gitdiff_data { struct strbuf *root; diff --git a/builtin/checkout-index.c b/builtin/checkout-index.c index 023e49e271..c0bf4ac1b2 100644 --- a/builtin/checkout-index.c +++ b/builtin/checkout-index.c @@ -11,6 +11,7 @@ #include "quote.h" #include "cache-tree.h" #include "parse-options.h" +#include "entry.h" #define CHECKOUT_ALL 4 static int nul_term_line; diff --git a/builtin/checkout.c b/builtin/checkout.c index 2d6550bc3c..06c90d76e8 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -26,6 +26,7 @@ #include "unpack-trees.h" #include "wt-status.h" #include "xdiff-interface.h" +#include "entry.h" static const char * const checkout_usage[] = { N_("git checkout [] "), diff --git a/builtin/difftool.c b/builtin/difftool.c index 6e18e623fd..ef25729d49 100644 --- a/builtin/difftool.c +++ b/builtin/difftool.c @@ -23,6 +23,7 @@ #include "lockfile.h" #include "object-store.h" #include "dir.h" +#include "entry.h" static int trust_exit_code; diff --git a/builtin/stash.c b/builtin/stash.c index ba774cce67..11f3ae3039 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -15,6 +15,7 @@ #include "log-tree.h" #include "diffcore.h" #include "exec-cmd.h" +#include "entry.h" #define INCLUDE_ALL_FILES 2 diff --git a/cache.h b/cache.h index 6fda8091f1..5d45d145fa 100644 --- a/cache.h +++ b/cache.h @@ -1621,30 +1621,6 @@ const char *show_ident_date(const struct ident_split *id, */ int ident_cmp(const struct ident_split *, const struct ident_split *); -struct checkout { - struct index_state *istate; - const char *base_dir; - int base_dir_len; - struct delayed_checkout *delayed_checkout; - struct checkout_metadata meta; - unsigned force:1, - quiet:1, - not_new:1, - clone:1, - refresh_cache:1; -}; -#define CHECKOUT_INIT { NULL, "" } - -#define TEMPORARY_FILENAME_LENGTH 25 -int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath, int *nr_checkouts); -void enable_delayed_checkout(struct checkout *state); -int finish_delayed_checkout(struct checkout *state, int *nr_checkouts); -/* - * Unlink the last component and schedule the leading directories for - * removal, such that empty directories get removed. - */ -void unlink_entry(const struct cache_entry *ce); - struct cache_def { struct strbuf path; int flags; diff --git a/entry.c b/entry.c index 7b9f43716f..c3e511bfb3 100644 --- a/entry.c +++ b/entry.c @@ -6,6 +6,7 @@ #include "submodule.h" #include "progress.h" #include "fsmonitor.h" +#include "entry.h" static void create_directories(const char *path, int path_len, const struct checkout *state) @@ -429,14 +430,6 @@ static void mark_colliding_entries(const struct checkout *state, } } -/* - * Write the contents from ce out to the working tree. - * - * When topath[] is not NULL, instead of writing to the working tree - * file named by ce, a temporary file is created by this function and - * its name is returned in topath[], which must be able to hold at - * least TEMPORARY_FILENAME_LENGTH bytes long. - */ int checkout_entry(struct cache_entry *ce, const struct checkout *state, char *topath, int *nr_checkouts) { diff --git a/entry.h b/entry.h new file mode 100644 index 0000000000..acbbb90220 --- /dev/null +++ b/entry.h @@ -0,0 +1,42 @@ +#ifndef ENTRY_H +#define ENTRY_H + +#include "cache.h" +#include "convert.h" + +struct checkout { + struct index_state *istate; + const char *base_dir; + int base_dir_len; + struct delayed_checkout *delayed_checkout; + struct checkout_metadata meta; + unsigned force:1, + quiet:1, + not_new:1, + clone:1, + refresh_cache:1; +}; +#define CHECKOUT_INIT { NULL, "" } + +#define TEMPORARY_FILENAME_LENGTH 25 +/* + * Write the contents from ce out to the working tree. + * + * When topath[] is not NULL, instead of writing to the working tree + * file named by ce, a temporary file is created by this function and + * its name is returned in topath[], which must be able to hold at + * least TEMPORARY_FILENAME_LENGTH bytes long. + */ +int checkout_entry(struct cache_entry *ce, const struct checkout *state, + char *topath, int *nr_checkouts); + +void enable_delayed_checkout(struct checkout *state); +int finish_delayed_checkout(struct checkout *state, int *nr_checkouts); + +/* + * Unlink the last component and schedule the leading directories for + * removal, such that empty directories get removed. + */ +void unlink_entry(const struct cache_entry *ce); + +#endif /* ENTRY_H */ diff --git a/unpack-trees.c b/unpack-trees.c index eb8fcda31b..5b3dd38f8c 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -16,6 +16,7 @@ #include "fsmonitor.h" #include "object-store.h" #include "promisor-remote.h" +#include "entry.h" /* * Error messages expected by scripts out of plumbing commands such as From 49cfd9032a5c1ef7401b79eff8db471b4c6ed7ef Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Tue, 23 Mar 2021 11:19:33 -0300 Subject: [PATCH 6/9] entry: make fstat_output() and read_blob_entry() public These two functions will be used by the parallel checkout code, so let's make them public. Note: fstat_output() is renamed to fstat_checkout_output(), now that it has become public, seeking to avoid future name collisions. Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- entry.c | 8 ++++---- entry.h | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/entry.c b/entry.c index c3e511bfb3..1e2d9f7baa 100644 --- a/entry.c +++ b/entry.c @@ -84,7 +84,7 @@ static int create_file(const char *path, unsigned int mode) return open(path, O_WRONLY | O_CREAT | O_EXCL, mode); } -static void *read_blob_entry(const struct cache_entry *ce, unsigned long *size) +void *read_blob_entry(const struct cache_entry *ce, unsigned long *size) { enum object_type type; void *blob_data = read_object_file(&ce->oid, &type, size); @@ -109,7 +109,7 @@ static int open_output_fd(char *path, const struct cache_entry *ce, int to_tempf } } -static int fstat_output(int fd, const struct checkout *state, struct stat *st) +int fstat_checkout_output(int fd, const struct checkout *state, struct stat *st) { /* use fstat() only when path == ce->name */ if (fstat_is_reliable() && @@ -132,7 +132,7 @@ static int streaming_write_entry(const struct cache_entry *ce, char *path, return -1; result |= stream_blob_to_fd(fd, &ce->oid, filter, 1); - *fstat_done = fstat_output(fd, state, statbuf); + *fstat_done = fstat_checkout_output(fd, state, statbuf); result |= close(fd); if (result) @@ -346,7 +346,7 @@ static int write_entry(struct cache_entry *ce, wrote = write_in_full(fd, new_blob, size); if (!to_tempfile) - fstat_done = fstat_output(fd, state, &st); + fstat_done = fstat_checkout_output(fd, state, &st); close(fd); free(new_blob); if (wrote < 0) diff --git a/entry.h b/entry.h index acbbb90220..60df93ca78 100644 --- a/entry.h +++ b/entry.h @@ -39,4 +39,7 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts); */ void unlink_entry(const struct cache_entry *ce); +void *read_blob_entry(const struct cache_entry *ce, unsigned long *size); +int fstat_checkout_output(int fd, const struct checkout *state, struct stat *st); + #endif /* ENTRY_H */ From 584a0d13f25d60694a1226cd274c33dba62bf9e4 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Tue, 23 Mar 2021 11:19:34 -0300 Subject: [PATCH 7/9] entry: extract update_ce_after_write() from write_entry() The code that updates the in-memory index information after an entry is written currently resides in write_entry(). Extract it to a public function so that it can be called by the parallel checkout functions, outside entry.c, in a later patch. Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- entry.c | 25 ++++++++++++++++--------- entry.h | 2 ++ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/entry.c b/entry.c index 1e2d9f7baa..4cf0db352f 100644 --- a/entry.c +++ b/entry.c @@ -251,6 +251,18 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts) return errs; } +void update_ce_after_write(const struct checkout *state, struct cache_entry *ce, + struct stat *st) +{ + if (state->refresh_cache) { + assert(state->istate); + fill_stat_cache_info(state->istate, ce, st); + ce->ce_flags |= CE_UPDATE_IN_BASE; + mark_fsmonitor_invalid(state->istate, ce); + state->istate->cache_changed |= CE_ENTRY_CHANGED; + } +} + static int write_entry(struct cache_entry *ce, char *path, const struct checkout *state, int to_tempfile) { @@ -371,15 +383,10 @@ static int write_entry(struct cache_entry *ce, finish: if (state->refresh_cache) { - assert(state->istate); - if (!fstat_done) - if (lstat(ce->name, &st) < 0) - return error_errno("unable to stat just-written file %s", - ce->name); - fill_stat_cache_info(state->istate, ce, &st); - ce->ce_flags |= CE_UPDATE_IN_BASE; - mark_fsmonitor_invalid(state->istate, ce); - state->istate->cache_changed |= CE_ENTRY_CHANGED; + if (!fstat_done && lstat(ce->name, &st) < 0) + return error_errno("unable to stat just-written file %s", + ce->name); + update_ce_after_write(state, ce , &st); } delayed: return 0; diff --git a/entry.h b/entry.h index 60df93ca78..ea7290bcd5 100644 --- a/entry.h +++ b/entry.h @@ -41,5 +41,7 @@ void unlink_entry(const struct cache_entry *ce); void *read_blob_entry(const struct cache_entry *ce, unsigned long *size); int fstat_checkout_output(int fd, const struct checkout *state, struct stat *st); +void update_ce_after_write(const struct checkout *state, struct cache_entry *ce, + struct stat *st); #endif /* ENTRY_H */ From 30419e7e1d53232bb83dcace200d1295b326b22b Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Tue, 23 Mar 2021 11:19:35 -0300 Subject: [PATCH 8/9] entry: move conv_attrs lookup up to checkout_entry() In a following patch, checkout_entry() will use conv_attrs to decide whether an entry should be enqueued for parallel checkout or not. But the attributes lookup only happens lower in this call stack. To avoid the unnecessary work of loading the attributes twice, let's move it up to checkout_entry(), and pass the loaded struct down to write_entry(). Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- entry.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/entry.c b/entry.c index 4cf0db352f..6339d54843 100644 --- a/entry.c +++ b/entry.c @@ -263,8 +263,9 @@ void update_ce_after_write(const struct checkout *state, struct cache_entry *ce, } } -static int write_entry(struct cache_entry *ce, - char *path, const struct checkout *state, int to_tempfile) +/* Note: ca is used (and required) iff the entry refers to a regular file. */ +static int write_entry(struct cache_entry *ce, char *path, struct conv_attrs *ca, + const struct checkout *state, int to_tempfile) { unsigned int ce_mode_s_ifmt = ce->ce_mode & S_IFMT; struct delayed_checkout *dco = state->delayed_checkout; @@ -281,8 +282,7 @@ static int write_entry(struct cache_entry *ce, clone_checkout_metadata(&meta, &state->meta, &ce->oid); if (ce_mode_s_ifmt == S_IFREG) { - struct stream_filter *filter = get_stream_filter(state->istate, ce->name, - &ce->oid); + struct stream_filter *filter = get_stream_filter_ca(ca, &ce->oid); if (filter && !streaming_write_entry(ce, path, filter, state, to_tempfile, @@ -329,14 +329,17 @@ static int write_entry(struct cache_entry *ce, * Convert from git internal format to working tree format */ if (dco && dco->state != CE_NO_DELAY) { - ret = async_convert_to_working_tree(state->istate, ce->name, new_blob, - size, &buf, &meta, dco); + ret = async_convert_to_working_tree_ca(ca, ce->name, + new_blob, size, + &buf, &meta, dco); if (ret && string_list_has_string(&dco->paths, ce->name)) { free(new_blob); goto delayed; } - } else - ret = convert_to_working_tree(state->istate, ce->name, new_blob, size, &buf, &meta); + } else { + ret = convert_to_working_tree_ca(ca, ce->name, new_blob, + size, &buf, &meta); + } if (ret) { free(new_blob); @@ -442,6 +445,7 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, { static struct strbuf path = STRBUF_INIT; struct stat st; + struct conv_attrs ca_buf, *ca = NULL; if (ce->ce_flags & CE_WT_REMOVE) { if (topath) @@ -454,8 +458,13 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, return 0; } - if (topath) - return write_entry(ce, topath, state, 1); + if (topath) { + if (S_ISREG(ce->ce_mode)) { + convert_attrs(state->istate, &ca_buf, ce->name); + ca = &ca_buf; + } + return write_entry(ce, topath, ca, state, 1); + } strbuf_reset(&path); strbuf_add(&path, state->base_dir, state->base_dir_len); @@ -517,9 +526,16 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, return 0; create_directories(path.buf, path.len, state); + if (nr_checkouts) (*nr_checkouts)++; - return write_entry(ce, path.buf, state, 0); + + if (S_ISREG(ce->ce_mode)) { + convert_attrs(state->istate, &ca_buf, ce->name); + ca = &ca_buf; + } + + return write_entry(ce, path.buf, ca, state, 0); } void unlink_entry(const struct cache_entry *ce) From ae22751f9b4bbbebcd0366a48a118b5a575af72d Mon Sep 17 00:00:00 2001 From: Matheus Tavares Date: Tue, 23 Mar 2021 11:19:36 -0300 Subject: [PATCH 9/9] entry: add checkout_entry_ca() taking preloaded conv_attrs The parallel checkout machinery will call checkout_entry() for entries that could not be written in parallel due to path collisions. At this point, we will already be holding the conversion attributes for each entry, and it would be wasteful to let checkout_entry() load these again. Instead, let's add the checkout_entry_ca() variant, which optionally takes a preloaded conv_attrs struct. Signed-off-by: Matheus Tavares Signed-off-by: Junio C Hamano --- entry.c | 11 ++++++----- entry.h | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/entry.c b/entry.c index 6339d54843..2ce16414a7 100644 --- a/entry.c +++ b/entry.c @@ -440,12 +440,13 @@ static void mark_colliding_entries(const struct checkout *state, } } -int checkout_entry(struct cache_entry *ce, const struct checkout *state, - char *topath, int *nr_checkouts) +int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca, + const struct checkout *state, char *topath, + int *nr_checkouts) { static struct strbuf path = STRBUF_INIT; struct stat st; - struct conv_attrs ca_buf, *ca = NULL; + struct conv_attrs ca_buf; if (ce->ce_flags & CE_WT_REMOVE) { if (topath) @@ -459,7 +460,7 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, } if (topath) { - if (S_ISREG(ce->ce_mode)) { + if (S_ISREG(ce->ce_mode) && !ca) { convert_attrs(state->istate, &ca_buf, ce->name); ca = &ca_buf; } @@ -530,7 +531,7 @@ int checkout_entry(struct cache_entry *ce, const struct checkout *state, if (nr_checkouts) (*nr_checkouts)++; - if (S_ISREG(ce->ce_mode)) { + if (S_ISREG(ce->ce_mode) && !ca) { convert_attrs(state->istate, &ca_buf, ce->name); ca = &ca_buf; } diff --git a/entry.h b/entry.h index ea7290bcd5..b8c0e170dc 100644 --- a/entry.h +++ b/entry.h @@ -26,9 +26,21 @@ struct checkout { * file named by ce, a temporary file is created by this function and * its name is returned in topath[], which must be able to hold at * least TEMPORARY_FILENAME_LENGTH bytes long. + * + * With checkout_entry_ca(), callers can optionally pass a preloaded + * conv_attrs struct (to avoid reloading it), when ce refers to a + * regular file. If ca is NULL, the attributes will be loaded + * internally when (and if) needed. */ -int checkout_entry(struct cache_entry *ce, const struct checkout *state, - char *topath, int *nr_checkouts); +int checkout_entry_ca(struct cache_entry *ce, struct conv_attrs *ca, + const struct checkout *state, char *topath, + int *nr_checkouts); +static inline int checkout_entry(struct cache_entry *ce, + const struct checkout *state, char *topath, + int *nr_checkouts) +{ + return checkout_entry_ca(ce, NULL, state, topath, nr_checkouts); +} void enable_delayed_checkout(struct checkout *state); int finish_delayed_checkout(struct checkout *state, int *nr_checkouts);