From 16b1985be553b5fc6273eb9d7277173623e2d7cb Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 20 Jan 2021 11:04:21 -0500 Subject: [PATCH 1/3] refs: expose 'for_each_fullref_in_prefixes' This function was used in the ref-filter.c code to find the longest common prefix of among a set of refspecs, and then to iterate all of the references that descend from that prefix. A future patch will want to use that same code from ls-refs.c, so prepare by exposing and moving it to refs.c. Since there is nothing specific to the ref-filter code here (other than that it was previously the only caller of this function), this really belongs in the more generic refs.h header. The code moved in this patch is identical before and after, with the one exception of renaming some arguments to be consistent with other functions exposed in refs.h. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- ref-filter.c | 74 ++------------------------------------------ refs.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++ refs.h | 9 ++++++ 3 files changed, 98 insertions(+), 72 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index aa260bfd09..f918f00151 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -1929,64 +1929,6 @@ static int filter_pattern_match(struct ref_filter *filter, const char *refname) return match_pattern(filter, refname); } -static int qsort_strcmp(const void *va, const void *vb) -{ - const char *a = *(const char **)va; - const char *b = *(const char **)vb; - - return strcmp(a, b); -} - -static void find_longest_prefixes_1(struct string_list *out, - struct strbuf *prefix, - const char **patterns, size_t nr) -{ - size_t i; - - for (i = 0; i < nr; i++) { - char c = patterns[i][prefix->len]; - if (!c || is_glob_special(c)) { - string_list_append(out, prefix->buf); - return; - } - } - - i = 0; - while (i < nr) { - size_t end; - - /* - * Set "end" to the index of the element _after_ the last one - * in our group. - */ - for (end = i + 1; end < nr; end++) { - if (patterns[i][prefix->len] != patterns[end][prefix->len]) - break; - } - - strbuf_addch(prefix, patterns[i][prefix->len]); - find_longest_prefixes_1(out, prefix, patterns + i, end - i); - strbuf_setlen(prefix, prefix->len - 1); - - i = end; - } -} - -static void find_longest_prefixes(struct string_list *out, - const char **patterns) -{ - struct strvec sorted = STRVEC_INIT; - struct strbuf prefix = STRBUF_INIT; - - strvec_pushv(&sorted, patterns); - QSORT(sorted.v, sorted.nr, qsort_strcmp); - - find_longest_prefixes_1(out, &prefix, sorted.v, sorted.nr); - - strvec_clear(&sorted); - strbuf_release(&prefix); -} - /* * This is the same as for_each_fullref_in(), but it tries to iterate * only over the patterns we'll care about. Note that it _doesn't_ do a full @@ -1997,10 +1939,6 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, void *cb_data, int broken) { - struct string_list prefixes = STRING_LIST_INIT_DUP; - struct string_list_item *prefix; - int ret; - if (!filter->match_as_path) { /* * in this case, the patterns are applied after @@ -2024,16 +1962,8 @@ static int for_each_fullref_in_pattern(struct ref_filter *filter, return for_each_fullref_in("", cb, cb_data, broken); } - find_longest_prefixes(&prefixes, filter->name_patterns); - - for_each_string_list_item(prefix, &prefixes) { - ret = for_each_fullref_in(prefix->string, cb, cb_data, broken); - if (ret) - break; - } - - string_list_clear(&prefixes, 0); - return ret; + return for_each_fullref_in_prefixes(NULL, filter->name_patterns, + cb, cb_data, broken); } /* diff --git a/refs.c b/refs.c index 13dc2c3291..e0f439b7fd 100644 --- a/refs.c +++ b/refs.c @@ -1546,6 +1546,93 @@ int for_each_rawref(each_ref_fn fn, void *cb_data) return refs_for_each_rawref(get_main_ref_store(the_repository), fn, cb_data); } +static int qsort_strcmp(const void *va, const void *vb) +{ + const char *a = *(const char **)va; + const char *b = *(const char **)vb; + + return strcmp(a, b); +} + +static void find_longest_prefixes_1(struct string_list *out, + struct strbuf *prefix, + const char **patterns, size_t nr) +{ + size_t i; + + for (i = 0; i < nr; i++) { + char c = patterns[i][prefix->len]; + if (!c || is_glob_special(c)) { + string_list_append(out, prefix->buf); + return; + } + } + + i = 0; + while (i < nr) { + size_t end; + + /* + * Set "end" to the index of the element _after_ the last one + * in our group. + */ + for (end = i + 1; end < nr; end++) { + if (patterns[i][prefix->len] != patterns[end][prefix->len]) + break; + } + + strbuf_addch(prefix, patterns[i][prefix->len]); + find_longest_prefixes_1(out, prefix, patterns + i, end - i); + strbuf_setlen(prefix, prefix->len - 1); + + i = end; + } +} + +static void find_longest_prefixes(struct string_list *out, + const char **patterns) +{ + struct strvec sorted = STRVEC_INIT; + struct strbuf prefix = STRBUF_INIT; + + strvec_pushv(&sorted, patterns); + QSORT(sorted.v, sorted.nr, qsort_strcmp); + + find_longest_prefixes_1(out, &prefix, sorted.v, sorted.nr); + + strvec_clear(&sorted); + strbuf_release(&prefix); +} + +int for_each_fullref_in_prefixes(const char *namespace, + const char **patterns, + each_ref_fn fn, void *cb_data, + unsigned int broken) +{ + struct string_list prefixes = STRING_LIST_INIT_DUP; + struct string_list_item *prefix; + struct strbuf buf = STRBUF_INIT; + int ret = 0, namespace_len; + + find_longest_prefixes(&prefixes, patterns); + + if (namespace) + strbuf_addstr(&buf, namespace); + namespace_len = buf.len; + + for_each_string_list_item(prefix, &prefixes) { + strbuf_addstr(&buf, prefix->string); + ret = for_each_fullref_in(buf.buf, fn, cb_data, broken); + if (ret) + break; + strbuf_setlen(&buf, namespace_len); + } + + string_list_clear(&prefixes, 0); + strbuf_release(&buf); + return ret; +} + static int refs_read_special_head(struct ref_store *ref_store, const char *refname, struct object_id *oid, struct strbuf *referent, unsigned int *type) diff --git a/refs.h b/refs.h index ff05d2e9fe..c5fd35487d 100644 --- a/refs.h +++ b/refs.h @@ -347,6 +347,15 @@ int refs_for_each_fullref_in(struct ref_store *refs, const char *prefix, int for_each_fullref_in(const char *prefix, each_ref_fn fn, void *cb_data, unsigned int broken); +/** + * iterate all refs in "patterns" by partitioning patterns into disjoint sets + * and iterating the longest-common prefix of each set. + * + * callers should be prepared to ignore references that they did not ask for. + */ +int for_each_fullref_in_prefixes(const char *namespace, const char **patterns, + each_ref_fn fn, void *cb_data, + unsigned int broken); /** * iterate refs from the respective area. */ From 83befd37249726f6a94e55f5aad1113fd18102a0 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Wed, 20 Jan 2021 11:04:25 -0500 Subject: [PATCH 2/3] ls-refs.c: initialize 'prefixes' before using it Correctly initialize the "prefixes" strvec using strvec_init() instead of simply zeroing it via the earlier memset(). There's no way to trigger a crash, since the first 'ref-prefix' command will initialize the strvec via the 'ALLOC_GROW' in 'strvec_push_nodup()' (the alloc and nr variables are already zero'd, so the call to ALLOC_GROW is valid). If no "ref-prefix" command was given, then the call to 'ls-refs.c:ref_match()' will abort early after it reads the zero in 'prefixes->nr'. Likewise, strvec_clear() will only call free() on the array, which is NULL, so we're safe there, too. But, all of this is dangerous and requires more reasoning than it would if we simply called 'strvec_init()', so do that. Signed-off-by: Jacob Vosmaer Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- ls-refs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ls-refs.c b/ls-refs.c index a1e0b473e4..367597d447 100644 --- a/ls-refs.c +++ b/ls-refs.c @@ -90,6 +90,7 @@ int ls_refs(struct repository *r, struct strvec *keys, struct ls_refs_data data; memset(&data, 0, sizeof(data)); + strvec_init(&data.prefixes); git_config(ls_refs_config, NULL); From b3970c702cb0acc0551d88a5f34ad4ad2e2a6d39 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Wed, 20 Jan 2021 11:04:30 -0500 Subject: [PATCH 3/3] ls-refs.c: traverse prefixes of disjoint "ref-prefix" sets ls-refs performs a single revision walk over the whole ref namespace, and sends ones that match with one of the given ref prefixes down to the user. This can be expensive if there are many refs overall, but the portion of them covered by the given prefixes is small by comparison. To attempt to reduce the difference between the number of refs traversed, and the number of refs sent, only traverse references which are in the longest common prefix of the given prefixes. This is very reminiscent of the approach taken in b31e2680c4 (ref-filter.c: find disjoint pattern prefixes, 2019-06-26) which does an analogous thing for multi-patterned 'git for-each-ref' invocations. The callback 'send_ref' is resilient to ignore extra patterns by discarding any arguments which do not begin with at least one of the specified prefixes. Similarly, the code introduced in b31e2680c4 is resilient to stop early at metacharacters, but we only pass strict prefixes here. At worst we would return too many results, but the double checking done by send_ref will throw away anything that doesn't start with something in the prefix list. Finally, if no prefixes were provided, then implicitly add the empty string (which will match all references) since this matches the existing behavior (see the "no restrictions" comment in "ls-refs.c:ref_match()"). Original-patch-by: Jacob Vosmaer Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- ls-refs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ls-refs.c b/ls-refs.c index 367597d447..eaaa36d0df 100644 --- a/ls-refs.c +++ b/ls-refs.c @@ -110,7 +110,10 @@ int ls_refs(struct repository *r, struct strvec *keys, die(_("expected flush after ls-refs arguments")); head_ref_namespaced(send_ref, &data); - for_each_namespaced_ref(send_ref, &data); + if (!data.prefixes.nr) + strvec_push(&data.prefixes, ""); + for_each_fullref_in_prefixes(get_git_namespace(), data.prefixes.v, + send_ref, &data, 0); packet_flush(1); strvec_clear(&data.prefixes); return 0;