1
0
mirror of https://github.com/git/git.git synced 2024-09-28 15:01:31 +02:00
git/builtin/name-rev.c
SZEDER Gábor 49f7a2fde9 name-rev: eliminate recursion in name_rev()
The name_rev() function calls itself recursively for each interesting
parent of the commit it got as parameter, and, consequently, it can
segfault when processing a deep history if it exhausts the available
stack space.  E.g. running 'git name-rev --all' and 'git name-rev
HEAD~100000' in the gcc, gecko-dev, llvm, and WebKit repositories
results in segfaults on my machine ('ulimit -s' reports 8192kB of
stack size limit), and nowadays the former segfaults in the Linux repo
as well (it reached the necessasry depth sometime between v5.3-rc4 and
-rc5).

Eliminate the recursion by inserting the interesting parents into a
LIFO 'prio_queue' [1] and iterating until the queue becomes empty.

Note that the parent commits must be added in reverse order to the
LIFO 'prio_queue', so their relative order is preserved during
processing, i.e. the first parent should come out first from the
queue, because otherwise performance greatly suffers on mergy
histories [2].

The stacksize-limited test 'name-rev works in a deep repo' in
't6120-describe.sh' demonstrated this issue and expected failure.  Now
the recursion is gone, so flip it to expect success.  Also gone are
the dmesg entries logging the segfault of that segfaulting 'git
name-rev' process on every execution of the test suite.

Note that this slightly changes the order of lines in the output of
'git name-rev --all', usually swapping two lines every 35 lines in
git.git or every 150 lines in linux.git.  This shouldn't matter in
practice, because the output has always been unordered anyway.

This patch is best viewed with '--ignore-all-space'.

[1] Early versions of this patch used a 'commit_list', resulting in
    ~15% performance penalty for 'git name-rev --all' in 'linux.git',
    presumably because of the memory allocation and release for each
    insertion and removal. Using a LIFO 'prio_queue' has basically no
    effect on performance.

[2] We prefer shorter names, i.e. 'v0.1~234' is preferred over
    'v0.1^2~5', meaning that usually following the first parent of a
    merge results in the best name for its ancestors.  So when later
    we follow the remaining parent(s) of a merge, and reach an already
    named commit, then we usually find that we can't give that commit
    a better name, and thus we don't have to visit any of its
    ancestors again.

    OTOH, if we were to follow the Nth parent of the merge first, then
    the name of all its ancestors would include a corresponding '^N'.
    Those are not the best names for those commits, so when later we
    reach an already named commit following the first parent of that
    merge, then we would have to update the name of that commit and
    the names of all of its ancestors as well.  Consequently, we would
    have to visit many commits several times, resulting in a
    significant slowdown.

Signed-off-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-12-09 13:33:01 -08:00

573 lines
14 KiB
C

#include "builtin.h"
#include "cache.h"
#include "repository.h"
#include "config.h"
#include "commit.h"
#include "tag.h"
#include "refs.h"
#include "parse-options.h"
#include "prio-queue.h"
#include "sha1-lookup.h"
#include "commit-slab.h"
/*
* One day. See the 'name a rev shortly after epoch' test in t6120 when
* changing this value
*/
#define CUTOFF_DATE_SLOP 86400
typedef struct rev_name {
const char *tip_name;
timestamp_t taggerdate;
int generation;
int distance;
int from_tag;
} rev_name;
define_commit_slab(commit_rev_name, struct rev_name *);
static timestamp_t cutoff = TIME_MAX;
static struct commit_rev_name rev_names;
/* How many generations are maximally preferred over _one_ merge traversal? */
#define MERGE_TRAVERSAL_WEIGHT 65535
static struct rev_name *get_commit_rev_name(struct commit *commit)
{
struct rev_name **slot = commit_rev_name_peek(&rev_names, commit);
return slot ? *slot : NULL;
}
static void set_commit_rev_name(struct commit *commit, struct rev_name *name)
{
*commit_rev_name_at(&rev_names, commit) = name;
}
static int is_better_name(struct rev_name *name,
timestamp_t taggerdate,
int distance,
int from_tag)
{
/*
* When comparing names based on tags, prefer names
* based on the older tag, even if it is farther away.
*/
if (from_tag && name->from_tag)
return (name->taggerdate > taggerdate ||
(name->taggerdate == taggerdate &&
name->distance > distance));
/*
* We know that at least one of them is a non-tag at this point.
* favor a tag over a non-tag.
*/
if (name->from_tag != from_tag)
return from_tag;
/*
* We are now looking at two non-tags. Tiebreak to favor
* shorter hops.
*/
if (name->distance != distance)
return name->distance > distance;
/* ... or tiebreak to favor older date */
if (name->taggerdate != taggerdate)
return name->taggerdate > taggerdate;
/* keep the current one if we cannot decide */
return 0;
}
static struct rev_name *create_or_update_name(struct commit *commit,
const char *tip_name,
timestamp_t taggerdate,
int generation, int distance,
int from_tag)
{
struct rev_name *name = get_commit_rev_name(commit);
if (name == NULL) {
name = xmalloc(sizeof(*name));
set_commit_rev_name(commit, name);
goto copy_data;
} else if (is_better_name(name, taggerdate, distance, from_tag)) {
copy_data:
name->tip_name = tip_name;
name->taggerdate = taggerdate;
name->generation = generation;
name->distance = distance;
name->from_tag = from_tag;
return name;
} else
return NULL;
}
static void name_rev(struct commit *start_commit,
const char *tip_name, timestamp_t taggerdate,
int from_tag)
{
struct prio_queue queue;
struct commit *commit;
struct commit **parents_to_queue = NULL;
size_t parents_to_queue_nr, parents_to_queue_alloc = 0;
memset(&queue, 0, sizeof(queue)); /* Use the prio_queue as LIFO */
prio_queue_put(&queue, start_commit);
while ((commit = prio_queue_get(&queue))) {
struct rev_name *name = get_commit_rev_name(commit);
struct commit_list *parents;
int parent_number = 1;
parents_to_queue_nr = 0;
for (parents = commit->parents;
parents;
parents = parents->next, parent_number++) {
struct commit *parent = parents->item;
const char *new_name;
int generation, distance;
parse_commit(parent);
if (parent->date < cutoff)
continue;
if (parent_number > 1) {
size_t len;
strip_suffix(name->tip_name, "^0", &len);
if (name->generation > 0)
new_name = xstrfmt("%.*s~%d^%d",
(int)len,
name->tip_name,
name->generation,
parent_number);
else
new_name = xstrfmt("%.*s^%d", (int)len,
name->tip_name,
parent_number);
generation = 0;
distance = name->distance + MERGE_TRAVERSAL_WEIGHT;
} else {
new_name = name->tip_name;
generation = name->generation + 1;
distance = name->distance + 1;
}
if (create_or_update_name(parent, new_name, taggerdate,
generation, distance,
from_tag)) {
ALLOC_GROW(parents_to_queue,
parents_to_queue_nr + 1,
parents_to_queue_alloc);
parents_to_queue[parents_to_queue_nr] = parent;
parents_to_queue_nr++;
}
}
/* The first parent must come out first from the prio_queue */
while (parents_to_queue_nr)
prio_queue_put(&queue,
parents_to_queue[--parents_to_queue_nr]);
}
clear_prio_queue(&queue);
free(parents_to_queue);
}
static int subpath_matches(const char *path, const char *filter)
{
const char *subpath = path;
while (subpath) {
if (!wildmatch(filter, subpath, 0))
return subpath - path;
subpath = strchr(subpath, '/');
if (subpath)
subpath++;
}
return -1;
}
static const char *name_ref_abbrev(const char *refname, int shorten_unambiguous)
{
if (shorten_unambiguous)
refname = shorten_unambiguous_ref(refname, 0);
else if (starts_with(refname, "refs/heads/"))
refname = refname + 11;
else if (starts_with(refname, "refs/"))
refname = refname + 5;
return refname;
}
struct name_ref_data {
int tags_only;
int name_only;
struct string_list ref_filters;
struct string_list exclude_filters;
};
static struct tip_table {
struct tip_table_entry {
struct object_id oid;
const char *refname;
} *table;
int nr;
int alloc;
int sorted;
} tip_table;
static void add_to_tip_table(const struct object_id *oid, const char *refname,
int shorten_unambiguous)
{
refname = name_ref_abbrev(refname, shorten_unambiguous);
ALLOC_GROW(tip_table.table, tip_table.nr + 1, tip_table.alloc);
oidcpy(&tip_table.table[tip_table.nr].oid, oid);
tip_table.table[tip_table.nr].refname = xstrdup(refname);
tip_table.nr++;
tip_table.sorted = 0;
}
static int tipcmp(const void *a_, const void *b_)
{
const struct tip_table_entry *a = a_, *b = b_;
return oidcmp(&a->oid, &b->oid);
}
static int name_ref(const char *path, const struct object_id *oid, int flags, void *cb_data)
{
struct object *o = parse_object(the_repository, oid);
struct name_ref_data *data = cb_data;
int can_abbreviate_output = data->tags_only && data->name_only;
int deref = 0;
timestamp_t taggerdate = TIME_MAX;
if (data->tags_only && !starts_with(path, "refs/tags/"))
return 0;
if (data->exclude_filters.nr) {
struct string_list_item *item;
for_each_string_list_item(item, &data->exclude_filters) {
if (subpath_matches(path, item->string) >= 0)
return 0;
}
}
if (data->ref_filters.nr) {
struct string_list_item *item;
int matched = 0;
/* See if any of the patterns match. */
for_each_string_list_item(item, &data->ref_filters) {
/*
* Check all patterns even after finding a match, so
* that we can see if a match with a subpath exists.
* When a user asked for 'refs/tags/v*' and 'v1.*',
* both of which match, the user is showing her
* willingness to accept a shortened output by having
* the 'v1.*' in the acceptable refnames, so we
* shouldn't stop when seeing 'refs/tags/v1.4' matches
* 'refs/tags/v*'. We should show it as 'v1.4'.
*/
switch (subpath_matches(path, item->string)) {
case -1: /* did not match */
break;
case 0: /* matched fully */
matched = 1;
break;
default: /* matched subpath */
matched = 1;
can_abbreviate_output = 1;
break;
}
}
/* If none of the patterns matched, stop now */
if (!matched)
return 0;
}
add_to_tip_table(oid, path, can_abbreviate_output);
while (o && o->type == OBJ_TAG) {
struct tag *t = (struct tag *) o;
if (!t->tagged)
break; /* broken repository */
o = parse_object(the_repository, &t->tagged->oid);
deref = 1;
taggerdate = t->date;
}
if (o && o->type == OBJ_COMMIT) {
struct commit *commit = (struct commit *)o;
int from_tag = starts_with(path, "refs/tags/");
if (taggerdate == TIME_MAX)
taggerdate = commit->date;
path = name_ref_abbrev(path, can_abbreviate_output);
if (commit->date >= cutoff) {
const char *tip_name;
char *to_free = NULL;
if (deref)
tip_name = to_free = xstrfmt("%s^0", path);
else
tip_name = xstrdup(path);
if (create_or_update_name(commit, tip_name, taggerdate,
0, 0, from_tag))
name_rev(commit, tip_name, taggerdate,
from_tag);
else
free(to_free);
}
}
return 0;
}
static const unsigned char *nth_tip_table_ent(size_t ix, void *table_)
{
struct tip_table_entry *table = table_;
return table[ix].oid.hash;
}
static const char *get_exact_ref_match(const struct object *o)
{
int found;
if (!tip_table.table || !tip_table.nr)
return NULL;
if (!tip_table.sorted) {
QSORT(tip_table.table, tip_table.nr, tipcmp);
tip_table.sorted = 1;
}
found = sha1_pos(o->oid.hash, tip_table.table, tip_table.nr,
nth_tip_table_ent);
if (0 <= found)
return tip_table.table[found].refname;
return NULL;
}
/* may return a constant string or use "buf" as scratch space */
static const char *get_rev_name(const struct object *o, struct strbuf *buf)
{
struct rev_name *n;
struct commit *c;
if (o->type != OBJ_COMMIT)
return get_exact_ref_match(o);
c = (struct commit *) o;
n = get_commit_rev_name(c);
if (!n)
return NULL;
if (!n->generation)
return n->tip_name;
else {
strbuf_reset(buf);
strbuf_addstr(buf, n->tip_name);
strbuf_strip_suffix(buf, "^0");
strbuf_addf(buf, "~%d", n->generation);
return buf->buf;
}
}
static void show_name(const struct object *obj,
const char *caller_name,
int always, int allow_undefined, int name_only)
{
const char *name;
const struct object_id *oid = &obj->oid;
struct strbuf buf = STRBUF_INIT;
if (!name_only)
printf("%s ", caller_name ? caller_name : oid_to_hex(oid));
name = get_rev_name(obj, &buf);
if (name)
printf("%s\n", name);
else if (allow_undefined)
printf("undefined\n");
else if (always)
printf("%s\n", find_unique_abbrev(oid, DEFAULT_ABBREV));
else
die("cannot describe '%s'", oid_to_hex(oid));
strbuf_release(&buf);
}
static char const * const name_rev_usage[] = {
N_("git name-rev [<options>] <commit>..."),
N_("git name-rev [<options>] --all"),
N_("git name-rev [<options>] --stdin"),
NULL
};
static void name_rev_line(char *p, struct name_ref_data *data)
{
struct strbuf buf = STRBUF_INIT;
int counter = 0;
char *p_start;
const unsigned hexsz = the_hash_algo->hexsz;
for (p_start = p; *p; p++) {
#define ishex(x) (isdigit((x)) || ((x) >= 'a' && (x) <= 'f'))
if (!ishex(*p))
counter = 0;
else if (++counter == hexsz &&
!ishex(*(p+1))) {
struct object_id oid;
const char *name = NULL;
char c = *(p+1);
int p_len = p - p_start + 1;
counter = 0;
*(p+1) = 0;
if (!get_oid(p - (hexsz - 1), &oid)) {
struct object *o =
lookup_object(the_repository, &oid);
if (o)
name = get_rev_name(o, &buf);
}
*(p+1) = c;
if (!name)
continue;
if (data->name_only)
printf("%.*s%s", p_len - hexsz, p_start, name);
else
printf("%.*s (%s)", p_len, p_start, name);
p_start = p + 1;
}
}
/* flush */
if (p_start != p)
fwrite(p_start, p - p_start, 1, stdout);
strbuf_release(&buf);
}
int cmd_name_rev(int argc, const char **argv, const char *prefix)
{
struct object_array revs = OBJECT_ARRAY_INIT;
int all = 0, transform_stdin = 0, allow_undefined = 1, always = 0, peel_tag = 0;
struct name_ref_data data = { 0, 0, STRING_LIST_INIT_NODUP, STRING_LIST_INIT_NODUP };
struct option opts[] = {
OPT_BOOL(0, "name-only", &data.name_only, N_("print only names (no SHA-1)")),
OPT_BOOL(0, "tags", &data.tags_only, N_("only use tags to name the commits")),
OPT_STRING_LIST(0, "refs", &data.ref_filters, N_("pattern"),
N_("only use refs matching <pattern>")),
OPT_STRING_LIST(0, "exclude", &data.exclude_filters, N_("pattern"),
N_("ignore refs matching <pattern>")),
OPT_GROUP(""),
OPT_BOOL(0, "all", &all, N_("list all commits reachable from all refs")),
OPT_BOOL(0, "stdin", &transform_stdin, N_("read from stdin")),
OPT_BOOL(0, "undefined", &allow_undefined, N_("allow to print `undefined` names (default)")),
OPT_BOOL(0, "always", &always,
N_("show abbreviated commit object as fallback")),
{
/* A Hidden OPT_BOOL */
OPTION_SET_INT, 0, "peel-tag", &peel_tag, NULL,
N_("dereference tags in the input (internal use)"),
PARSE_OPT_NOARG | PARSE_OPT_HIDDEN, NULL, 1,
},
OPT_END(),
};
init_commit_rev_name(&rev_names);
git_config(git_default_config, NULL);
argc = parse_options(argc, argv, prefix, opts, name_rev_usage, 0);
if (all + transform_stdin + !!argc > 1) {
error("Specify either a list, or --all, not both!");
usage_with_options(name_rev_usage, opts);
}
if (all || transform_stdin)
cutoff = 0;
for (; argc; argc--, argv++) {
struct object_id oid;
struct object *object;
struct commit *commit;
if (get_oid(*argv, &oid)) {
fprintf(stderr, "Could not get sha1 for %s. Skipping.\n",
*argv);
continue;
}
commit = NULL;
object = parse_object(the_repository, &oid);
if (object) {
struct object *peeled = deref_tag(the_repository,
object, *argv, 0);
if (peeled && peeled->type == OBJ_COMMIT)
commit = (struct commit *)peeled;
}
if (!object) {
fprintf(stderr, "Could not get object for %s. Skipping.\n",
*argv);
continue;
}
if (commit) {
if (cutoff > commit->date)
cutoff = commit->date;
}
if (peel_tag) {
if (!commit) {
fprintf(stderr, "Could not get commit for %s. Skipping.\n",
*argv);
continue;
}
object = (struct object *)commit;
}
add_object_array(object, *argv, &revs);
}
if (cutoff) {
/* check for undeflow */
if (cutoff > TIME_MIN + CUTOFF_DATE_SLOP)
cutoff = cutoff - CUTOFF_DATE_SLOP;
else
cutoff = TIME_MIN;
}
for_each_ref(name_ref, &data);
if (transform_stdin) {
char buffer[2048];
while (!feof(stdin)) {
char *p = fgets(buffer, sizeof(buffer), stdin);
if (!p)
break;
name_rev_line(p, &data);
}
} else if (all) {
int i, max;
max = get_max_object_index();
for (i = 0; i < max; i++) {
struct object *obj = get_indexed_object(i);
if (!obj || obj->type != OBJ_COMMIT)
continue;
show_name(obj, NULL,
always, allow_undefined, data.name_only);
}
} else {
int i;
for (i = 0; i < revs.nr; i++)
show_name(revs.objects[i].item, revs.objects[i].name,
always, allow_undefined, data.name_only);
}
UNLEAK(revs);
return 0;
}