1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-25 04:16:09 +02:00

shortlog: de-duplicate trailer values

The current documentation is vague about what happens with
--group=trailer:signed-off-by when we see a commit with:

  Signed-off-by: One
  Signed-off-by: Two
  Signed-off-by: One

We clearly should credit both "One" and "Two", but should "One" get
credited twice? The current code does so, but mostly because that was
the easiest thing to do. It's probably more useful to count each commit
at most once. This will become especially important when we allow
values from multiple sources in a future patch.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Jeff King 2020-09-27 04:40:07 -04:00 committed by Junio C Hamano
parent 47beb37bc6
commit f17b0b99bf
3 changed files with 88 additions and 1 deletions

View File

@ -61,7 +61,8 @@ OPTIONS
+
Note that commits that do not include the trailer will not be counted.
Likewise, commits with multiple trailers (e.g., multiple signoffs) may
be counted more than once.
be counted more than once (but only once per unique trailer value in
that commit).
+
The contents of each trailer value are taken literally and completely.
No mailmap is applied, and the `-e` option has no effect (if the trailer

View File

@ -166,6 +166,60 @@ static void read_from_stdin(struct shortlog *log)
strbuf_release(&oneline);
}
/*
 * One member of a strset: a hashmap entry plus the string itself, kept
 * in the trailing flexible array "value" (items are allocated with
 * FLEX_ALLOC_STR() in strset_check_and_add()).
 */
struct strset_item {
/* hashmap linkage; container_of() maps this member back to the item */
struct hashmap_entry ent;
/* the stored string; items are compared by strcmp() on this field */
char value[FLEX_ARRAY];
};
/*
 * A set of strings, implemented as a hashmap whose entries are
 * struct strset_item.
 */
struct strset {
struct hashmap map;
};
/*
 * Static initializer for a struct strset. It leaves map.table NULL, which
 * strset_check_and_add() and strset_clear() treat as "hashmap not yet
 * initialized".
 */
#define STRSET_INIT { { NULL } }
/*
 * Comparison callback handed to hashmap_init() for strset maps.
 *
 * "entry" is always a strset_item. The other side of the comparison is
 * "keydata" when it is non-NULL (a plain string, as supplied by the
 * lookup in strset_check_and_add()), and otherwise the strset_item that
 * contains "entry_or_key". "hash_data" is not used.
 *
 * Returns the strcmp() result of the two strings, i.e. 0 when they match.
 */
static int strset_item_hashcmp(const void *hash_data,
			       const struct hashmap_entry *entry,
			       const struct hashmap_entry *entry_or_key,
			       const void *keydata)
{
	const struct strset_item *lhs =
		container_of(entry, const struct strset_item, ent);
	const struct strset_item *rhs;
	const char *rhs_value = keydata;

	if (!rhs_value) {
		/* no bare key given: compare against the other item's string */
		rhs = container_of(entry_or_key, const struct strset_item, ent);
		rhs_value = rhs->value;
	}
	return strcmp(lhs->value, rhs_value);
}
/*
* Adds "str" to the set if it was not already present; returns true if it was
* already there.
*/
static int strset_check_and_add(struct strset *ss, const char *str)
{
unsigned int hash = strhash(str);
struct strset_item *item;
if (!ss->map.table)
hashmap_init(&ss->map, strset_item_hashcmp, NULL, 0);
if (hashmap_get_from_hash(&ss->map, hash, str))
return 1;
FLEX_ALLOC_STR(item, value, str);
hashmap_entry_init(&item->ent, hash);
hashmap_add(&ss->map, &item->ent);
return 0;
}
/*
 * Release the entries held by "ss". A set whose hashmap was never
 * initialized (map.table is still NULL, i.e. nothing was ever added)
 * is left alone.
 */
static void strset_clear(struct strset *ss)
{
	if (ss->map.table) {
		hashmap_free_entries(&ss->map, struct strset_item, ent);
	}
}
static void insert_records_from_trailers(struct shortlog *log,
struct commit *commit,
struct pretty_print_context *ctx,
@ -173,6 +227,7 @@ static void insert_records_from_trailers(struct shortlog *log,
{
struct trailer_iterator iter;
const char *commit_buffer, *body;
struct strset dups = STRSET_INIT;
/*
* Using format_commit_message("%B") would be simpler here, but
@ -190,10 +245,13 @@ static void insert_records_from_trailers(struct shortlog *log,
if (strcasecmp(iter.key.buf, log->trailer))
continue;
if (strset_check_and_add(&dups, value))
continue;
insert_one_record(log, value, oneline);
}
trailer_iterator_release(&iter);
strset_clear(&dups);
unuse_commit_buffer(commit, commit_buffer);
}

View File

@ -234,4 +234,32 @@ test_expect_success 'shortlog --group=trailer:signed-off-by' '
test_cmp expect actual
'
# Creates two empty commits whose messages repeat the same
# "Repeated-trailer" value, then checks the output of
# "git shortlog -ns --group=trailer:repeated-trailer" over those two
# commits: "Foo" (repeated in both commits) is expected with a count of 2,
# and "Bar" (present only in the first commit) with a count of 1.
test_expect_success 'shortlog de-duplicates trailers in a single commit' '
git commit --allow-empty -F - <<-\EOF &&
subject one
this message has two distinct values, plus a repeat
Repeated-trailer: Foo
Repeated-trailer: Bar
Repeated-trailer: Foo
EOF
git commit --allow-empty -F - <<-\EOF &&
subject two
similar to the previous, but without the second distinct value
Repeated-trailer: Foo
Repeated-trailer: Foo
EOF
cat >expect <<-\EOF &&
2 Foo
1 Bar
EOF
git shortlog -ns --group=trailer:repeated-trailer -2 HEAD >actual &&
test_cmp expect actual
'
test_done