From e9c1b0e38cbaf626034c3741cc68f7d706aee451 Mon Sep 17 00:00:00 2001 From: Siddharth Asthana Date: Tue, 19 Jul 2022 01:20:59 +0530 Subject: [PATCH 1/4] revision: improve commit_rewrite_person() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function, commit_rewrite_person(), is designed to find and replace an ident string in the header part, and the way it avoids a random occurrence of "author A U Thor" in the text is by insisting that "author" appear at the beginning of a line, which the caller requests by passing "\nauthor " as "what". Rework it to walk the header part line by line in a single pass, rewriting every line that starts with one of the prefixes in the NULL-terminated "header" list and stopping at the blank line that ends the headers. The function now returns void because the caller ignores the result. Mentored-by: John Cai Helped-by: Đoàn Trần Công Danh Helped-by: Johannes Schindelin Signed-off-by: Siddharth Asthana Signed-off-by: Junio C Hamano --- revision.c | 64 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/revision.c b/revision.c index 211352795c..3418a1b7f1 100644 --- a/revision.c +++ b/revision.c @@ -3755,25 +3755,18 @@ int rewrite_parents(struct rev_info *revs, struct commit *commit, return 0; } -static int commit_rewrite_person(struct strbuf *buf, const char *what, struct string_list *mailmap) +/* + * Returns the difference between the new and old length of the ident line. 
+ */
+static ssize_t rewrite_ident_line(const char *person, size_t len,
+				   struct strbuf *buf,
+				   struct string_list *mailmap)
 {
-	char *person, *endp;
-	size_t len, namelen, maillen;
+	size_t namelen, maillen;
 	const char *name;
 	const char *mail;
 	struct ident_split ident;
 
-	person = strstr(buf->buf, what);
-	if (!person)
-		return 0;
-
-	person += strlen(what);
-	endp = strchr(person, '\n');
-	if (!endp)
-		return 0;
-
-	len = endp - person;
-
 	if (split_ident_line(&ident, person, len))
 		return 0;
 
@@ -3784,6 +3777,7 @@ static int commit_rewrite_person(struct strbuf *buf, const char *what, struct st
 
 	if (map_user(mailmap, &mail, &maillen, &name, &namelen)) {
 		struct strbuf namemail = STRBUF_INIT;
+		size_t newlen;
 
 		strbuf_addf(&namemail, "%.*s <%.*s>",
 			    (int)namelen, name, (int)maillen, mail);
@@ -3791,15 +3785,50 @@ static int commit_rewrite_person(struct strbuf *buf, const char *what, struct st
 		strbuf_splice(buf, ident.name_begin - buf->buf,
 			      ident.mail_end - ident.name_begin + 1,
 			      namemail.buf, namemail.len);
+		newlen = namemail.len;
 
 		strbuf_release(&namemail);
 
-		return 1;
+		return newlen - (ident.mail_end - ident.name_begin + 1);
 	}
 
 	return 0;
 }
 
+static void commit_rewrite_person(struct strbuf *buf, const char **header,
+				  struct string_list *mailmap)
+{
+	size_t buf_offset = 0;
+
+	if (!mailmap)
+		return;
+
+	for (;;) {
+		const char *person, *line;
+		size_t i;
+		int found_header = 0;
+
+		line = buf->buf + buf_offset;
+		if (!*line || *line == '\n')
+			return; /* End of headers */
+
+		for (i = 0; header[i]; i++)
+			if (skip_prefix(line, header[i], &person)) {
+				const char *endp = strchrnul(person, '\n');
+				found_header = 1;
+				buf_offset += endp - line;
+				buf_offset += rewrite_ident_line(person, endp - person, buf, mailmap);
+				break;
+			}
+
+		if (!found_header)
+			buf_offset = strchrnul(line, '\n') - buf->buf;
+		/* At end of line either way; step over its newline */
+		if (buf->buf[buf_offset] == '\n')
+			buf_offset++;
+	}
+}
+
 static int commit_match(struct commit *commit, struct rev_info *opt)
 {
 	int retval;
@@ -3832,11 +3861,12 @@ static int commit_match(struct commit *commit, struct rev_info *opt)
 		strbuf_addstr(&buf, message);
 
 	if (opt->grep_filter.header_list && opt->mailmap) {
+		const char *commit_headers[] = { "author ", "committer ", NULL };
+
 		if (!buf.len)
 			strbuf_addstr(&buf, message);
 
-		commit_rewrite_person(&buf, "\nauthor ", opt->mailmap);
-		commit_rewrite_person(&buf, "\ncommitter ", opt->mailmap);
+		commit_rewrite_person(&buf, commit_headers, opt->mailmap);
 	}
 
 	/* Append "fake" message parts as needed */

From dc88e349a297de6b7d0e21d81ac98f7816fcd473 Mon Sep 17 00:00:00 2001
From: Siddharth Asthana
Date: Tue, 19 Jul 2022 01:21:00 +0530
Subject: [PATCH 2/4] ident: move commit_rewrite_person() to ident.c

commit_rewrite_person() and rewrite_ident_line() are static functions
defined in revision.c.

Their usages are as follows:

- commit_rewrite_person() takes a commit buffer and replaces the author
  and committer idents with their canonical versions using the mailmap
  mechanism
- rewrite_ident_line() takes author/committer header lines from the
  commit buffer and replaces the idents with their canonical versions
  using the mailmap mechanism.

This patch moves commit_rewrite_person() and rewrite_ident_line() to
ident.c which contains many other functions related to idents like
split_ident_line(). By moving commit_rewrite_person() to ident.c, we
also intend to use it in git-cat-file to replace committer and author
idents from the headers to their canonical versions using the mailmap
mechanism. The function is moved as is for now to make it clear that
there are no other changes, but it will be renamed in a following
commit.

Mentored-by: Christian Couder
Mentored-by: John Cai
Signed-off-by: Siddharth Asthana
Signed-off-by: Junio C Hamano
---
 cache.h    |  6 +++++
 ident.c    | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 revision.c | 74 ------------------------------------------------------
 3 files changed, 80 insertions(+), 74 deletions(-)

diff --git a/cache.h b/cache.h
index ac5ab4ef9d..16a08aada2 100644
--- a/cache.h
+++ b/cache.h
@@ -1688,6 +1688,12 @@ struct ident_split {
  */
 int split_ident_line(struct ident_split *, const char *, int);
 
+/*
+ * Given a commit object buffer and the commit headers, replaces the idents
+ * in the headers with their canonical versions using the mailmap mechanism.
+ */
+void commit_rewrite_person(struct strbuf *, const char **, struct string_list *);
+
 /*
  * Compare split idents for equality or strict ordering. Note that we
  * compare only the ident part of the line, ignoring any timestamp.
diff --git a/ident.c b/ident.c
index 89ca5b4700..1eee4fd0e3 100644
--- a/ident.c
+++ b/ident.c
@@ -8,6 +8,7 @@
 #include "cache.h"
 #include "config.h"
 #include "date.h"
+#include "mailmap.h"
 
 static struct strbuf git_default_name = STRBUF_INIT;
 static struct strbuf git_default_email = STRBUF_INIT;
@@ -346,6 +347,79 @@ int split_ident_line(struct ident_split *split, const char *line, int len)
 	return 0;
 }
 
+/*
+ * Returns the difference between the new and old length of the ident line.
+ */
+static ssize_t rewrite_ident_line(const char *person, size_t len,
+				   struct strbuf *buf,
+				   struct string_list *mailmap)
+{
+	size_t namelen, maillen;
+	const char *name;
+	const char *mail;
+	struct ident_split ident;
+
+	if (split_ident_line(&ident, person, len))
+		return 0;
+
+	mail = ident.mail_begin;
+	maillen = ident.mail_end - ident.mail_begin;
+	name = ident.name_begin;
+	namelen = ident.name_end - ident.name_begin;
+
+	if (map_user(mailmap, &mail, &maillen, &name, &namelen)) {
+		struct strbuf namemail = STRBUF_INIT;
+		size_t newlen;
+
+		strbuf_addf(&namemail, "%.*s <%.*s>",
+			    (int)namelen, name, (int)maillen, mail);
+
+		strbuf_splice(buf, ident.name_begin - buf->buf,
+			      ident.mail_end - ident.name_begin + 1,
+			      namemail.buf, namemail.len);
+		newlen = namemail.len;
+
+		strbuf_release(&namemail);
+
+		return newlen - (ident.mail_end - ident.name_begin + 1);
+	}
+
+	return 0;
+}
+
+void commit_rewrite_person(struct strbuf *buf, const char **header,
+			   struct string_list *mailmap)
+{
+	size_t buf_offset = 0;
+
+	if (!mailmap)
+		return;
+
+	for (;;) {
+		const char *person, *line;
+		size_t i;
+		int found_header = 0;
+
+		line = buf->buf + buf_offset;
+		if (!*line || *line == '\n')
+			return; /* End of headers */
+
+		for (i = 0; header[i]; i++)
+			if (skip_prefix(line, header[i], &person)) {
+				const char *endp = strchrnul(person, '\n');
+				found_header = 1;
+				buf_offset += endp - line;
+				buf_offset += rewrite_ident_line(person, endp - person, buf, mailmap);
+				break;
+			}
+
+		if (!found_header)
+			buf_offset = strchrnul(line, '\n') - buf->buf;
+		/* At end of line either way; step over its newline */
+		if (buf->buf[buf_offset] == '\n')
+			buf_offset++;
+	}
+}
 
 static void ident_env_hint(enum want_ident whose_ident)
 {
diff --git a/revision.c b/revision.c
index 3418a1b7f1..14dca903b6 100644
--- a/revision.c
+++ b/revision.c
@@ -3755,80 +3755,6 @@ int rewrite_parents(struct rev_info *revs, struct commit *commit,
 	return 0;
 }
 
-/*
- * Returns the difference between the new and old length of the ident line.
- */
-static ssize_t rewrite_ident_line(const char *person, size_t len,
-				   struct strbuf *buf,
-				   struct string_list *mailmap)
-{
-	size_t namelen, maillen;
-	const char *name;
-	const char *mail;
-	struct ident_split ident;
-
-	if (split_ident_line(&ident, person, len))
-		return 0;
-
-	mail = ident.mail_begin;
-	maillen = ident.mail_end - ident.mail_begin;
-	name = ident.name_begin;
-	namelen = ident.name_end - ident.name_begin;
-
-	if (map_user(mailmap, &mail, &maillen, &name, &namelen)) {
-		struct strbuf namemail = STRBUF_INIT;
-		size_t newlen;
-
-		strbuf_addf(&namemail, "%.*s <%.*s>",
-			    (int)namelen, name, (int)maillen, mail);
-
-		strbuf_splice(buf, ident.name_begin - buf->buf,
-			      ident.mail_end - ident.name_begin + 1,
-			      namemail.buf, namemail.len);
-		newlen = namemail.len;
-
-		strbuf_release(&namemail);
-
-		return newlen - (ident.mail_end - ident.name_begin + 1);
-	}
-
-	return 0;
-}
-
-static void commit_rewrite_person(struct strbuf *buf, const char **header,
-				  struct string_list *mailmap)
-{
-	size_t buf_offset = 0;
-
-	if (!mailmap)
-		return;
-
-	for (;;) {
-		const char *person, *line;
-		size_t i;
-		int found_header = 0;
-
-		line = buf->buf + buf_offset;
-		if (!*line || *line == '\n')
-			return; /* End of headers */
-
-		for (i = 0; header[i]; i++)
-			if (skip_prefix(line, header[i], &person)) {
-				const char *endp = strchrnul(person, '\n');
-				found_header = 1;
-				buf_offset += endp - line;
-				buf_offset += rewrite_ident_line(person, endp - person, buf, mailmap);
-				break;
-			}
-
-		if (!found_header)
-			buf_offset = strchrnul(line, '\n') - buf->buf;
-		/* At end of line either way; step over its newline */
-		if (buf->buf[buf_offset] == '\n')
-			buf_offset++;
-	}
-}
-
 static int commit_match(struct commit *commit, struct rev_info *opt)
 {
 	int retval;

From 66a8a95315edb3feba1190dcd89a208ae71bda61 Mon Sep 17 00:00:00 2001
From: Siddharth Asthana
Date: Tue, 19 Jul 2022 01:21:01 +0530
Subject: [PATCH 3/4] ident: rename commit_rewrite_person() to apply_mailmap_to_header()

commit_rewrite_person() takes a commit buffer and replaces
the idents in the header with their canonical versions using the mailmap mechanism. The name "commit_rewrite_person()" is misleading as it doesn't convey what kind of rewrite we are going to do to the buffer. It also doesn't clearly mention that the function will limit itself to the header part of the buffer. The new name, "apply_mailmap_to_header()", expresses the functionality of the function pretty clearly. We intend to use apply_mailmap_to_header() in git-cat-file to replace idents in the headers of commit and tag object buffers. So, we will be extending this function to take tag object buffers as well and replace idents in the tagger header using the mailmap mechanism. Mentored-by: Christian Couder Mentored-by: John Cai Signed-off-by: Siddharth Asthana Signed-off-by: Junio C Hamano --- cache.h | 6 +++--- ident.c | 4 ++-- revision.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cache.h b/cache.h index 16a08aada2..4aa1bd079d 100644 --- a/cache.h +++ b/cache.h @@ -1689,10 +1689,10 @@ struct ident_split { int split_ident_line(struct ident_split *, const char *, int); /* - * Given a commit object buffer and the commit headers, replaces the idents - * in the headers with their canonical versions using the mailmap mechanism. + * Given a commit or tag object buffer and the commit or tag headers, replaces + * the idents in the headers with their canonical versions using the mailmap mechanism. */ -void commit_rewrite_person(struct strbuf *, const char **, struct string_list *); +void apply_mailmap_to_header(struct strbuf *, const char **, struct string_list *); /* * Compare split idents for equality or strict ordering. 
Note that we diff --git a/ident.c b/ident.c index 1eee4fd0e3..7f66beda42 100644 --- a/ident.c +++ b/ident.c @@ -387,8 +387,8 @@ static ssize_t rewrite_ident_line(const char *person, size_t len, return 0; } -void commit_rewrite_person(struct strbuf *buf, const char **header, - struct string_list *mailmap) +void apply_mailmap_to_header(struct strbuf *buf, const char **header, + struct string_list *mailmap) { size_t buf_offset = 0; diff --git a/revision.c b/revision.c index 14dca903b6..6ad3665204 100644 --- a/revision.c +++ b/revision.c @@ -3792,7 +3792,7 @@ static int commit_match(struct commit *commit, struct rev_info *opt) if (!buf.len) strbuf_addstr(&buf, message); - commit_rewrite_person(&buf, commit_headers, opt->mailmap); + apply_mailmap_to_header(&buf, commit_headers, opt->mailmap); } /* Append "fake" message parts as needed */ From ec031da9f97a2545601304b5ac1e93fee09425b4 Mon Sep 17 00:00:00 2001 From: Siddharth Asthana Date: Tue, 19 Jul 2022 01:21:02 +0530 Subject: [PATCH 4/4] cat-file: add mailmap support git-cat-file is used by tools like GitLab to get commit and tag contents that are then displayed to users. This content, which has author, committer or tagger information, could benefit from passing through the mailmap mechanism before being sent or displayed. This patch adds the --[no-]use-mailmap command line option to the git cat-file command. It also adds the --[no-]mailmap option as an alias for --[no-]use-mailmap. This patch also introduces new test cases to test the mailmap mechanism in the git cat-file command. 
Mentored-by: Christian Couder
Mentored-by: John Cai
Helped-by: Phillip Wood
Helped-by: Johannes Schindelin
Signed-off-by: Siddharth Asthana
Signed-off-by: Junio C Hamano
---
 Documentation/git-cat-file.txt |  6 ++++
 builtin/cat-file.c             | 43 ++++++++++++++++++++++++-
 t/t4203-mailmap.sh             | 59 ++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt
index 24a811f0ef..1880e9bba1 100644
--- a/Documentation/git-cat-file.txt
+++ b/Documentation/git-cat-file.txt
@@ -63,6 +63,12 @@ OPTIONS
 	or to ask for a "blob" with `<object>` being a tag object that
 	points at it.
 
+--[no-]mailmap::
+--[no-]use-mailmap::
+	Use mailmap file to map author, committer and tagger names
+	and email addresses to canonical real names and email addresses.
+	See linkgit:git-shortlog[1].
+
 --textconv::
 	Show the content as transformed by a textconv filter. In this case,
 	`<object>` has to be of the form `<tree-ish>:<path>`, or `:<path>` in
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 50cf38999d..4b68216b51 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -16,6 +16,7 @@
 #include "packfile.h"
 #include "object-store.h"
 #include "promisor-remote.h"
+#include "mailmap.h"
 
 enum batch_mode {
 	BATCH_MODE_CONTENTS,
@@ -36,6 +37,22 @@ struct batch_options {
 
 static const char *force_path;
 
+static struct string_list mailmap = STRING_LIST_INIT_NODUP;
+static int use_mailmap;
+
+static char *replace_idents_using_mailmap(char *, size_t *);
+
+static char *replace_idents_using_mailmap(char *object_buf, size_t *size)
+{
+	struct strbuf sb = STRBUF_INIT;
+	const char *headers[] = { "author ", "committer ", "tagger ", NULL };
+
+	strbuf_attach(&sb, object_buf, *size, *size + 1);
+	apply_mailmap_to_header(&sb, headers, &mailmap);
+	*size = sb.len;
+	return strbuf_detach(&sb, NULL);
+}
+
 static int filter_object(const char *path, unsigned mode,
 			 const struct object_id *oid,
 			 char **buf, unsigned long *size)
@@ -152,6 +169,12 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
 		if (!buf)
 			die("Cannot read object %s", obj_name);
 
+		if (use_mailmap) {
+			size_t s = size;
+			buf = replace_idents_using_mailmap(buf, &s);
+			size = cast_size_t_to_ulong(s);
+		}
+
 		/* otherwise just spit out the data */
 		break;
 
@@ -183,6 +206,12 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name,
 		}
 		buf = read_object_with_reference(the_repository, &oid,
 						 exp_type_id, &size, NULL);
+		/* A failed read leaves buf NULL; never attach that to a strbuf */
+		if (buf && use_mailmap) {
+			size_t s = size;
+			buf = replace_idents_using_mailmap(buf, &s);
+			size = cast_size_t_to_ulong(s);
+		}
 		break;
 	}
 	default:
@@ -348,11 +377,18 @@ static void print_object_or_die(struct batch_options *opt, struct expand_data *d
 		void *contents;
 
 		contents = read_object_file(oid, &type, &size);
 		if (!contents)
 			die("object %s disappeared", oid_to_hex(oid));
+
+		/* Rewrite idents only after the read is known to be good. */
+		if (use_mailmap) {
+			size_t s = size;
+			contents = replace_idents_using_mailmap(contents, &s);
+			size = cast_size_t_to_ulong(s);
+		}
 		if (type != data->type)
 			die("object %s changed type!?", oid_to_hex(oid));
-		if (data->info.sizep && size != data->size)
+		if (data->info.sizep && size != data->size && !use_mailmap)
 			die("object %s changed size!?", oid_to_hex(oid));
 
 		batch_write(opt, contents, size);
@@ -843,6 +879,8 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 		OPT_CMDMODE('s', NULL, &opt, N_("show object size"), 's'),
 		OPT_BOOL(0, "allow-unknown-type", &unknown_type,
 			  N_("allow -s and -t to work with broken/corrupt objects")),
+		OPT_BOOL(0, "use-mailmap", &use_mailmap, N_("use mail map file")),
+		OPT_ALIAS(0, "mailmap", "use-mailmap"),
 		/* Batch mode */
 		OPT_GROUP(N_("Batch objects requested on stdin (or --batch-all-objects)")),
 		OPT_CALLBACK_F(0, "batch", &batch, N_("format"),
@@ -885,6 +923,9 @@ int cmd_cat_file(int argc, const char **argv, const char *prefix)
 	opt_cw = (opt == 'c' || opt == 'w');
 	opt_epts = (opt == 'e' || opt == 'p' || opt == 't' || opt == 's');
 
+	if (use_mailmap)
+		read_mailmap(&mailmap);
+
 	/* --batch-all-objects? */
 	if (opt == 'b')
 		batch.all_objects = 1;
diff --git a/t/t4203-mailmap.sh b/t/t4203-mailmap.sh
index 0b2d21ec55..cd1cab3e54 100755
--- a/t/t4203-mailmap.sh
+++ b/t/t4203-mailmap.sh
@@ -963,4 +963,63 @@ test_expect_success SYMLINKS 'symlinks not respected in-tree' '
 	test_cmp expect actual
 '
 
+test_expect_success 'prepare for cat-file --mailmap' '
+	rm -f .mailmap &&
+	git commit --allow-empty -m foo --author="Orig <orig@example.com>"
+'
+
+test_expect_success '--no-use-mailmap disables mailmap in cat-file' '
+	test_when_finished "rm .mailmap" &&
+	cat >.mailmap <<-EOF &&
+	A U Thor <author@example.com> Orig <orig@example.com>
+	EOF
+	cat >expect <<-EOF &&
+	author Orig <orig@example.com>
+	EOF
+	git cat-file --no-use-mailmap commit HEAD >log &&
+	sed -n "/^author /s/\([^>]*>\).*/\1/p" log >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--use-mailmap enables mailmap in cat-file' '
+	test_when_finished "rm .mailmap" &&
+	cat >.mailmap <<-EOF &&
+	A U Thor <author@example.com> Orig <orig@example.com>
+	EOF
+	cat >expect <<-EOF &&
+	author A U Thor <author@example.com>
+	EOF
+	git cat-file --use-mailmap commit HEAD >log &&
+	sed -n "/^author /s/\([^>]*>\).*/\1/p" log >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--no-mailmap disables mailmap in cat-file for annotated tag objects' '
+	test_when_finished "rm .mailmap" &&
+	cat >.mailmap <<-EOF &&
+	Orig <orig@example.com> C O Mitter <committer@example.com>
+	EOF
+	cat >expect <<-EOF &&
+	tagger C O Mitter <committer@example.com>
+	EOF
+	git tag -a -m "annotated tag" v1 &&
+	git cat-file --no-mailmap -p v1 >log &&
+	sed -n "/^tagger /s/\([^>]*>\).*/\1/p" log >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success '--mailmap enables mailmap in cat-file for annotated tag objects' '
+	test_when_finished "rm .mailmap" &&
+	cat >.mailmap <<-EOF &&
+	Orig <orig@example.com> C O Mitter <committer@example.com>
+	EOF
+	cat >expect <<-EOF &&
+	tagger Orig <orig@example.com>
+	EOF
+	git tag -a -m "annotated tag" v2 &&
+	git cat-file --mailmap -p v2 >log &&
+	sed -n "/^tagger /s/\([^>]*>\).*/\1/p" log >actual &&
+	test_cmp expect actual
+'
+
 test_done