From d582992e80aa29c37cb449922e713a0c288ffe91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90o=C3=A0n=20Tr=E1=BA=A7n=20C=C3=B4ng=20Danh?= Date: Thu, 6 May 2021 22:02:18 +0700 Subject: [PATCH 1/6] mailinfo: load default metainfo_charset lazily MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a later change, we will use parse_option to parse mailinfo's options. In mailinfo, both "-u", "-n", and "--encoding" try to set the same field, with "-u" reset that field to some default value from configuration variable "i18n.commitEncoding". Let's delay the setting of that field until we finish processing all options. By doing that, "i18n.commitEncoding" can be parsed on demand. More importantly, it cleans the way for using parse_option. This change introduces some inconsistent brackets "{}" in "if/else if" construct, however, we will rewrite them in the next few changes. Signed-off-by: Đoàn Trần Công Danh Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index cfb667a594c..77f96177ccd 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -11,17 +11,25 @@ static const char mailinfo_usage[] = "git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding= | -n] [--scissors | --no-scissors] < mail >info"; +struct metainfo_charset +{ + enum { + CHARSET_DEFAULT, + CHARSET_NO_REENCODE, + CHARSET_EXPLICIT, + } policy; + const char *charset; +}; + int cmd_mailinfo(int argc, const char **argv, const char *prefix) { - const char *def_charset; + struct metainfo_charset meta_charset; struct mailinfo mi; int status; char *msgfile, *patchfile; setup_mailinfo(&mi); - - def_charset = get_commit_output_encoding(); - mi.metainfo_charset = def_charset; + meta_charset.policy = CHARSET_DEFAULT; while (1 < argc && argv[1][0] == '-') { if (!strcmp(argv[1], "-k")) @@ -31,12 +39,13 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id")) mi.add_message_id = 1; else if (!strcmp(argv[1], "-u")) - mi.metainfo_charset = def_charset; + meta_charset.policy = CHARSET_DEFAULT; else if (!strcmp(argv[1], "-n")) - mi.metainfo_charset = NULL; - else if (starts_with(argv[1], "--encoding=")) - mi.metainfo_charset = argv[1] + 11; - else if (!strcmp(argv[1], "--scissors")) + meta_charset.policy = CHARSET_NO_REENCODE; + else if (starts_with(argv[1], "--encoding=")) { + meta_charset.policy = CHARSET_EXPLICIT; + meta_charset.charset = argv[1] + 11; + } else if (!strcmp(argv[1], "--scissors")) mi.use_scissors = 1; else if (!strcmp(argv[1], "--no-scissors")) mi.use_scissors = 0; @@ -50,6 +59,19 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) if (argc != 3) usage(mailinfo_usage); + switch (meta_charset.policy) { + case CHARSET_DEFAULT: + mi.metainfo_charset = get_commit_output_encoding(); + break; + case CHARSET_NO_REENCODE: + mi.metainfo_charset = NULL; + break; + case CHARSET_EXPLICIT: + break; + default: + BUG("invalid meta_charset.policy"); + } + mi.input = stdin; mi.output = stdout; From dd9323b7fbda92b988bbfe835fd75c179a3b4780 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90o=C3=A0n=20Tr=E1=BA=A7n=20C=C3=B4ng=20Danh?= Date: Thu, 6 May 2021 22:02:20 +0700 Subject: [PATCH 2/6] mailinfo: stop parsing options manually MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a later change, mailinfo will learn more options, let's switch to our robust parse_options framework before that step. Signed-off-by: Đoàn Trần Công Danh Signed-off-by: Junio C Hamano --- builtin/mailinfo.c | 75 +++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index 77f96177ccd..f55549a0975 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -7,9 +7,13 @@ #include "utf8.h" #include "strbuf.h" #include "mailinfo.h" +#include "parse-options.h" -static const char mailinfo_usage[] = - "git mailinfo [-k | -b] [-m | --message-id] [-u | --encoding= | -n] [--scissors | --no-scissors] < mail >info"; +static const char * const mailinfo_usage[] = { + /* TRANSLATORS: keep <> in "<" mail ">" info. */ + N_("git mailinfo [] < mail >info"), + NULL, +}; struct metainfo_charset { @@ -21,6 +25,19 @@ struct metainfo_charset const char *charset; }; +static int parse_opt_explicit_encoding(const struct option *opt, + const char *arg, int unset) +{ + struct metainfo_charset *meta_charset = opt->value; + + BUG_ON_OPT_NEG(unset); + + meta_charset->policy = CHARSET_EXPLICIT; + meta_charset->charset = arg; + + return 0; +} + int cmd_mailinfo(int argc, const char **argv, const char *prefix) { struct metainfo_charset meta_charset; @@ -28,36 +45,34 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) int status; char *msgfile, *patchfile; + struct option options[] = { + OPT_BOOL('k', NULL, &mi.keep_subject, N_("keep subject")), + OPT_BOOL('b', NULL, &mi.keep_non_patch_brackets_in_subject, + N_("keep non patch brackets in subject")), + OPT_BOOL('m', "message-id", &mi.add_message_id, + N_("copy Message-ID to the end of commit message")), + OPT_SET_INT_F('u', NULL, &meta_charset.policy, + N_("re-code metadata to i18n.commitEncoding"), + CHARSET_DEFAULT, PARSE_OPT_NONEG), + OPT_SET_INT_F('n', NULL, &meta_charset.policy, + N_("disable charset re-coding of metadata"), + CHARSET_NO_REENCODE, PARSE_OPT_NONEG), + OPT_CALLBACK_F(0, "encoding", &meta_charset, N_("encoding"), + N_("re-code metadata to this encoding"), + PARSE_OPT_NONEG, parse_opt_explicit_encoding), + OPT_BOOL(0, "scissors", &mi.use_scissors, N_("use scissors")), + OPT_HIDDEN_BOOL(0, "inbody-headers", &mi.use_inbody_headers, + N_("use headers in message's body")), + OPT_END() + }; + setup_mailinfo(&mi); meta_charset.policy = CHARSET_DEFAULT; - while (1 < argc && argv[1][0] == '-') { - if (!strcmp(argv[1], "-k")) - mi.keep_subject = 1; - else if (!strcmp(argv[1], "-b")) - mi.keep_non_patch_brackets_in_subject = 1; - else if (!strcmp(argv[1], "-m") || !strcmp(argv[1], "--message-id")) - mi.add_message_id = 1; - else if (!strcmp(argv[1], "-u")) - meta_charset.policy = CHARSET_DEFAULT; - else if (!strcmp(argv[1], "-n")) - meta_charset.policy = CHARSET_NO_REENCODE; - else if (starts_with(argv[1], "--encoding=")) { - meta_charset.policy = CHARSET_EXPLICIT; - meta_charset.charset = argv[1] + 11; - } else if (!strcmp(argv[1], "--scissors")) - mi.use_scissors = 1; - else if (!strcmp(argv[1], "--no-scissors")) - mi.use_scissors = 0; - else if (!strcmp(argv[1], "--no-inbody-headers")) - mi.use_inbody_headers = 0; - else - usage(mailinfo_usage); - argc--; argv++; - } + argc = parse_options(argc, argv, prefix, options, mailinfo_usage, 0); - if (argc != 3) - usage(mailinfo_usage); + if (argc != 2) + usage_with_options(mailinfo_usage, options); switch (meta_charset.policy) { case CHARSET_DEFAULT: @@ -75,8 +90,8 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) mi.input = stdin; mi.output = stdout; - msgfile = prefix_filename(prefix, argv[1]); - patchfile = prefix_filename(prefix, argv[2]); + msgfile = prefix_filename(prefix, argv[0]); + patchfile = prefix_filename(prefix, argv[1]); status = !!mailinfo(&mi, msgfile, patchfile); clear_mailinfo(&mi); From 0b689562cafc05b1a36bdea3d025c9ecdf2514bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90o=C3=A0n=20Tr=E1=BA=A7n=20C=C3=B4ng=20Danh?= Date: Mon, 10 May 2021 00:12:10 +0700 Subject: [PATCH 3/6] mailinfo: warn if CRLF found in decoded base64/QP email MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When SMTP servers receive 8-bit email messages, possibly with only LF as line ending, some of them decide to change said LF to CRLF. Some mailing list softwares, when receive 8-bit email messages, decide to encode those messages in base64 or quoted-printable. If an email is transfered through above mail servers, then distributed by such mailing list softwares, the recipients will receive an email contains a patch mungled with CRLF encoded inside another encoding. Thus, such CR (in CRLF) couldn't be dropped by "mailsplit". Hence, the mailed patch couldn't be applied cleanly. Such accidents have been observed in the wild [1]. Instead of silently rejecting those messages, let's give our users some warnings if such CR (as part of CRLF) is found. [1]: https://nmbug.notmuchmail.org/nmweb/show/m2lf9ejegj.fsf%40guru.guru-group.fi Signed-off-by: Đoàn Trần Công Danh Signed-off-by: Junio C Hamano --- mailinfo.c | 14 ++++++++++++ mailinfo.h | 1 + t/t5100-mailinfo.sh | 30 ++++++++++++++++++++++++++ t/t5100/quoted-cr-info | 5 +++++ t/t5100/quoted-cr-msg | 2 ++ t/t5100/quoted-cr-patch | 22 +++++++++++++++++++ t/t5100/quoted-cr.mbox | 47 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 121 insertions(+) create mode 100644 t/t5100/quoted-cr-info create mode 100644 t/t5100/quoted-cr-msg create mode 100644 t/t5100/quoted-cr-patch create mode 100644 t/t5100/quoted-cr.mbox diff --git a/mailinfo.c b/mailinfo.c index 5681d9130db..c8caee4f55b 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -994,6 +994,11 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line, const char *rest; if (!mi->format_flowed) { + if (len >= 2 && + line->buf[len - 2] == '\r' && + line->buf[len - 1] == '\n') { + mi->have_quoted_cr = 1; + } handle_filter(mi, line); return; } @@ -1033,6 +1038,12 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line, handle_filter(mi, line); } +static void summarize_quoted_cr(struct mailinfo *mi) +{ + if (mi->have_quoted_cr) + warning(_("quoted CRLF detected")); +} + static void handle_body(struct mailinfo *mi, struct strbuf *line) { struct strbuf prev = STRBUF_INIT; @@ -1051,6 +1062,8 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line) handle_filter(mi, &prev); strbuf_reset(&prev); } + summarize_quoted_cr(mi); + mi->have_quoted_cr = 0; if (!handle_boundary(mi, line)) goto handle_body_out; } @@ -1100,6 +1113,7 @@ static void handle_body(struct mailinfo *mi, struct strbuf *line) if (prev.len) handle_filter(mi, &prev); + summarize_quoted_cr(mi); flush_inbody_header_accum(mi); diff --git a/mailinfo.h b/mailinfo.h index 79b1d6774ec..b394ef9bcee 100644 --- a/mailinfo.h +++ b/mailinfo.h @@ -24,6 +24,7 @@ struct mailinfo { struct strbuf charset; unsigned int format_flowed:1; unsigned int delsp:1; + unsigned int have_quoted_cr:1; char *message_id; enum { TE_DONTCARE, TE_QP, TE_BASE64 diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh index 147e616533c..ac6fbfe596c 100755 --- a/t/t5100-mailinfo.sh +++ b/t/t5100-mailinfo.sh @@ -228,4 +228,34 @@ test_expect_success 'mailinfo handles unusual header whitespace' ' test_cmp expect actual ' +check_quoted_cr_mail () { + mail="$1" && shift && + git mailinfo -u "$@" "$mail.msg" "$mail.patch" \ + <"$mail" >"$mail.info" 2>"$mail.err" && + test_cmp "$mail-expected.msg" "$mail.msg" && + test_cmp "$mail-expected.patch" "$mail.patch" && + test_cmp "$DATA/quoted-cr-info" "$mail.info" +} + +test_expect_success 'split base64 email with quoted-cr' ' + mkdir quoted-cr && + git mailsplit -oquoted-cr "$DATA/quoted-cr.mbox" >quoted-cr/last && + test $(cat quoted-cr/last) = 2 +' + +test_expect_success 'mailinfo warn CR in base64 encoded email' ' + sed -e "s/%%$//" -e "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-msg" \ + >quoted-cr/0001-expected.msg && + sed "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-msg" \ + >quoted-cr/0002-expected.msg && + sed -e "s/%%$//" -e "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-patch" \ + >quoted-cr/0001-expected.patch && + sed "s/%%/$(printf \\015)/g" "$DATA/quoted-cr-patch" \ + >quoted-cr/0002-expected.patch && + check_quoted_cr_mail quoted-cr/0001 && + test_must_be_empty quoted-cr/0001.err && + check_quoted_cr_mail quoted-cr/0002 && + grep "quoted CRLF detected" quoted-cr/0002.err +' + test_done diff --git a/t/t5100/quoted-cr-info b/t/t5100/quoted-cr-info new file mode 100644 index 00000000000..dab2228b70a --- /dev/null +++ b/t/t5100/quoted-cr-info @@ -0,0 +1,5 @@ +Author: A U Thor +Email: mail@example.com +Subject: sample +Date: Mon, 3 Aug 2020 22:40:55 +0700 + diff --git a/t/t5100/quoted-cr-msg b/t/t5100/quoted-cr-msg new file mode 100644 index 00000000000..89b05a07844 --- /dev/null +++ b/t/t5100/quoted-cr-msg @@ -0,0 +1,2 @@ +On different distro, %%pytest is suffixed with different patterns.%% +%% diff --git a/t/t5100/quoted-cr-patch b/t/t5100/quoted-cr-patch new file mode 100644 index 00000000000..65b13eeef70 --- /dev/null +++ b/t/t5100/quoted-cr-patch @@ -0,0 +1,22 @@ +---%% + configure | 2 +-%% + 1 file changed, 1 insertion(+), 1 deletion(-)%% +%% +diff --git a/configure b/configure%% +index db3538b3..f7c1c095 100755%% +--- a/configure%% ++++ b/configure%% +@@ -814,7 +814,7 @@ if [ $have_python3 -eq 1 ]; then%% + printf "%%Checking for python3 pytest (>= 3.0)... "%% + conf=$(mktemp)%% + printf "[pytest]\nminversion=3.0\n" > $conf%% +- if pytest-3 -c $conf --version >/dev/null 2>&1; then%% ++ if "$python" -m pytest -c $conf --version >/dev/null 2>&1; then%% + printf "Yes.\n"%% + have_python3_pytest=1%% + else%% +-- %% +2.28.0%% +_______________________________________________ +example mailing list -- list@example.org +To unsubscribe send an email to list-leave@example.org diff --git a/t/t5100/quoted-cr.mbox b/t/t5100/quoted-cr.mbox new file mode 100644 index 00000000000..909021bb7a8 --- /dev/null +++ b/t/t5100/quoted-cr.mbox @@ -0,0 +1,47 @@ +From nobody Mon Sep 17 00:00:00 2001 +From: A U Thor +To: list@example.org +Subject: [PATCH v2] sample +Date: Mon, 3 Aug 2020 22:40:55 +0700 +Message-Id: +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: base64 + +T24gZGlmZmVyZW50IGRpc3RybywgDXB5dGVzdCBpcyBzdWZmaXhlZCB3aXRoIGRpZmZlcmVudCBw +YXR0ZXJucy4KCi0tLQogY29uZmlndXJlIHwgMiArLQogMSBmaWxlIGNoYW5nZWQsIDEgaW5zZXJ0 +aW9uKCspLCAxIGRlbGV0aW9uKC0pCgpkaWZmIC0tZ2l0IGEvY29uZmlndXJlIGIvY29uZmlndXJl +CmluZGV4IGRiMzUzOGIzLi5mN2MxYzA5NSAxMDA3NTUKLS0tIGEvY29uZmlndXJlCisrKyBiL2Nv +bmZpZ3VyZQpAQCAtODE0LDcgKzgxNCw3IEBAIGlmIFsgJGhhdmVfcHl0aG9uMyAtZXEgMSBdOyB0 +aGVuCiAgICAgcHJpbnRmICINQ2hlY2tpbmcgZm9yIHB5dGhvbjMgcHl0ZXN0ICg+PSAzLjApLi4u +ICIKICAgICBjb25mPSQobWt0ZW1wKQogICAgIHByaW50ZiAiW3B5dGVzdF1cbm1pbnZlcnNpb249 +My4wXG4iID4gJGNvbmYKLSAgICBpZiBweXRlc3QtMyAtYyAkY29uZiAtLXZlcnNpb24gPi9kZXYv +bnVsbCAyPiYxOyB0aGVuCisgICAgaWYgIiRweXRob24iIC1tIHB5dGVzdCAtYyAkY29uZiAtLXZl +cnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuCiAgICAgICAgIHByaW50ZiAiWWVzLlxuIgogICAg +ICAgICBoYXZlX3B5dGhvbjNfcHl0ZXN0PTEKICAgICBlbHNlCi0tIAoyLjI4LjAKX19fX19fX19f +X19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX18KZXhhbXBsZSBtYWlsaW5nIGxp +c3QgLS0gbGlzdEBleGFtcGxlLm9yZwpUbyB1bnN1YnNjcmliZSBzZW5kIGFuIGVtYWlsIHRvIGxp +c3QtbGVhdmVAZXhhbXBsZS5vcmcK + +From nobody Mon Sep 17 00:00:00 2001 +From: A U Thor +To: list@example.org +Subject: [PATCH v2] sample +Date: Mon, 3 Aug 2020 22:40:55 +0700 +Message-Id: +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: base64 + +T24gZGlmZmVyZW50IGRpc3RybywgDXB5dGVzdCBpcyBzdWZmaXhlZCB3aXRoIGRpZmZlcmVudCBw +YXR0ZXJucy4NCg0KLS0tDQogY29uZmlndXJlIHwgMiArLQ0KIDEgZmlsZSBjaGFuZ2VkLCAxIGlu +c2VydGlvbigrKSwgMSBkZWxldGlvbigtKQ0KDQpkaWZmIC0tZ2l0IGEvY29uZmlndXJlIGIvY29u +ZmlndXJlDQppbmRleCBkYjM1MzhiMy4uZjdjMWMwOTUgMTAwNzU1DQotLS0gYS9jb25maWd1cmUN +CisrKyBiL2NvbmZpZ3VyZQ0KQEAgLTgxNCw3ICs4MTQsNyBAQCBpZiBbICRoYXZlX3B5dGhvbjMg +LWVxIDEgXTsgdGhlbg0KICAgICBwcmludGYgIg1DaGVja2luZyBmb3IgcHl0aG9uMyBweXRlc3Qg +KD49IDMuMCkuLi4gIg0KICAgICBjb25mPSQobWt0ZW1wKQ0KICAgICBwcmludGYgIltweXRlc3Rd +XG5taW52ZXJzaW9uPTMuMFxuIiA+ICRjb25mDQotICAgIGlmIHB5dGVzdC0zIC1jICRjb25mIC0t +dmVyc2lvbiA+L2Rldi9udWxsIDI+JjE7IHRoZW4NCisgICAgaWYgIiRweXRob24iIC1tIHB5dGVz +dCAtYyAkY29uZiAtLXZlcnNpb24gPi9kZXYvbnVsbCAyPiYxOyB0aGVuDQogICAgICAgICBwcmlu +dGYgIlllcy5cbiINCiAgICAgICAgIGhhdmVfcHl0aG9uM19weXRlc3Q9MQ0KICAgICBlbHNlDQot +LSANCjIuMjguMA0KX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19fX19f +X18KZXhhbXBsZSBtYWlsaW5nIGxpc3QgLS0gbGlzdEBleGFtcGxlLm9yZwpUbyB1bnN1YnNjcmli +ZSBzZW5kIGFuIGVtYWlsIHRvIGxpc3QtbGVhdmVAZXhhbXBsZS5vcmcK From f1aa29944320e51441e5b5e32591e69f2fa74de2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90o=C3=A0n=20Tr=E1=BA=A7n=20C=C3=B4ng=20Danh?= Date: Mon, 10 May 2021 00:12:11 +0700 Subject: [PATCH 4/6] mailinfo: allow squelching quoted CRLF warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In previous change, Git starts to warn for quoted CRLF in decoded base64/QP email. Despite those warnings are usually helpful, quoted CRLF could be part of some users' workflow. Let's give them an option to turn off the warning completely. Signed-off-by: Đoàn Trần Công Danh Signed-off-by: Junio C Hamano --- Documentation/git-mailinfo.txt | 20 +++++++++++++++++++- builtin/mailinfo.c | 12 ++++++++++++ mailinfo.c | 20 +++++++++++++++++++- mailinfo.h | 7 +++++++ t/t5100-mailinfo.sh | 6 +++++- 5 files changed, 62 insertions(+), 3 deletions(-) diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt index d343f040f53..824947a0703 100644 --- a/Documentation/git-mailinfo.txt +++ b/Documentation/git-mailinfo.txt @@ -9,7 +9,9 @@ git-mailinfo - Extracts patch and authorship from a single e-mail message SYNOPSIS -------- [verse] -'git mailinfo' [-k|-b] [-u | --encoding= | -n] [--[no-]scissors] +'git mailinfo' [-k|-b] [-u | --encoding= | -n] + [--[no-]scissors] [--quoted-cr=] + DESCRIPTION @@ -89,6 +91,22 @@ This can be enabled by default with the configuration option mailinfo.scissors. --no-scissors:: Ignore scissors lines. Useful for overriding mailinfo.scissors settings. +--quoted-cr=:: + Action when processes email messages sent with base64 or + quoted-printable encoding, and the decoded lines end with a CRLF + instead of a simple LF. ++ +The valid actions are: ++ +-- +* `nowarn`: Git will do nothing when such a CRLF is found. +* `warn`: Git will issue a warning for each message if such a CRLF is + found. +-- ++ +The default action could be set by configuration option `mailinfo.quotedCR`. +If no such configuration option has been set, `warn` will be used. + :: The commit log message extracted from e-mail, usually except the title line which comes from e-mail Subject. diff --git a/builtin/mailinfo.c b/builtin/mailinfo.c index f55549a0975..01d16ef9e5a 100644 --- a/builtin/mailinfo.c +++ b/builtin/mailinfo.c @@ -38,6 +38,15 @@ static int parse_opt_explicit_encoding(const struct option *opt, return 0; } +static int parse_opt_quoted_cr(const struct option *opt, const char *arg, int unset) +{ + BUG_ON_OPT_NEG(unset); + + if (mailinfo_parse_quoted_cr_action(arg, opt->value) != 0) + return error(_("bad action '%s' for '%s'"), arg, "--quoted-cr"); + return 0; +} + int cmd_mailinfo(int argc, const char **argv, const char *prefix) { struct metainfo_charset meta_charset; @@ -61,6 +70,9 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix) N_("re-code metadata to this encoding"), PARSE_OPT_NONEG, parse_opt_explicit_encoding), OPT_BOOL(0, "scissors", &mi.use_scissors, N_("use scissors")), + OPT_CALLBACK_F(0, "quoted-cr", &mi.quoted_cr, N_(""), + N_("action when quoted CR is found"), + PARSE_OPT_NONEG, parse_opt_quoted_cr), OPT_HIDDEN_BOOL(0, "inbody-headers", &mi.use_inbody_headers, N_("use headers in message's body")), OPT_END() diff --git a/mailinfo.c b/mailinfo.c index c8caee4f55b..a784552c7b2 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -1040,7 +1040,8 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line, static void summarize_quoted_cr(struct mailinfo *mi) { - if (mi->have_quoted_cr) + if (mi->have_quoted_cr && + mi->quoted_cr == quoted_cr_warn) warning(_("quoted CRLF detected")); } @@ -1220,6 +1221,17 @@ int mailinfo(struct mailinfo *mi, const char *msg, const char *patch) return mi->input_error; } +int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action) +{ + if (!strcmp(actionstr, "nowarn")) + *action = quoted_cr_nowarn; + else if (!strcmp(actionstr, "warn")) + *action = quoted_cr_warn; + else + return -1; + return 0; +} + static int git_mailinfo_config(const char *var, const char *value, void *mi_) { struct mailinfo *mi = mi_; @@ -1230,6 +1242,11 @@ static int git_mailinfo_config(const char *var, const char *value, void *mi_) mi->use_scissors = git_config_bool(var, value); return 0; } + if (!strcmp(var, "mailinfo.quotedcr")) { + if (mailinfo_parse_quoted_cr_action(value, &mi->quoted_cr) != 0) + return error(_("bad action '%s' for '%s'"), value, var); + return 0; + } /* perhaps others here */ return 0; } @@ -1242,6 +1259,7 @@ void setup_mailinfo(struct mailinfo *mi) strbuf_init(&mi->charset, 0); strbuf_init(&mi->log_message, 0); strbuf_init(&mi->inbody_header_accum, 0); + mi->quoted_cr = quoted_cr_warn; mi->header_stage = 1; mi->use_inbody_headers = 1; mi->content_top = mi->content; diff --git a/mailinfo.h b/mailinfo.h index b394ef9bcee..768d06ac2a8 100644 --- a/mailinfo.h +++ b/mailinfo.h @@ -5,6 +5,11 @@ #define MAX_BOUNDARIES 5 +enum quoted_cr_action { + quoted_cr_nowarn, + quoted_cr_warn, +}; + struct mailinfo { FILE *input; FILE *output; @@ -14,6 +19,7 @@ struct mailinfo { struct strbuf email; int keep_subject; int keep_non_patch_brackets_in_subject; + int quoted_cr; /* enum quoted_cr_action */ int add_message_id; int use_scissors; int use_inbody_headers; @@ -40,6 +46,7 @@ struct mailinfo { int input_error; }; +int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action); void setup_mailinfo(struct mailinfo *); int mailinfo(struct mailinfo *, const char *msg, const char *patch); void clear_mailinfo(struct mailinfo *); diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh index ac6fbfe596c..1ecefa381d9 100755 --- a/t/t5100-mailinfo.sh +++ b/t/t5100-mailinfo.sh @@ -255,7 +255,11 @@ test_expect_success 'mailinfo warn CR in base64 encoded email' ' check_quoted_cr_mail quoted-cr/0001 && test_must_be_empty quoted-cr/0001.err && check_quoted_cr_mail quoted-cr/0002 && - grep "quoted CRLF detected" quoted-cr/0002.err + grep "quoted CRLF detected" quoted-cr/0002.err && + check_quoted_cr_mail quoted-cr/0001 --quoted-cr=nowarn && + test_must_be_empty quoted-cr/0001.err && + check_quoted_cr_mail quoted-cr/0002 --quoted-cr=nowarn && + test_must_be_empty quoted-cr/0002.err ' test_done From 133a4fda59e7e68be13191082e14be32143bd61b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90o=C3=A0n=20Tr=E1=BA=A7n=20C=C3=B4ng=20Danh?= Date: Mon, 10 May 2021 00:12:12 +0700 Subject: [PATCH 5/6] mailinfo: allow stripping quoted CR without warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In previous changes, we've turned on warning for quoted CR in base64 or quoted-printable email messages. Some projects see those quoted CR a lot, they know that it happens most of the time, and they find it's desirable to always strip those CR. Those projects in question usually fall back to use other tools to handle patches when receive such patches. Let's help those projects handle those patches by stripping those excessive CR. Signed-off-by: Đoàn Trần Công Danh Signed-off-by: Junio C Hamano --- Documentation/git-mailinfo.txt | 1 + mailinfo.c | 7 +++++++ mailinfo.h | 1 + t/t5100-mailinfo.sh | 6 ++++++ 4 files changed, 15 insertions(+) diff --git a/Documentation/git-mailinfo.txt b/Documentation/git-mailinfo.txt index 824947a0703..3fcfd965fde 100644 --- a/Documentation/git-mailinfo.txt +++ b/Documentation/git-mailinfo.txt @@ -102,6 +102,7 @@ The valid actions are: * `nowarn`: Git will do nothing when such a CRLF is found. * `warn`: Git will issue a warning for each message if such a CRLF is found. +* `strip`: Git will convert those CRLF to LF. -- + The default action could be set by configuration option `mailinfo.quotedCR`. diff --git a/mailinfo.c b/mailinfo.c index a784552c7b2..ed863c3a952 100644 --- a/mailinfo.c +++ b/mailinfo.c @@ -998,6 +998,11 @@ static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line, line->buf[len - 2] == '\r' && line->buf[len - 1] == '\n') { mi->have_quoted_cr = 1; + if (mi->quoted_cr == quoted_cr_strip) { + strbuf_setlen(line, len - 2); + strbuf_addch(line, '\n'); + len--; + } } handle_filter(mi, line); return; @@ -1227,6 +1232,8 @@ int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action) *action = quoted_cr_nowarn; else if (!strcmp(actionstr, "warn")) *action = quoted_cr_warn; + else if (!strcmp(actionstr, "strip")) + *action = quoted_cr_strip; else return -1; return 0; diff --git a/mailinfo.h b/mailinfo.h index 768d06ac2a8..2ddf8be90ff 100644 --- a/mailinfo.h +++ b/mailinfo.h @@ -8,6 +8,7 @@ enum quoted_cr_action { quoted_cr_nowarn, quoted_cr_warn, + quoted_cr_strip, }; struct mailinfo { diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh index 1ecefa381d9..141b29f0319 100755 --- a/t/t5100-mailinfo.sh +++ b/t/t5100-mailinfo.sh @@ -259,6 +259,12 @@ test_expect_success 'mailinfo warn CR in base64 encoded email' ' check_quoted_cr_mail quoted-cr/0001 --quoted-cr=nowarn && test_must_be_empty quoted-cr/0001.err && check_quoted_cr_mail quoted-cr/0002 --quoted-cr=nowarn && + test_must_be_empty quoted-cr/0002.err && + cp quoted-cr/0001-expected.msg quoted-cr/0002-expected.msg && + cp quoted-cr/0001-expected.patch quoted-cr/0002-expected.patch && + check_quoted_cr_mail quoted-cr/0001 --quoted-cr=strip && + test_must_be_empty quoted-cr/0001.err && + check_quoted_cr_mail quoted-cr/0002 --quoted-cr=strip && test_must_be_empty quoted-cr/0002.err ' From 59b519ab7e703ebdbad06bd9ba3cae6e55d880d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90o=C3=A0n=20Tr=E1=BA=A7n=20C=C3=B4ng=20Danh?= Date: Mon, 10 May 2021 00:12:13 +0700 Subject: [PATCH 6/6] am: learn to process quoted lines that ends with CRLF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In previous changes, mailinfo has learnt to process lines that decoded from base64 or quoted-printable, and ends with CRLF. Let's teach "am" that new trick, too. Signed-off-by: Đoàn Trần Công Danh Signed-off-by: Junio C Hamano --- Documentation/git-am.txt | 4 ++ builtin/am.c | 51 ++++++++++++++++++++++++++ contrib/completion/git-completion.bash | 5 +++ mailinfo.h | 1 + t/t4258-am-quoted-cr.sh | 37 +++++++++++++++++++ t/t4258/mbox | 12 ++++++ 6 files changed, 110 insertions(+) create mode 100755 t/t4258-am-quoted-cr.sh create mode 100644 t/t4258/mbox diff --git a/Documentation/git-am.txt b/Documentation/git-am.txt index decd8ae1227..8714dfcb762 100644 --- a/Documentation/git-am.txt +++ b/Documentation/git-am.txt @@ -15,6 +15,7 @@ SYNOPSIS [--whitespace=