1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-05 17:36:18 +02:00
git/trailer.c

1173 lines
30 KiB
C
Raw Permalink Normal View History

#include "git-compat-util.h"
#include "config.h"
#include "environment.h"
#include "gettext.h"
#include "string-list.h"
#include "run-command.h"
#include "commit.h"
#include "trailer.h"
#include "list.h"
/*
* Copyright (c) 2013, 2014 Christian Couder <chriscool@tuxfamily.org>
*/
struct conf_info {
char *name;
char *key;
char *command;
trailer: add new .cmd config option The `trailer.<token>.command` configuration variable specifies a command (run via the shell, so it does not have to be a single name or path to the command, but can be a shell script), and the first occurrence of substring $ARG is replaced with the value given to the `interpret-trailer` command for the token in a '--trailer <token>=<value>' argument. This has three downsides: * The use of $ARG in the mechanism misleads the users that the value is passed in the shell variable, and tempt them to use $ARG more than once, but that would not work, as the second and subsequent $ARG are not replaced. * Because $ARG is textually replaced without regard to the shell language syntax, even '$ARG' (inside a single-quote pair), which a user would expect to stay intact, would be replaced, and worse, if the value had an unmatched single quote (imagine a name like "O'Connor", substituted into NAME='$ARG' to make it NAME='O'Connor'), it would result in a broken command that is not syntactically correct (or worse). * The first occurrence of substring `$ARG` will be replaced with the empty string, in the command when the command is first called to add a trailer with the specified <token>. This is a bad design, the nature of automatic execution causes it to add a trailer that we don't expect. Introduce a new `trailer.<token>.cmd` configuration that takes higher precedence to deprecate and eventually remove `trailer.<token>.command`, which passes the value as an argument to the command. Instead of "$ARG", users can refer to the value as positional argument, $1, in their scripts. At the same time, in order to allow `git interpret-trailers` to better simulate the behavior of `git command -s`, 'trailer.<token>.cmd' will not automatically execute. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Christian Couder <christian.couder@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-03 17:41:05 +02:00
char *cmd;
enum trailer_where where;
enum trailer_if_exists if_exists;
enum trailer_if_missing if_missing;
};
static struct conf_info default_conf_info;
struct trailer_item {
struct list_head list;
/*
* If this is not a trailer line, the line is stored in value
* (excluding the terminating newline) and token is NULL.
*/
char *token;
char *value;
};
struct arg_item {
struct list_head list;
char *token;
char *value;
struct conf_info conf;
};
static LIST_HEAD(conf_head);
static char *separators = ":";
static int configured;
#define TRAILER_ARG_STRING "$ARG"
static const char *git_generated_prefixes[] = {
"Signed-off-by: ",
"(cherry picked from commit ",
NULL
};
/* Iterate over the elements of the list. */
#define list_for_each_dir(pos, head, is_reverse) \
for (pos = is_reverse ? (head)->prev : (head)->next; \
pos != (head); \
pos = is_reverse ? pos->prev : pos->next)
static int after_or_end(enum trailer_where where)
{
return (where == WHERE_AFTER) || (where == WHERE_END);
}
/*
* Return the length of the string not including any final
* punctuation. E.g., the input "Signed-off-by:" would return
* 13, stripping the trailing punctuation but retaining
* internal punctuation.
*/
static size_t token_len_without_separator(const char *token, size_t len)
{
while (len > 0 && !isalnum(token[len - 1]))
len--;
return len;
}
static int same_token(struct trailer_item *a, struct arg_item *b)
{
size_t a_len, b_len, min_len;
if (!a->token)
return 0;
a_len = token_len_without_separator(a->token, strlen(a->token));
b_len = token_len_without_separator(b->token, strlen(b->token));
min_len = (a_len > b_len) ? b_len : a_len;
return !strncasecmp(a->token, b->token, min_len);
}
static int same_value(struct trailer_item *a, struct arg_item *b)
{
return !strcasecmp(a->value, b->value);
}
static int same_trailer(struct trailer_item *a, struct arg_item *b)
{
return same_token(a, b) && same_value(a, b);
}
static inline int is_blank_line(const char *str)
{
const char *s = str;
while (*s && *s != '\n' && isspace(*s))
s++;
return !*s || *s == '\n';
}
static inline void strbuf_replace(struct strbuf *sb, const char *a, const char *b)
{
const char *ptr = strstr(sb->buf, a);
if (ptr)
strbuf_splice(sb, ptr - sb->buf, strlen(a), b, strlen(b));
}
static void free_trailer_item(struct trailer_item *item)
{
free(item->token);
free(item->value);
free(item);
}
static void free_arg_item(struct arg_item *item)
{
free(item->conf.name);
free(item->conf.key);
free(item->conf.command);
trailer: add new .cmd config option The `trailer.<token>.command` configuration variable specifies a command (run via the shell, so it does not have to be a single name or path to the command, but can be a shell script), and the first occurrence of substring $ARG is replaced with the value given to the `interpret-trailer` command for the token in a '--trailer <token>=<value>' argument. This has three downsides: * The use of $ARG in the mechanism misleads the users that the value is passed in the shell variable, and tempt them to use $ARG more than once, but that would not work, as the second and subsequent $ARG are not replaced. * Because $ARG is textually replaced without regard to the shell language syntax, even '$ARG' (inside a single-quote pair), which a user would expect to stay intact, would be replaced, and worse, if the value had an unmatched single quote (imagine a name like "O'Connor", substituted into NAME='$ARG' to make it NAME='O'Connor'), it would result in a broken command that is not syntactically correct (or worse). * The first occurrence of substring `$ARG` will be replaced with the empty string, in the command when the command is first called to add a trailer with the specified <token>. This is a bad design, the nature of automatic execution causes it to add a trailer that we don't expect. Introduce a new `trailer.<token>.cmd` configuration that takes higher precedence to deprecate and eventually remove `trailer.<token>.command`, which passes the value as an argument to the command. Instead of "$ARG", users can refer to the value as positional argument, $1, in their scripts. At the same time, in order to allow `git interpret-trailers` to better simulate the behavior of `git command -s`, 'trailer.<token>.cmd' will not automatically execute. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Christian Couder <christian.couder@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-03 17:41:05 +02:00
free(item->conf.cmd);
free(item->token);
free(item->value);
free(item);
}
static char last_non_space_char(const char *s)
{
int i;
for (i = strlen(s) - 1; i >= 0; i--)
if (!isspace(s[i]))
return s[i];
return '\0';
}
static struct trailer_item *trailer_from_arg(struct arg_item *arg_tok)
{
struct trailer_item *new_item = xcalloc(1, sizeof(*new_item));
new_item->token = arg_tok->token;
new_item->value = arg_tok->value;
arg_tok->token = arg_tok->value = NULL;
free_arg_item(arg_tok);
return new_item;
}
static void add_arg_to_input_list(struct trailer_item *on_tok,
struct arg_item *arg_tok)
{
int aoe = after_or_end(arg_tok->conf.where);
struct trailer_item *to_add = trailer_from_arg(arg_tok);
if (aoe)
list_add(&to_add->list, &on_tok->list);
else
list_add_tail(&to_add->list, &on_tok->list);
}
static int check_if_different(struct trailer_item *in_tok,
struct arg_item *arg_tok,
int check_all,
struct list_head *head)
{
enum trailer_where where = arg_tok->conf.where;
struct list_head *next_head;
do {
if (same_trailer(in_tok, arg_tok))
return 0;
/*
* if we want to add a trailer after another one,
* we have to check those before this one
*/
next_head = after_or_end(where) ? in_tok->list.prev
: in_tok->list.next;
if (next_head == head)
break;
in_tok = list_entry(next_head, struct trailer_item, list);
} while (check_all);
return 1;
}
trailer: add new .cmd config option The `trailer.<token>.command` configuration variable specifies a command (run via the shell, so it does not have to be a single name or path to the command, but can be a shell script), and the first occurrence of substring $ARG is replaced with the value given to the `interpret-trailer` command for the token in a '--trailer <token>=<value>' argument. This has three downsides: * The use of $ARG in the mechanism misleads the users that the value is passed in the shell variable, and tempt them to use $ARG more than once, but that would not work, as the second and subsequent $ARG are not replaced. * Because $ARG is textually replaced without regard to the shell language syntax, even '$ARG' (inside a single-quote pair), which a user would expect to stay intact, would be replaced, and worse, if the value had an unmatched single quote (imagine a name like "O'Connor", substituted into NAME='$ARG' to make it NAME='O'Connor'), it would result in a broken command that is not syntactically correct (or worse). * The first occurrence of substring `$ARG` will be replaced with the empty string, in the command when the command is first called to add a trailer with the specified <token>. This is a bad design, the nature of automatic execution causes it to add a trailer that we don't expect. Introduce a new `trailer.<token>.cmd` configuration that takes higher precedence to deprecate and eventually remove `trailer.<token>.command`, which passes the value as an argument to the command. Instead of "$ARG", users can refer to the value as positional argument, $1, in their scripts. At the same time, in order to allow `git interpret-trailers` to better simulate the behavior of `git command -s`, 'trailer.<token>.cmd' will not automatically execute. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Christian Couder <christian.couder@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-03 17:41:05 +02:00
static char *apply_command(struct conf_info *conf, const char *arg)
{
struct strbuf cmd = STRBUF_INIT;
struct strbuf buf = STRBUF_INIT;
struct child_process cp = CHILD_PROCESS_INIT;
char *result;
trailer: add new .cmd config option The `trailer.<token>.command` configuration variable specifies a command (run via the shell, so it does not have to be a single name or path to the command, but can be a shell script), and the first occurrence of substring $ARG is replaced with the value given to the `interpret-trailer` command for the token in a '--trailer <token>=<value>' argument. This has three downsides: * The use of $ARG in the mechanism misleads the users that the value is passed in the shell variable, and tempt them to use $ARG more than once, but that would not work, as the second and subsequent $ARG are not replaced. * Because $ARG is textually replaced without regard to the shell language syntax, even '$ARG' (inside a single-quote pair), which a user would expect to stay intact, would be replaced, and worse, if the value had an unmatched single quote (imagine a name like "O'Connor", substituted into NAME='$ARG' to make it NAME='O'Connor'), it would result in a broken command that is not syntactically correct (or worse). * The first occurrence of substring `$ARG` will be replaced with the empty string, in the command when the command is first called to add a trailer with the specified <token>. This is a bad design, the nature of automatic execution causes it to add a trailer that we don't expect. Introduce a new `trailer.<token>.cmd` configuration that takes higher precedence to deprecate and eventually remove `trailer.<token>.command`, which passes the value as an argument to the command. Instead of "$ARG", users can refer to the value as positional argument, $1, in their scripts. At the same time, in order to allow `git interpret-trailers` to better simulate the behavior of `git command -s`, 'trailer.<token>.cmd' will not automatically execute. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Christian Couder <christian.couder@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-03 17:41:05 +02:00
if (conf->cmd) {
strbuf_addstr(&cmd, conf->cmd);
strvec_push(&cp.args, cmd.buf);
if (arg)
strvec_push(&cp.args, arg);
} else if (conf->command) {
strbuf_addstr(&cmd, conf->command);
if (arg)
strbuf_replace(&cmd, TRAILER_ARG_STRING, arg);
strvec_push(&cp.args, cmd.buf);
}
strvec_pushv(&cp.env, (const char **)local_repo_env);
cp.no_stdin = 1;
cp.use_shell = 1;
if (capture_command(&cp, &buf, 1024)) {
error(_("running trailer command '%s' failed"), cmd.buf);
strbuf_release(&buf);
result = xstrdup("");
} else {
strbuf_trim(&buf);
result = strbuf_detach(&buf, NULL);
}
strbuf_release(&cmd);
return result;
}
static void apply_item_command(struct trailer_item *in_tok, struct arg_item *arg_tok)
{
trailer: add new .cmd config option The `trailer.<token>.command` configuration variable specifies a command (run via the shell, so it does not have to be a single name or path to the command, but can be a shell script), and the first occurrence of substring $ARG is replaced with the value given to the `interpret-trailer` command for the token in a '--trailer <token>=<value>' argument. This has three downsides: * The use of $ARG in the mechanism misleads the users that the value is passed in the shell variable, and tempt them to use $ARG more than once, but that would not work, as the second and subsequent $ARG are not replaced. * Because $ARG is textually replaced without regard to the shell language syntax, even '$ARG' (inside a single-quote pair), which a user would expect to stay intact, would be replaced, and worse, if the value had an unmatched single quote (imagine a name like "O'Connor", substituted into NAME='$ARG' to make it NAME='O'Connor'), it would result in a broken command that is not syntactically correct (or worse). * The first occurrence of substring `$ARG` will be replaced with the empty string, in the command when the command is first called to add a trailer with the specified <token>. This is a bad design, the nature of automatic execution causes it to add a trailer that we don't expect. Introduce a new `trailer.<token>.cmd` configuration that takes higher precedence to deprecate and eventually remove `trailer.<token>.command`, which passes the value as an argument to the command. Instead of "$ARG", users can refer to the value as positional argument, $1, in their scripts. At the same time, in order to allow `git interpret-trailers` to better simulate the behavior of `git command -s`, 'trailer.<token>.cmd' will not automatically execute. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Christian Couder <christian.couder@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-03 17:41:05 +02:00
if (arg_tok->conf.command || arg_tok->conf.cmd) {
const char *arg;
if (arg_tok->value && arg_tok->value[0]) {
arg = arg_tok->value;
} else {
if (in_tok && in_tok->value)
arg = xstrdup(in_tok->value);
else
arg = xstrdup("");
}
trailer: add new .cmd config option The `trailer.<token>.command` configuration variable specifies a command (run via the shell, so it does not have to be a single name or path to the command, but can be a shell script), and the first occurrence of substring $ARG is replaced with the value given to the `interpret-trailer` command for the token in a '--trailer <token>=<value>' argument. This has three downsides: * The use of $ARG in the mechanism misleads the users that the value is passed in the shell variable, and tempt them to use $ARG more than once, but that would not work, as the second and subsequent $ARG are not replaced. * Because $ARG is textually replaced without regard to the shell language syntax, even '$ARG' (inside a single-quote pair), which a user would expect to stay intact, would be replaced, and worse, if the value had an unmatched single quote (imagine a name like "O'Connor", substituted into NAME='$ARG' to make it NAME='O'Connor'), it would result in a broken command that is not syntactically correct (or worse). * The first occurrence of substring `$ARG` will be replaced with the empty string, in the command when the command is first called to add a trailer with the specified <token>. This is a bad design, the nature of automatic execution causes it to add a trailer that we don't expect. Introduce a new `trailer.<token>.cmd` configuration that takes higher precedence to deprecate and eventually remove `trailer.<token>.command`, which passes the value as an argument to the command. Instead of "$ARG", users can refer to the value as positional argument, $1, in their scripts. At the same time, in order to allow `git interpret-trailers` to better simulate the behavior of `git command -s`, 'trailer.<token>.cmd' will not automatically execute. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Christian Couder <christian.couder@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-03 17:41:05 +02:00
arg_tok->value = apply_command(&arg_tok->conf, arg);
free((char *)arg);
}
}
static void apply_arg_if_exists(struct trailer_item *in_tok,
struct arg_item *arg_tok,
struct trailer_item *on_tok,
struct list_head *head)
{
switch (arg_tok->conf.if_exists) {
case EXISTS_DO_NOTHING:
free_arg_item(arg_tok);
break;
case EXISTS_REPLACE:
apply_item_command(in_tok, arg_tok);
add_arg_to_input_list(on_tok, arg_tok);
list_del(&in_tok->list);
free_trailer_item(in_tok);
break;
case EXISTS_ADD:
apply_item_command(in_tok, arg_tok);
add_arg_to_input_list(on_tok, arg_tok);
break;
case EXISTS_ADD_IF_DIFFERENT:
apply_item_command(in_tok, arg_tok);
if (check_if_different(in_tok, arg_tok, 1, head))
add_arg_to_input_list(on_tok, arg_tok);
else
free_arg_item(arg_tok);
break;
case EXISTS_ADD_IF_DIFFERENT_NEIGHBOR:
apply_item_command(in_tok, arg_tok);
if (check_if_different(on_tok, arg_tok, 0, head))
add_arg_to_input_list(on_tok, arg_tok);
else
free_arg_item(arg_tok);
break;
default:
BUG("trailer.c: unhandled value %d",
arg_tok->conf.if_exists);
}
}
static void apply_arg_if_missing(struct list_head *head,
struct arg_item *arg_tok)
{
enum trailer_where where;
struct trailer_item *to_add;
switch (arg_tok->conf.if_missing) {
case MISSING_DO_NOTHING:
free_arg_item(arg_tok);
break;
case MISSING_ADD:
where = arg_tok->conf.where;
apply_item_command(NULL, arg_tok);
to_add = trailer_from_arg(arg_tok);
if (after_or_end(where))
list_add_tail(&to_add->list, head);
else
list_add(&to_add->list, head);
break;
default:
BUG("trailer.c: unhandled value %d",
arg_tok->conf.if_missing);
}
}
static int find_same_and_apply_arg(struct list_head *head,
struct arg_item *arg_tok)
{
struct list_head *pos;
struct trailer_item *in_tok;
struct trailer_item *on_tok;
enum trailer_where where = arg_tok->conf.where;
int middle = (where == WHERE_AFTER) || (where == WHERE_BEFORE);
int backwards = after_or_end(where);
struct trailer_item *start_tok;
if (list_empty(head))
return 0;
start_tok = list_entry(backwards ? head->prev : head->next,
struct trailer_item,
list);
list_for_each_dir(pos, head, backwards) {
in_tok = list_entry(pos, struct trailer_item, list);
if (!same_token(in_tok, arg_tok))
continue;
on_tok = middle ? in_tok : start_tok;
apply_arg_if_exists(in_tok, arg_tok, on_tok, head);
return 1;
}
return 0;
}
void process_trailers_lists(struct list_head *head,
struct list_head *arg_head)
{
struct list_head *pos, *p;
struct arg_item *arg_tok;
list_for_each_safe(pos, p, arg_head) {
int applied = 0;
arg_tok = list_entry(pos, struct arg_item, list);
list_del(pos);
applied = find_same_and_apply_arg(head, arg_tok);
if (!applied)
apply_arg_if_missing(head, arg_tok);
}
}
int trailer_set_where(enum trailer_where *item, const char *value)
{
if (!value)
*item = WHERE_DEFAULT;
else if (!strcasecmp("after", value))
*item = WHERE_AFTER;
else if (!strcasecmp("before", value))
*item = WHERE_BEFORE;
else if (!strcasecmp("end", value))
*item = WHERE_END;
else if (!strcasecmp("start", value))
*item = WHERE_START;
else
return -1;
return 0;
}
int trailer_set_if_exists(enum trailer_if_exists *item, const char *value)
{
if (!value)
*item = EXISTS_DEFAULT;
else if (!strcasecmp("addIfDifferent", value))
*item = EXISTS_ADD_IF_DIFFERENT;
else if (!strcasecmp("addIfDifferentNeighbor", value))
*item = EXISTS_ADD_IF_DIFFERENT_NEIGHBOR;
else if (!strcasecmp("add", value))
*item = EXISTS_ADD;
else if (!strcasecmp("replace", value))
*item = EXISTS_REPLACE;
else if (!strcasecmp("doNothing", value))
*item = EXISTS_DO_NOTHING;
else
return -1;
return 0;
}
int trailer_set_if_missing(enum trailer_if_missing *item, const char *value)
{
if (!value)
*item = MISSING_DEFAULT;
else if (!strcasecmp("doNothing", value))
*item = MISSING_DO_NOTHING;
else if (!strcasecmp("add", value))
*item = MISSING_ADD;
else
return -1;
return 0;
}
static void duplicate_conf(struct conf_info *dst, const struct conf_info *src)
{
*dst = *src;
dst->name = xstrdup_or_null(src->name);
dst->key = xstrdup_or_null(src->key);
dst->command = xstrdup_or_null(src->command);
trailer: add new .cmd config option The `trailer.<token>.command` configuration variable specifies a command (run via the shell, so it does not have to be a single name or path to the command, but can be a shell script), and the first occurrence of substring $ARG is replaced with the value given to the `interpret-trailer` command for the token in a '--trailer <token>=<value>' argument. This has three downsides: * The use of $ARG in the mechanism misleads the users that the value is passed in the shell variable, and tempt them to use $ARG more than once, but that would not work, as the second and subsequent $ARG are not replaced. * Because $ARG is textually replaced without regard to the shell language syntax, even '$ARG' (inside a single-quote pair), which a user would expect to stay intact, would be replaced, and worse, if the value had an unmatched single quote (imagine a name like "O'Connor", substituted into NAME='$ARG' to make it NAME='O'Connor'), it would result in a broken command that is not syntactically correct (or worse). * The first occurrence of substring `$ARG` will be replaced with the empty string, in the command when the command is first called to add a trailer with the specified <token>. This is a bad design, the nature of automatic execution causes it to add a trailer that we don't expect. Introduce a new `trailer.<token>.cmd` configuration that takes higher precedence to deprecate and eventually remove `trailer.<token>.command`, which passes the value as an argument to the command. Instead of "$ARG", users can refer to the value as positional argument, $1, in their scripts. At the same time, in order to allow `git interpret-trailers` to better simulate the behavior of `git command -s`, 'trailer.<token>.cmd' will not automatically execute. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Christian Couder <christian.couder@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-03 17:41:05 +02:00
dst->cmd = xstrdup_or_null(src->cmd);
}
static struct arg_item *get_conf_item(const char *name)
{
struct list_head *pos;
struct arg_item *item;
/* Look up item with same name */
list_for_each(pos, &conf_head) {
item = list_entry(pos, struct arg_item, list);
if (!strcasecmp(item->conf.name, name))
return item;
}
/* Item does not already exists, create it */
CALLOC_ARRAY(item, 1);
duplicate_conf(&item->conf, &default_conf_info);
item->conf.name = xstrdup(name);
list_add_tail(&item->list, &conf_head);
return item;
}
trailer: add new .cmd config option The `trailer.<token>.command` configuration variable specifies a command (run via the shell, so it does not have to be a single name or path to the command, but can be a shell script), and the first occurrence of substring $ARG is replaced with the value given to the `interpret-trailer` command for the token in a '--trailer <token>=<value>' argument. This has three downsides: * The use of $ARG in the mechanism misleads the users that the value is passed in the shell variable, and tempt them to use $ARG more than once, but that would not work, as the second and subsequent $ARG are not replaced. * Because $ARG is textually replaced without regard to the shell language syntax, even '$ARG' (inside a single-quote pair), which a user would expect to stay intact, would be replaced, and worse, if the value had an unmatched single quote (imagine a name like "O'Connor", substituted into NAME='$ARG' to make it NAME='O'Connor'), it would result in a broken command that is not syntactically correct (or worse). * The first occurrence of substring `$ARG` will be replaced with the empty string, in the command when the command is first called to add a trailer with the specified <token>. This is a bad design, the nature of automatic execution causes it to add a trailer that we don't expect. Introduce a new `trailer.<token>.cmd` configuration that takes higher precedence to deprecate and eventually remove `trailer.<token>.command`, which passes the value as an argument to the command. Instead of "$ARG", users can refer to the value as positional argument, $1, in their scripts. At the same time, in order to allow `git interpret-trailers` to better simulate the behavior of `git command -s`, 'trailer.<token>.cmd' will not automatically execute. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Christian Couder <christian.couder@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-03 17:41:05 +02:00
enum trailer_info_type { TRAILER_KEY, TRAILER_COMMAND, TRAILER_CMD,
TRAILER_WHERE, TRAILER_IF_EXISTS, TRAILER_IF_MISSING };
static struct {
const char *name;
enum trailer_info_type type;
} trailer_config_items[] = {
{ "key", TRAILER_KEY },
{ "command", TRAILER_COMMAND },
trailer: add new .cmd config option The `trailer.<token>.command` configuration variable specifies a command (run via the shell, so it does not have to be a single name or path to the command, but can be a shell script), and the first occurrence of substring $ARG is replaced with the value given to the `interpret-trailer` command for the token in a '--trailer <token>=<value>' argument. This has three downsides: * The use of $ARG in the mechanism misleads the users that the value is passed in the shell variable, and tempt them to use $ARG more than once, but that would not work, as the second and subsequent $ARG are not replaced. * Because $ARG is textually replaced without regard to the shell language syntax, even '$ARG' (inside a single-quote pair), which a user would expect to stay intact, would be replaced, and worse, if the value had an unmatched single quote (imagine a name like "O'Connor", substituted into NAME='$ARG' to make it NAME='O'Connor'), it would result in a broken command that is not syntactically correct (or worse). * The first occurrence of substring `$ARG` will be replaced with the empty string, in the command when the command is first called to add a trailer with the specified <token>. This is a bad design, the nature of automatic execution causes it to add a trailer that we don't expect. Introduce a new `trailer.<token>.cmd` configuration that takes higher precedence to deprecate and eventually remove `trailer.<token>.command`, which passes the value as an argument to the command. Instead of "$ARG", users can refer to the value as positional argument, $1, in their scripts. At the same time, in order to allow `git interpret-trailers` to better simulate the behavior of `git command -s`, 'trailer.<token>.cmd' will not automatically execute. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Christian Couder <christian.couder@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-03 17:41:05 +02:00
{ "cmd", TRAILER_CMD },
{ "where", TRAILER_WHERE },
{ "ifexists", TRAILER_IF_EXISTS },
{ "ifmissing", TRAILER_IF_MISSING }
};
static int git_trailer_default_config(const char *conf_key, const char *value,
config: add ctx arg to config_fn_t Add a new "const struct config_context *ctx" arg to config_fn_t to hold additional information about the config iteration operation. config_context has a "struct key_value_info kvi" member that holds metadata about the config source being read (e.g. what kind of config source it is, the filename, etc). In this series, we're only interested in .kvi, so we could have just used "struct key_value_info" as an arg, but config_context makes it possible to add/adjust members in the future without changing the config_fn_t signature. We could also consider other ways of organizing the args (e.g. moving the config name and value into config_context or key_value_info), but in my experiments, the incremental benefit doesn't justify the added complexity (e.g. a config_fn_t will sometimes invoke another config_fn_t but with a different config value). In subsequent commits, the .kvi member will replace the global "struct config_reader" in config.c, making config iteration a global-free operation. It requires much more work for the machinery to provide meaningful values of .kvi, so for now, merely change the signature and call sites, pass NULL as a placeholder value, and don't rely on the arg in any meaningful way. Most of the changes are performed by contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every config_fn_t: - Modifies the signature to accept "const struct config_context *ctx" - Passes "ctx" to any inner config_fn_t, if needed - Adds UNUSED attributes to "ctx", if needed Most config_fn_t instances are easily identified by seeing if they are called by the various config functions. Most of the remaining ones are manually named in the .cocci patch. Manual cleanups are still needed, but the majority of it is trivial; it's either adjusting config_fn_t that the .cocci patch didn't catch, or adding forward declarations of "struct config_context ctx" to make the signatures make sense. The non-trivial changes are in cases where we are invoking a config_fn_t outside of config machinery, and we now need to decide what value of "ctx" to pass. These cases are: - trace2/tr2_cfg.c:tr2_cfg_set_fl() This is indirectly called by git_config_set() so that the trace2 machinery can notice the new config values and update its settings using the tr2 config parsing function, i.e. tr2_cfg_cb(). - builtin/checkout.c:checkout_main() This calls git_xmerge_config() as a shorthand for parsing a CLI arg. This might be worth refactoring away in the future, since git_xmerge_config() can call git_default_config(), which can do much more than just parsing. Handle them by creating a KVI_INIT macro that initializes "struct key_value_info" to a reasonable default, and use that to construct the "ctx" arg. Signed-off-by: Glen Choo <chooglen@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 21:26:22 +02:00
const struct config_context *ctx UNUSED,
void *cb UNUSED)
{
const char *trailer_item, *variable_name;
if (!skip_prefix(conf_key, "trailer.", &trailer_item))
return 0;
variable_name = strrchr(trailer_item, '.');
if (!variable_name) {
if (!strcmp(trailer_item, "where")) {
if (trailer_set_where(&default_conf_info.where,
value) < 0)
warning(_("unknown value '%s' for key '%s'"),
value, conf_key);
} else if (!strcmp(trailer_item, "ifexists")) {
if (trailer_set_if_exists(&default_conf_info.if_exists,
value) < 0)
warning(_("unknown value '%s' for key '%s'"),
value, conf_key);
} else if (!strcmp(trailer_item, "ifmissing")) {
if (trailer_set_if_missing(&default_conf_info.if_missing,
value) < 0)
warning(_("unknown value '%s' for key '%s'"),
value, conf_key);
} else if (!strcmp(trailer_item, "separators")) {
if (!value)
return config_error_nonbool(conf_key);
separators = xstrdup(value);
}
}
return 0;
}
static int git_trailer_config(const char *conf_key, const char *value,
config: add ctx arg to config_fn_t Add a new "const struct config_context *ctx" arg to config_fn_t to hold additional information about the config iteration operation. config_context has a "struct key_value_info kvi" member that holds metadata about the config source being read (e.g. what kind of config source it is, the filename, etc). In this series, we're only interested in .kvi, so we could have just used "struct key_value_info" as an arg, but config_context makes it possible to add/adjust members in the future without changing the config_fn_t signature. We could also consider other ways of organizing the args (e.g. moving the config name and value into config_context or key_value_info), but in my experiments, the incremental benefit doesn't justify the added complexity (e.g. a config_fn_t will sometimes invoke another config_fn_t but with a different config value). In subsequent commits, the .kvi member will replace the global "struct config_reader" in config.c, making config iteration a global-free operation. It requires much more work for the machinery to provide meaningful values of .kvi, so for now, merely change the signature and call sites, pass NULL as a placeholder value, and don't rely on the arg in any meaningful way. Most of the changes are performed by contrib/coccinelle/config_fn_ctx.pending.cocci, which, for every config_fn_t: - Modifies the signature to accept "const struct config_context *ctx" - Passes "ctx" to any inner config_fn_t, if needed - Adds UNUSED attributes to "ctx", if needed Most config_fn_t instances are easily identified by seeing if they are called by the various config functions. Most of the remaining ones are manually named in the .cocci patch. Manual cleanups are still needed, but the majority of it is trivial; it's either adjusting config_fn_t that the .cocci patch didn't catch, or adding forward declarations of "struct config_context ctx" to make the signatures make sense. The non-trivial changes are in cases where we are invoking a config_fn_t outside of config machinery, and we now need to decide what value of "ctx" to pass. These cases are: - trace2/tr2_cfg.c:tr2_cfg_set_fl() This is indirectly called by git_config_set() so that the trace2 machinery can notice the new config values and update its settings using the tr2 config parsing function, i.e. tr2_cfg_cb(). - builtin/checkout.c:checkout_main() This calls git_xmerge_config() as a shorthand for parsing a CLI arg. This might be worth refactoring away in the future, since git_xmerge_config() can call git_default_config(), which can do much more than just parsing. Handle them by creating a KVI_INIT macro that initializes "struct key_value_info" to a reasonable default, and use that to construct the "ctx" arg. Signed-off-by: Glen Choo <chooglen@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-28 21:26:22 +02:00
const struct config_context *ctx UNUSED,
void *cb UNUSED)
{
const char *trailer_item, *variable_name;
struct arg_item *item;
struct conf_info *conf;
char *name = NULL;
enum trailer_info_type type;
int i;
if (!skip_prefix(conf_key, "trailer.", &trailer_item))
return 0;
variable_name = strrchr(trailer_item, '.');
if (!variable_name)
return 0;
variable_name++;
for (i = 0; i < ARRAY_SIZE(trailer_config_items); i++) {
if (strcmp(trailer_config_items[i].name, variable_name))
continue;
name = xstrndup(trailer_item, variable_name - trailer_item - 1);
type = trailer_config_items[i].type;
break;
}
if (!name)
return 0;
item = get_conf_item(name);
conf = &item->conf;
free(name);
switch (type) {
case TRAILER_KEY:
if (conf->key)
warning(_("more than one %s"), conf_key);
if (!value)
return config_error_nonbool(conf_key);
conf->key = xstrdup(value);
break;
case TRAILER_COMMAND:
if (conf->command)
warning(_("more than one %s"), conf_key);
if (!value)
return config_error_nonbool(conf_key);
conf->command = xstrdup(value);
break;
trailer: add new .cmd config option The `trailer.<token>.command` configuration variable specifies a command (run via the shell, so it does not have to be a single name or path to the command, but can be a shell script), and the first occurrence of substring $ARG is replaced with the value given to the `interpret-trailer` command for the token in a '--trailer <token>=<value>' argument. This has three downsides: * The use of $ARG in the mechanism misleads the users that the value is passed in the shell variable, and tempt them to use $ARG more than once, but that would not work, as the second and subsequent $ARG are not replaced. * Because $ARG is textually replaced without regard to the shell language syntax, even '$ARG' (inside a single-quote pair), which a user would expect to stay intact, would be replaced, and worse, if the value had an unmatched single quote (imagine a name like "O'Connor", substituted into NAME='$ARG' to make it NAME='O'Connor'), it would result in a broken command that is not syntactically correct (or worse). * The first occurrence of substring `$ARG` will be replaced with the empty string, in the command when the command is first called to add a trailer with the specified <token>. This is a bad design, the nature of automatic execution causes it to add a trailer that we don't expect. Introduce a new `trailer.<token>.cmd` configuration that takes higher precedence to deprecate and eventually remove `trailer.<token>.command`, which passes the value as an argument to the command. Instead of "$ARG", users can refer to the value as positional argument, $1, in their scripts. At the same time, in order to allow `git interpret-trailers` to better simulate the behavior of `git command -s`, 'trailer.<token>.cmd' will not automatically execute. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Christian Couder <christian.couder@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-03 17:41:05 +02:00
case TRAILER_CMD:
if (conf->cmd)
warning(_("more than one %s"), conf_key);
if (!value)
return config_error_nonbool(conf_key);
trailer: add new .cmd config option The `trailer.<token>.command` configuration variable specifies a command (run via the shell, so it does not have to be a single name or path to the command, but can be a shell script), and the first occurrence of substring $ARG is replaced with the value given to the `interpret-trailer` command for the token in a '--trailer <token>=<value>' argument. This has three downsides: * The use of $ARG in the mechanism misleads the users that the value is passed in the shell variable, and tempt them to use $ARG more than once, but that would not work, as the second and subsequent $ARG are not replaced. * Because $ARG is textually replaced without regard to the shell language syntax, even '$ARG' (inside a single-quote pair), which a user would expect to stay intact, would be replaced, and worse, if the value had an unmatched single quote (imagine a name like "O'Connor", substituted into NAME='$ARG' to make it NAME='O'Connor'), it would result in a broken command that is not syntactically correct (or worse). * The first occurrence of substring `$ARG` will be replaced with the empty string, in the command when the command is first called to add a trailer with the specified <token>. This is a bad design, the nature of automatic execution causes it to add a trailer that we don't expect. Introduce a new `trailer.<token>.cmd` configuration that takes higher precedence to deprecate and eventually remove `trailer.<token>.command`, which passes the value as an argument to the command. Instead of "$ARG", users can refer to the value as positional argument, $1, in their scripts. At the same time, in order to allow `git interpret-trailers` to better simulate the behavior of `git command -s`, 'trailer.<token>.cmd' will not automatically execute. Helped-by: Junio C Hamano <gitster@pobox.com> Helped-by: Christian Couder <christian.couder@gmail.com> Signed-off-by: ZheNing Hu <adlternative@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-03 17:41:05 +02:00
conf->cmd = xstrdup(value);
break;
case TRAILER_WHERE:
if (trailer_set_where(&conf->where, value))
warning(_("unknown value '%s' for key '%s'"), value, conf_key);
break;
case TRAILER_IF_EXISTS:
if (trailer_set_if_exists(&conf->if_exists, value))
warning(_("unknown value '%s' for key '%s'"), value, conf_key);
break;
case TRAILER_IF_MISSING:
if (trailer_set_if_missing(&conf->if_missing, value))
warning(_("unknown value '%s' for key '%s'"), value, conf_key);
break;
default:
BUG("trailer.c: unhandled type %d", type);
}
return 0;
}
void trailer_config_init(void)
{
if (configured)
return;
/* Default config must be setup first */
default_conf_info.where = WHERE_END;
default_conf_info.if_exists = EXISTS_ADD_IF_DIFFERENT_NEIGHBOR;
default_conf_info.if_missing = MISSING_ADD;
git_config(git_trailer_default_config, NULL);
git_config(git_trailer_config, NULL);
configured = 1;
}
static const char *token_from_item(struct arg_item *item, char *tok)
{
if (item->conf.key)
return item->conf.key;
if (tok)
return tok;
return item->conf.name;
}
static int token_matches_item(const char *tok, struct arg_item *item, size_t tok_len)
{
if (!strncasecmp(tok, item->conf.name, tok_len))
return 1;
return item->conf.key ? !strncasecmp(tok, item->conf.key, tok_len) : 0;
}
/*
* If the given line is of the form
* "<token><optional whitespace><separator>..." or "<separator>...", return the
* location of the separator. Otherwise, return -1. The optional whitespace
* is allowed there primarily to allow things like "Bug #43" where <token> is
* "Bug" and <separator> is "#".
*
* The separator-starts-line case (in which this function returns 0) is
* distinguished from the non-well-formed-line case (in which this function
* returns -1) because some callers of this function need such a distinction.
*/
static ssize_t find_separator(const char *line, const char *separators)
{
int whitespace_found = 0;
const char *c;
for (c = line; *c; c++) {
if (strchr(separators, *c))
return c - line;
if (!whitespace_found && (isalnum(*c) || *c == '-'))
continue;
if (c != line && (*c == ' ' || *c == '\t')) {
whitespace_found = 1;
continue;
}
break;
}
return -1;
}
/*
* Obtain the token, value, and conf from the given trailer.
*
* separator_pos must not be 0, since the token cannot be an empty string.
*
* If separator_pos is -1, interpret the whole trailer as a token.
*/
static void parse_trailer(struct strbuf *tok, struct strbuf *val,
const struct conf_info **conf, const char *trailer,
ssize_t separator_pos)
{
struct arg_item *item;
size_t tok_len;
struct list_head *pos;
if (separator_pos != -1) {
strbuf_add(tok, trailer, separator_pos);
strbuf_trim(tok);
strbuf_addstr(val, trailer + separator_pos + 1);
strbuf_trim(val);
} else {
strbuf_addstr(tok, trailer);
strbuf_trim(tok);
}
/* Lookup if the token matches something in the config */
tok_len = token_len_without_separator(tok->buf, tok->len);
if (conf)
*conf = &default_conf_info;
list_for_each(pos, &conf_head) {
item = list_entry(pos, struct arg_item, list);
if (token_matches_item(tok->buf, item, tok_len)) {
char *tok_buf = strbuf_detach(tok, NULL);
if (conf)
*conf = &item->conf;
strbuf_addstr(tok, token_from_item(item, tok_buf));
free(tok_buf);
break;
}
}
}
static struct trailer_item *add_trailer_item(struct list_head *head, char *tok,
char *val)
{
struct trailer_item *new_item = xcalloc(1, sizeof(*new_item));
new_item->token = tok;
new_item->value = val;
list_add_tail(&new_item->list, head);
return new_item;
}
static void add_arg_item(struct list_head *arg_head, char *tok, char *val,
const struct conf_info *conf,
const struct new_trailer_item *new_trailer_item)
{
struct arg_item *new_item = xcalloc(1, sizeof(*new_item));
new_item->token = tok;
new_item->value = val;
duplicate_conf(&new_item->conf, conf);
if (new_trailer_item) {
if (new_trailer_item->where != WHERE_DEFAULT)
new_item->conf.where = new_trailer_item->where;
if (new_trailer_item->if_exists != EXISTS_DEFAULT)
new_item->conf.if_exists = new_trailer_item->if_exists;
if (new_trailer_item->if_missing != MISSING_DEFAULT)
new_item->conf.if_missing = new_trailer_item->if_missing;
}
list_add_tail(&new_item->list, arg_head);
}
void parse_trailers_from_config(struct list_head *config_head)
{
struct arg_item *item;
struct list_head *pos;
/* Add an arg item for each configured trailer with a command */
list_for_each(pos, &conf_head) {
item = list_entry(pos, struct arg_item, list);
if (item->conf.command)
add_arg_item(config_head,
xstrdup(token_from_item(item, NULL)),
xstrdup(""),
&item->conf, NULL);
}
}
void parse_trailers_from_command_line_args(struct list_head *arg_head,
struct list_head *new_trailer_head)
{
struct strbuf tok = STRBUF_INIT;
struct strbuf val = STRBUF_INIT;
const struct conf_info *conf;
struct list_head *pos;
/*
* In command-line arguments, '=' is accepted (in addition to the
* separators that are defined).
*/
char *cl_separators = xstrfmt("=%s", separators);
/* Add an arg item for each trailer on the command line */
list_for_each(pos, new_trailer_head) {
struct new_trailer_item *tr =
list_entry(pos, struct new_trailer_item, list);
ssize_t separator_pos = find_separator(tr->text, cl_separators);
if (separator_pos == 0) {
struct strbuf sb = STRBUF_INIT;
strbuf_addstr(&sb, tr->text);
strbuf_trim(&sb);
error(_("empty trailer token in trailer '%.*s'"),
(int) sb.len, sb.buf);
strbuf_release(&sb);
} else {
parse_trailer(&tok, &val, &conf, tr->text,
separator_pos);
add_arg_item(arg_head,
strbuf_detach(&tok, NULL),
strbuf_detach(&val, NULL),
conf, tr);
}
}
free(cl_separators);
}
static const char *next_line(const char *str)
{
const char *nl = strchrnul(str, '\n');
return nl + !!*nl;
}
/*
* Return the position of the start of the last line. If len is 0, return -1.
*/
static ssize_t last_line(const char *buf, size_t len)
{
ssize_t i;
if (len == 0)
return -1;
if (len == 1)
return 0;
/*
* Skip the last character (in addition to the null terminator),
* because if the last character is a newline, it is considered as part
* of the last line anyway.
*/
i = len - 2;
for (; i >= 0; i--) {
if (buf[i] == '\n')
return i + 1;
}
return 0;
}
/*
trailer: find the end of the log message Previously, trailer_info_get() computed the trailer block end position by (1) checking for the opts->no_divider flag and optionally calling find_patch_start() to find the "patch start" location (patch_start), and (2) calling find_trailer_end() to find the end of the trailer block using patch_start as a guide, saving the return value into "trailer_end". The logic in (1) was awkward because the variable "patch_start" is misleading if there is no patch in the input. The logic in (2) was misleading because it could be the case that no trailers are in the input (yet we are setting a "trailer_end" variable before even searching for trailers, which happens later in find_trailer_start()). The name "find_trailer_end" was misleading because that function did not look for any trailer block itself --- instead it just computed the end position of the log message in the input where the end of the trailer block (if it exists) would be (because trailer blocks must always come after the end of the log message). Combine the logic in (1) and (2) together into find_patch_start() by renaming it to find_end_of_log_message(). The end of the log message is the starting point which find_trailer_start() needs to start searching backward to parse individual trailers (if any). Helped-by: Jonathan Tan <jonathantanmy@google.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Linus Arver <linusa@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-10-20 21:01:34 +02:00
* Find the end of the log message as an offset from the start of the input
* (where callers of this function are interested in looking for a trailers
* block in the same input). We have to consider two categories of content that
* can come at the end of the input which we want to ignore (because they don't
* belong in the log message):
*
* (1) the "patch part" which begins with a "---" divider and has patch
* information (like the output of git-format-patch), and
*
* (2) any trailing comment lines, blank lines like in the output of "git
* commit -v", or stuff below the "cut" (scissor) line.
*
* As a formula, the situation looks like this:
*
* INPUT = LOG MESSAGE + IGNORED
*
* where IGNORED can be either of the two categories described above. It may be
* that there is nothing to ignore. Now it may be the case that the LOG MESSAGE
* contains a trailer block, but that's not the concern of this function.
*/
trailer: find the end of the log message Previously, trailer_info_get() computed the trailer block end position by (1) checking for the opts->no_divider flag and optionally calling find_patch_start() to find the "patch start" location (patch_start), and (2) calling find_trailer_end() to find the end of the trailer block using patch_start as a guide, saving the return value into "trailer_end". The logic in (1) was awkward because the variable "patch_start" is misleading if there is no patch in the input. The logic in (2) was misleading because it could be the case that no trailers are in the input (yet we are setting a "trailer_end" variable before even searching for trailers, which happens later in find_trailer_start()). The name "find_trailer_end" was misleading because that function did not look for any trailer block itself --- instead it just computed the end position of the log message in the input where the end of the trailer block (if it exists) would be (because trailer blocks must always come after the end of the log message). Combine the logic in (1) and (2) together into find_patch_start() by renaming it to find_end_of_log_message(). The end of the log message is the starting point which find_trailer_start() needs to start searching backward to parse individual trailers (if any). Helped-by: Jonathan Tan <jonathantanmy@google.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Linus Arver <linusa@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-10-20 21:01:34 +02:00
static size_t find_end_of_log_message(const char *input, int no_divider)
{
trailer: find the end of the log message Previously, trailer_info_get() computed the trailer block end position by (1) checking for the opts->no_divider flag and optionally calling find_patch_start() to find the "patch start" location (patch_start), and (2) calling find_trailer_end() to find the end of the trailer block using patch_start as a guide, saving the return value into "trailer_end". The logic in (1) was awkward because the variable "patch_start" is misleading if there is no patch in the input. The logic in (2) was misleading because it could be the case that no trailers are in the input (yet we are setting a "trailer_end" variable before even searching for trailers, which happens later in find_trailer_start()). The name "find_trailer_end" was misleading because that function did not look for any trailer block itself --- instead it just computed the end position of the log message in the input where the end of the trailer block (if it exists) would be (because trailer blocks must always come after the end of the log message). Combine the logic in (1) and (2) together into find_patch_start() by renaming it to find_end_of_log_message(). The end of the log message is the starting point which find_trailer_start() needs to start searching backward to parse individual trailers (if any). Helped-by: Jonathan Tan <jonathantanmy@google.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Linus Arver <linusa@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-10-20 21:01:34 +02:00
size_t end;
const char *s;
trailer: find the end of the log message Previously, trailer_info_get() computed the trailer block end position by (1) checking for the opts->no_divider flag and optionally calling find_patch_start() to find the "patch start" location (patch_start), and (2) calling find_trailer_end() to find the end of the trailer block using patch_start as a guide, saving the return value into "trailer_end". The logic in (1) was awkward because the variable "patch_start" is misleading if there is no patch in the input. The logic in (2) was misleading because it could be the case that no trailers are in the input (yet we are setting a "trailer_end" variable before even searching for trailers, which happens later in find_trailer_start()). The name "find_trailer_end" was misleading because that function did not look for any trailer block itself --- instead it just computed the end position of the log message in the input where the end of the trailer block (if it exists) would be (because trailer blocks must always come after the end of the log message). Combine the logic in (1) and (2) together into find_patch_start() by renaming it to find_end_of_log_message(). The end of the log message is the starting point which find_trailer_start() needs to start searching backward to parse individual trailers (if any). Helped-by: Jonathan Tan <jonathantanmy@google.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Linus Arver <linusa@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-10-20 21:01:34 +02:00
/* Assume the naive end of the input is already what we want. */
end = strlen(input);
/* Optionally skip over any patch part ("---" line and below). */
if (!no_divider) {
for (s = input; *s; s = next_line(s)) {
const char *v;
if (skip_prefix(s, "---", &v) && isspace(*v)) {
end = s - input;
break;
}
trailer: find the end of the log message Previously, trailer_info_get() computed the trailer block end position by (1) checking for the opts->no_divider flag and optionally calling find_patch_start() to find the "patch start" location (patch_start), and (2) calling find_trailer_end() to find the end of the trailer block using patch_start as a guide, saving the return value into "trailer_end". The logic in (1) was awkward because the variable "patch_start" is misleading if there is no patch in the input. The logic in (2) was misleading because it could be the case that no trailers are in the input (yet we are setting a "trailer_end" variable before even searching for trailers, which happens later in find_trailer_start()). The name "find_trailer_end" was misleading because that function did not look for any trailer block itself --- instead it just computed the end position of the log message in the input where the end of the trailer block (if it exists) would be (because trailer blocks must always come after the end of the log message). Combine the logic in (1) and (2) together into find_patch_start() by renaming it to find_end_of_log_message(). The end of the log message is the starting point which find_trailer_start() needs to start searching backward to parse individual trailers (if any). Helped-by: Jonathan Tan <jonathantanmy@google.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Linus Arver <linusa@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-10-20 21:01:34 +02:00
}
}
trailer: find the end of the log message Previously, trailer_info_get() computed the trailer block end position by (1) checking for the opts->no_divider flag and optionally calling find_patch_start() to find the "patch start" location (patch_start), and (2) calling find_trailer_end() to find the end of the trailer block using patch_start as a guide, saving the return value into "trailer_end". The logic in (1) was awkward because the variable "patch_start" is misleading if there is no patch in the input. The logic in (2) was misleading because it could be the case that no trailers are in the input (yet we are setting a "trailer_end" variable before even searching for trailers, which happens later in find_trailer_start()). The name "find_trailer_end" was misleading because that function did not look for any trailer block itself --- instead it just computed the end position of the log message in the input where the end of the trailer block (if it exists) would be (because trailer blocks must always come after the end of the log message). Combine the logic in (1) and (2) together into find_patch_start() by renaming it to find_end_of_log_message(). The end of the log message is the starting point which find_trailer_start() needs to start searching backward to parse individual trailers (if any). Helped-by: Jonathan Tan <jonathantanmy@google.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Linus Arver <linusa@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-10-20 21:01:34 +02:00
/* Skip over other ignorable bits. */
return end - ignored_log_message_bytes(input, end);
}
/*
* Return the position of the first trailer line or len if there are no
* trailers.
*/
static size_t find_trailer_block_start(const char *buf, size_t len)
{
const char *s;
ssize_t end_of_title, l;
int only_spaces = 1;
int recognized_prefix = 0, trailer_lines = 0, non_trailer_lines = 0;
/*
* Number of possible continuation lines encountered. This will be
* reset to 0 if we encounter a trailer (since those lines are to be
* considered continuations of that trailer), and added to
* non_trailer_lines if we encounter a non-trailer (since those lines
* are to be considered non-trailers).
*/
int possible_continuation_lines = 0;
/* The first paragraph is the title and cannot be trailers */
for (s = buf; s < buf + len; s = next_line(s)) {
find multi-byte comment chars in unterminated buffers As with the previous patch, we need to swap out single-byte matching for something like starts_with() to match all bytes of a multi-byte comment character. But for cases where the buffer is not NUL-terminated (and we instead have an explicit size or end pointer), it's not safe to use starts_with(), as it might walk off the end of the buffer. Let's introduce a new starts_with_mem() that does the same thing but also accepts the length of the "haystack" str and makes sure not to walk past it. Note that in most cases the existing code did not need a length check at all, since it was written in a way that knew we had at least one byte available (and that was all we checked). So I had to read each one to find the appropriate bounds. The one exception is sequencer.c's add_commented_lines(), where we can actually get rid of the length check. Just like starts_with(), our starts_with_mem() handles an empty haystack variable by not matching (assuming a non-empty prefix). A few notes on the implementation of starts_with_mem(): - it would be equally correct to take an "end" pointer (and indeed, many of the callers have this and have to subtract to come up with the length). I think taking a ptr/size combo is a more usual interface for our codebase, though, and has the added benefit that the function signature makes it harder to mix up the three parameters. - we could obviously build starts_with() on top of this by passing strlen(str) as the length. But it's possible that starts_with() is a relatively hot code path, and it should not pay that penalty (it can generally return an answer proportional to the size of the prefix, not the whole string). - it naively feels like xstrncmpz() should be able to do the same thing, but that's not quite true. If you pass the length of the haystack buffer, then strncmp() finds that a shorter prefix string is "less than" than the haystack, even if the haystack starts with the prefix. If you pass the length of the prefix, then you risk reading past the end of the haystack if it is shorter than the prefix. So I think we really do need a new function. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-12 10:17:39 +01:00
if (starts_with_mem(s, buf + len - s, comment_line_str))
continue;
if (is_blank_line(s))
break;
}
end_of_title = s - buf;
/*
* Get the start of the trailers by looking starting from the end for a
* blank line before a set of non-blank lines that (i) are all
* trailers, or (ii) contains at least one Git-generated trailer and
* consists of at least 25% trailers.
*/
for (l = last_line(buf, len);
l >= end_of_title;
l = last_line(buf, l)) {
const char *bol = buf + l;
const char **p;
ssize_t separator_pos;
find multi-byte comment chars in unterminated buffers As with the previous patch, we need to swap out single-byte matching for something like starts_with() to match all bytes of a multi-byte comment character. But for cases where the buffer is not NUL-terminated (and we instead have an explicit size or end pointer), it's not safe to use starts_with(), as it might walk off the end of the buffer. Let's introduce a new starts_with_mem() that does the same thing but also accepts the length of the "haystack" str and makes sure not to walk past it. Note that in most cases the existing code did not need a length check at all, since it was written in a way that knew we had at least one byte available (and that was all we checked). So I had to read each one to find the appropriate bounds. The one exception is sequencer.c's add_commented_lines(), where we can actually get rid of the length check. Just like starts_with(), our starts_with_mem() handles an empty haystack variable by not matching (assuming a non-empty prefix). A few notes on the implementation of starts_with_mem(): - it would be equally correct to take an "end" pointer (and indeed, many of the callers have this and have to subtract to come up with the length). I think taking a ptr/size combo is a more usual interface for our codebase, though, and has the added benefit that the function signature makes it harder to mix up the three parameters. - we could obviously build starts_with() on top of this by passing strlen(str) as the length. But it's possible that starts_with() is a relatively hot code path, and it should not pay that penalty (it can generally return an answer proportional to the size of the prefix, not the whole string). - it naively feels like xstrncmpz() should be able to do the same thing, but that's not quite true. If you pass the length of the haystack buffer, then strncmp() finds that a shorter prefix string is "less than" than the haystack, even if the haystack starts with the prefix. If you pass the length of the prefix, then you risk reading past the end of the haystack if it is shorter than the prefix. So I think we really do need a new function. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-12 10:17:39 +01:00
if (starts_with_mem(bol, buf + len - bol, comment_line_str)) {
non_trailer_lines += possible_continuation_lines;
possible_continuation_lines = 0;
continue;
}
if (is_blank_line(bol)) {
if (only_spaces)
continue;
non_trailer_lines += possible_continuation_lines;
if (recognized_prefix &&
trailer_lines * 3 >= non_trailer_lines)
return next_line(bol) - buf;
else if (trailer_lines && !non_trailer_lines)
return next_line(bol) - buf;
return len;
}
only_spaces = 0;
for (p = git_generated_prefixes; *p; p++) {
if (starts_with(bol, *p)) {
trailer_lines++;
possible_continuation_lines = 0;
recognized_prefix = 1;
goto continue_outer_loop;
}
}
separator_pos = find_separator(bol, separators);
if (separator_pos >= 1 && !isspace(bol[0])) {
struct list_head *pos;
trailer_lines++;
possible_continuation_lines = 0;
if (recognized_prefix)
continue;
list_for_each(pos, &conf_head) {
struct arg_item *item;
item = list_entry(pos, struct arg_item, list);
if (token_matches_item(bol, item,
separator_pos)) {
recognized_prefix = 1;
break;
}
}
} else if (isspace(bol[0]))
possible_continuation_lines++;
else {
non_trailer_lines++;
non_trailer_lines += possible_continuation_lines;
possible_continuation_lines = 0;
}
continue_outer_loop:
;
}
return len;
}
static int ends_with_blank_line(const char *buf, size_t len)
{
ssize_t ll = last_line(buf, len);
if (ll < 0)
return 0;
return is_blank_line(buf + ll);
}
static void unfold_value(struct strbuf *val)
{
struct strbuf out = STRBUF_INIT;
size_t i;
strbuf_grow(&out, val->len);
i = 0;
while (i < val->len) {
char c = val->buf[i++];
if (c == '\n') {
/* Collapse continuation down to a single space. */
while (i < val->len && isspace(val->buf[i]))
i++;
strbuf_addch(&out, ' ');
} else {
strbuf_addch(&out, c);
}
}
/* Empty lines may have left us with whitespace cruft at the edges */
strbuf_trim(&out);
/* output goes back to val as if we modified it in-place */
strbuf_swap(&out, val);
strbuf_release(&out);
}
/*
* Parse trailers in "str", populating the trailer info and "head"
* linked list structure.
*/
void parse_trailers(const struct process_trailer_options *opts,
struct trailer_info *info,
const char *str,
struct list_head *head)
{
struct strbuf tok = STRBUF_INIT;
struct strbuf val = STRBUF_INIT;
size_t i;
trailer_info_get(opts, str, info);
for (i = 0; i < info->trailer_nr; i++) {
int separator_pos;
char *trailer = info->trailers[i];
if (starts_with(trailer, comment_line_str))
continue;
separator_pos = find_separator(trailer, separators);
if (separator_pos >= 1) {
parse_trailer(&tok, &val, NULL, trailer,
separator_pos);
if (opts->unfold)
unfold_value(&val);
add_trailer_item(head,
strbuf_detach(&tok, NULL),
strbuf_detach(&val, NULL));
} else if (!opts->only_trailers) {
strbuf_addstr(&val, trailer);
strbuf_strip_suffix(&val, "\n");
add_trailer_item(head,
NULL,
strbuf_detach(&val, NULL));
}
}
}
void free_trailers(struct list_head *trailers)
{
struct list_head *pos, *p;
list_for_each_safe(pos, p, trailers) {
list_del(pos);
free_trailer_item(list_entry(pos, struct trailer_item, list));
}
}
void trailer_info_get(const struct process_trailer_options *opts,
const char *str,
struct trailer_info *info)
{
size_t end_of_log_message = 0, trailer_block_start = 0;
struct strbuf **trailer_lines, **ptr;
char **trailer_strings = NULL;
size_t nr = 0, alloc = 0;
char **last = NULL;
trailer_config_init();
trailer: find the end of the log message Previously, trailer_info_get() computed the trailer block end position by (1) checking for the opts->no_divider flag and optionally calling find_patch_start() to find the "patch start" location (patch_start), and (2) calling find_trailer_end() to find the end of the trailer block using patch_start as a guide, saving the return value into "trailer_end". The logic in (1) was awkward because the variable "patch_start" is misleading if there is no patch in the input. The logic in (2) was misleading because it could be the case that no trailers are in the input (yet we are setting a "trailer_end" variable before even searching for trailers, which happens later in find_trailer_start()). The name "find_trailer_end" was misleading because that function did not look for any trailer block itself --- instead it just computed the end position of the log message in the input where the end of the trailer block (if it exists) would be (because trailer blocks must always come after the end of the log message). Combine the logic in (1) and (2) together into find_patch_start() by renaming it to find_end_of_log_message(). The end of the log message is the starting point which find_trailer_start() needs to start searching backward to parse individual trailers (if any). Helped-by: Jonathan Tan <jonathantanmy@google.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Linus Arver <linusa@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-10-20 21:01:34 +02:00
end_of_log_message = find_end_of_log_message(str, opts->no_divider);
trailer_block_start = find_trailer_block_start(str, end_of_log_message);
trailer_lines = strbuf_split_buf(str + trailer_block_start,
end_of_log_message - trailer_block_start,
'\n',
0);
for (ptr = trailer_lines; *ptr; ptr++) {
if (last && isspace((*ptr)->buf[0])) {
struct strbuf sb = STRBUF_INIT;
strbuf_attach(&sb, *last, strlen(*last), strlen(*last));
strbuf_addbuf(&sb, *ptr);
*last = strbuf_detach(&sb, NULL);
continue;
}
ALLOC_GROW(trailer_strings, nr + 1, alloc);
trailer_strings[nr] = strbuf_detach(*ptr, NULL);
last = find_separator(trailer_strings[nr], separators) >= 1
? &trailer_strings[nr]
: NULL;
nr++;
}
strbuf_list_free(trailer_lines);
info->blank_line_before_trailer = ends_with_blank_line(str,
trailer_block_start);
info->trailer_block_start = trailer_block_start;
info->trailer_block_end = end_of_log_message;
info->trailers = trailer_strings;
info->trailer_nr = nr;
}
void trailer_info_release(struct trailer_info *info)
{
size_t i;
for (i = 0; i < info->trailer_nr; i++)
free(info->trailers[i]);
free(info->trailers);
}
void format_trailers(const struct process_trailer_options *opts,
struct list_head *trailers,
struct strbuf *out)
{
size_t origlen = out->len;
struct list_head *pos;
struct trailer_item *item;
list_for_each(pos, trailers) {
item = list_entry(pos, struct trailer_item, list);
if (item->token) {
struct strbuf tok = STRBUF_INIT;
struct strbuf val = STRBUF_INIT;
strbuf_addstr(&tok, item->token);
strbuf_addstr(&val, item->value);
/*
* Skip key/value pairs where the value was empty. This
* can happen from trailers specified without a
* separator, like `--trailer "Reviewed-by"` (no
* corresponding value).
*/
if (opts->trim_empty && !strlen(item->value))
continue;
if (!opts->filter || opts->filter(&tok, opts->filter_data)) {
if (opts->separator && out->len != origlen)
strbuf_addbuf(out, opts->separator);
if (!opts->value_only)
strbuf_addbuf(out, &tok);
if (!opts->key_only && !opts->value_only) {
if (opts->key_value_separator)
strbuf_addbuf(out, opts->key_value_separator);
else {
char c = last_non_space_char(tok.buf);
if (c && !strchr(separators, c))
strbuf_addf(out, "%c ", separators[0]);
}
}
if (!opts->key_only)
strbuf_addbuf(out, &val);
if (!opts->separator)
strbuf_addch(out, '\n');
}
strbuf_release(&tok);
strbuf_release(&val);
} else if (!opts->only_trailers) {
if (opts->separator && out->len != origlen) {
strbuf_addbuf(out, opts->separator);
}
strbuf_addstr(out, item->value);
format_trailer_info(): append newline for non-trailer lines This wraps up the preparatory refactors to unify the trailer formatters. Two patches ago we made format_trailer_info() use trailer_item objects instead of the "trailers" string array. The strings in the array include trailing newlines, because the string array is split up with trailer_lines = strbuf_split_buf(str + trailer_block_start, end_of_log_message - trailer_block_start, '\n', 0); in trailer_info_get() and strbuf_split_buf() includes the terminator (in this case the newline character '\n') for each split-up substring. And before we made the transition to use trailer_item objects for it, format_trailer_info() called parse_trailer() (which trims newlines) for trailer lines but did _not_ call parse_trailer() for non-trailer lines. So for trailer lines it had to add back the trimmed newline like this if (!opts->separator) strbuf_addch(out, '\n'); But for non-trailer lines it didn't have to add back the newline because it could just reuse same string in the "trailers" string array (which again, already included the trailing newline). Now that format_trailer_info() uses trailer_item objects for all cases, it can't rely on "trailers" string array anymore. And so it must be taught to add a newline back when printing non-trailer lines, just like it already does for trailer lines. Do so now. The test suite can pass again without the need to hide failures with *_failure, so flip the affected test cases back to *_success. Now, format_trailer_info() is in better shape to supersede format_trailers(), which we'll do in the next commit. Signed-off-by: Linus Arver <linusa@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-15 07:55:03 +01:00
if (opts->separator)
strbuf_rtrim(out);
format_trailer_info(): append newline for non-trailer lines This wraps up the preparatory refactors to unify the trailer formatters. Two patches ago we made format_trailer_info() use trailer_item objects instead of the "trailers" string array. The strings in the array include trailing newlines, because the string array is split up with trailer_lines = strbuf_split_buf(str + trailer_block_start, end_of_log_message - trailer_block_start, '\n', 0); in trailer_info_get() and strbuf_split_buf() includes the terminator (in this case the newline character '\n') for each split-up substring. And before we made the transition to use trailer_item objects for it, format_trailer_info() called parse_trailer() (which trims newlines) for trailer lines but did _not_ call parse_trailer() for non-trailer lines. So for trailer lines it had to add back the trimmed newline like this if (!opts->separator) strbuf_addch(out, '\n'); But for non-trailer lines it didn't have to add back the newline because it could just reuse same string in the "trailers" string array (which again, already included the trailing newline). Now that format_trailer_info() uses trailer_item objects for all cases, it can't rely on "trailers" string array anymore. And so it must be taught to add a newline back when printing non-trailer lines, just like it already does for trailer lines. Do so now. The test suite can pass again without the need to hide failures with *_failure, so flip the affected test cases back to *_success. Now, format_trailer_info() is in better shape to supersede format_trailers(), which we'll do in the next commit. Signed-off-by: Linus Arver <linusa@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-15 07:55:03 +01:00
else
strbuf_addch(out, '\n');
}
}
}
trailer: reorder format_trailers_from_commit() parameters Currently there are two functions for formatting trailers in <trailer.h>: void format_trailers(const struct process_trailer_options *, struct list_head *trailers, FILE *outfile); void format_trailers_from_commit(struct strbuf *out, const char *msg, const struct process_trailer_options *opts); and although they are similar enough (even taking the same process_trailer_options struct pointer) they are used quite differently. One might intuitively think that format_trailers_from_commit() builds on top of format_trailers(), but this is not the case. Instead format_trailers_from_commit() calls format_trailer_info() and format_trailers() is never called in that codepath. This is a preparatory refactor to help us deprecate format_trailers() in favor of format_trailer_info() (at which point we can rename the latter to the former). When the deprecation is complete, both format_trailers_from_commit(), and the interpret-trailers builtin will be able to call into the same helper function (instead of format_trailers() and format_trailer_info(), respectively). Unifying the formatters is desirable because it simplifies the API. Reorder parameters for format_trailers_from_commit() to prefer const struct process_trailer_options *opts as the first parameter, because these options are intimately tied to formatting trailers. And take struct strbuf *out last, because it's an "out parameter" (something that the caller wants to use as the output of this function). Similarly, reorder parameters for format_trailer_info(), because later on we will unify the two together. Signed-off-by: Linus Arver <linusa@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-01 01:14:42 +01:00
void format_trailers_from_commit(const struct process_trailer_options *opts,
const char *msg,
struct strbuf *out)
{
LIST_HEAD(trailer_objects);
struct trailer_info info;
parse_trailers(opts, &info, msg, &trailer_objects);
/* If we want the whole block untouched, we can take the fast path. */
if (!opts->only_trailers && !opts->unfold && !opts->filter &&
!opts->separator && !opts->key_only && !opts->value_only &&
!opts->key_value_separator) {
strbuf_add(out, msg + info.trailer_block_start,
info.trailer_block_end - info.trailer_block_start);
} else
format_trailers(opts, &trailer_objects, out);
free_trailers(&trailer_objects);
trailer_info_release(&info);
}
void trailer_iterator_init(struct trailer_iterator *iter, const char *msg)
{
struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT;
strbuf_init(&iter->key, 0);
strbuf_init(&iter->val, 0);
opts.no_divider = 1;
trailer_info_get(&opts, msg, &iter->internal.info);
iter->internal.cur = 0;
}
int trailer_iterator_advance(struct trailer_iterator *iter)
{
while (iter->internal.cur < iter->internal.info.trailer_nr) {
char *trailer = iter->internal.info.trailers[iter->internal.cur++];
int separator_pos = find_separator(trailer, separators);
if (separator_pos < 1)
continue; /* not a real trailer */
strbuf_reset(&iter->key);
strbuf_reset(&iter->val);
parse_trailer(&iter->key, &iter->val, NULL,
trailer, separator_pos);
/* Always unfold values during iteration. */
unfold_value(&iter->val);
return 1;
}
return 0;
}
void trailer_iterator_release(struct trailer_iterator *iter)
{
trailer_info_release(&iter->internal.info);
strbuf_release(&iter->val);
strbuf_release(&iter->key);
}