diff --git a/.gitignore b/.gitignore index 9229e918cd..d96f4f0c50 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ git-blame git-branch git-bundle git-cat-file +git-check-attr git-check-ref-format git-checkout git-checkout-index diff --git a/Makefile b/Makefile index b8e6030940..ac89d1ba36 100644 --- a/Makefile +++ b/Makefile @@ -283,7 +283,7 @@ LIB_H = \ diff.h object.h pack.h pkt-line.h quote.h refs.h list-objects.h sideband.h \ run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \ tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \ - utf8.h reflog-walk.h patch-ids.h + utf8.h reflog-walk.h attr.h DIFF_OBJS = \ diff.o diff-lib.o diffcore-break.o diffcore-order.o \ @@ -305,7 +305,7 @@ LIB_OBJS = \ write_or_die.o trace.o list-objects.o grep.o match-trees.o \ alloc.o merge-file.o path-list.o help.o unpack-trees.o $(DIFF_OBJS) \ color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \ - convert.o + convert.o attr.o BUILTIN_OBJS = \ builtin-add.o \ @@ -316,6 +316,7 @@ BUILTIN_OBJS = \ builtin-branch.o \ builtin-bundle.o \ builtin-cat-file.o \ + builtin-check-attr.o \ builtin-checkout-index.o \ builtin-check-ref-format.o \ builtin-commit-tree.o \ diff --git a/attr.c b/attr.c new file mode 100644 index 0000000000..7435d927a9 --- /dev/null +++ b/attr.c @@ -0,0 +1,380 @@ +#include "cache.h" +#include "attr.h" + +/* + * The basic design decision here is that we are not going to have + * insanely large number of attributes. + * + * This is a randomly chosen prime. + */ +#define HASHSIZE 257 + +#ifndef DEBUG_ATTR +#define DEBUG_ATTR 0 +#endif + +struct git_attr { + struct git_attr *next; + unsigned h; + char name[FLEX_ARRAY]; +}; + +static struct git_attr *(git_attr_hash[HASHSIZE]); + +static unsigned hash_name(const char *name, int namelen) +{ + unsigned val = 0; + unsigned char c; + + while (namelen--) { + c = *name++; + val = ((val << 7) | (val >> 22)) ^ c; + } + return val; +} + +struct git_attr *git_attr(const char *name, int len) +{ + unsigned hval = hash_name(name, len); + unsigned pos = hval % HASHSIZE; + struct git_attr *a; + + for (a = git_attr_hash[pos]; a; a = a->next) { + if (a->h == hval && + !memcmp(a->name, name, len) && !a->name[len]) + return a; + } + + a = xmalloc(sizeof(*a) + len + 1); + memcpy(a->name, name, len); + a->name[len] = 0; + a->h = hval; + a->next = git_attr_hash[pos]; + git_attr_hash[pos] = a; + return a; +} + +/* + * .gitattributes file is one line per record, each of which is + * + * (1) glob pattern. + * (2) whitespace + * (3) whitespace separated list of attribute names, each of which + * could be prefixed with '!' to mean "not set". + */ + +struct attr_state { + int unset; + struct git_attr *attr; +}; + +struct match_attr { + char *pattern; + unsigned num_attr; + struct attr_state state[FLEX_ARRAY]; +}; + +static const char blank[] = " \t\r\n"; + +static struct match_attr *parse_attr_line(const char *line) +{ + int namelen; + int num_attr; + const char *cp, *name; + struct match_attr *res = res; + int pass; + + cp = line + strspn(line, blank); + if (!*cp || *cp == '#') + return NULL; + name = cp; + namelen = strcspn(name, blank); + + for (pass = 0; pass < 2; pass++) { + /* pass 0 counts and allocates, pass 1 fills */ + num_attr = 0; + cp = name + namelen; + cp = cp + strspn(cp, blank); + while (*cp) { + const char *ep; + ep = cp + strcspn(cp, blank); + if (pass) { + struct attr_state *e; + + e = &(res->state[num_attr]); + if (*cp == '!') { + e->unset = 1; + cp++; + } + e->attr = git_attr(cp, ep - cp); + } + num_attr++; + cp = ep + strspn(ep, blank); + } + if (pass) + break; + res = xcalloc(1, + sizeof(*res) + + sizeof(struct attr_state) * num_attr + + namelen + 1); + res->pattern = (char*)&(res->state[num_attr]); + memcpy(res->pattern, name, namelen); + res->pattern[namelen] = 0; + res->num_attr = num_attr; + } + return res; +} + +/* + * Like info/exclude and .gitignore, the attribute information can + * come from many places. + * + * (1) .gitattribute file of the same directory; + * (2) .gitattribute file of the parent directory if (1) does not have any match; + * this goes recursively upwards, just like .gitignore + * (3) perhaps $GIT_DIR/info/attributes, as the final fallback. + * + * In the same file, later entries override the earlier match, so in the + * global list, we would have entries from info/attributes the earliest + * (reading the file from top to bottom), .gitattribute of the root + * directory (again, reading the file from top to bottom) down to the + * current directory, and then scan the list backwards to find the first match. + * This is exactly the same as what excluded() does in dir.c to deal with + * .gitignore + */ + +static struct attr_stack { + struct attr_stack *prev; + char *origin; + unsigned num_matches; + struct match_attr **attrs; +} *attr_stack; + +static void free_attr_elem(struct attr_stack *e) +{ + int i; + free(e->origin); + for (i = 0; i < e->num_matches; i++) + free(e->attrs[i]); + free(e); +} + +static const char *builtin_attr[] = { + NULL, +}; + +static struct attr_stack *read_attr_from_array(const char **list) +{ + struct attr_stack *res; + const char *line; + + res = xcalloc(1, sizeof(*res)); + while ((line = *(list++)) != NULL) { + struct match_attr *a = parse_attr_line(line); + if (!a) + continue; + res->attrs = xrealloc(res->attrs, res->num_matches + 1); + res->attrs[res->num_matches++] = a; + } + return res; +} + +static struct attr_stack *read_attr_from_file(const char *path) +{ + FILE *fp; + struct attr_stack *res; + char buf[2048]; + + res = xcalloc(1, sizeof(*res)); + fp = fopen(path, "r"); + if (!fp) + return res; + + while (fgets(buf, sizeof(buf), fp)) { + struct match_attr *a = parse_attr_line(buf); + if (!a) + continue; + res->attrs = xrealloc(res->attrs, res->num_matches + 1); + res->attrs[res->num_matches++] = a; + } + fclose(fp); + return res; +} + +#if DEBUG_ATTR +static void debug_info(const char *what, struct attr_stack *elem) +{ + fprintf(stderr, "%s: %s\n", what, elem->origin ? elem->origin : "()"); +} +#define debug_push(a) debug_info("push", (a)) +#define debug_pop(a) debug_info("pop", (a)) +#else +#define debug_push(a) do { ; } while (0) +#define debug_pop(a) do { ; } while (0) +#endif + +static void prepare_attr_stack(const char *path, int dirlen) +{ + struct attr_stack *elem, *info; + int len; + char pathbuf[PATH_MAX]; + + /* + * At the bottom of the attribute stack is the built-in + * set of attribute definitions. Then, contents from + * .gitattribute files from directories closer to the + * root to the ones in deeper directories are pushed + * to the stack. Finally, at the very top of the stack + * we always keep the contents of $GIT_DIR/info/attributes. + * + * When checking, we use entries from near the top of the + * stack, preferring $GIT_DIR/info/attributes, then + * .gitattributes in deeper directories to shallower ones, + * and finally use the built-in set as the default. + */ + if (!attr_stack) { + elem = read_attr_from_array(builtin_attr); + elem->origin = NULL; + elem->prev = attr_stack; + attr_stack = elem; + + elem = read_attr_from_file(GITATTRIBUTES_FILE); + elem->origin = strdup(""); + elem->prev = attr_stack; + attr_stack = elem; + debug_push(elem); + + elem = read_attr_from_file(git_path(INFOATTRIBUTES_FILE)); + elem->origin = NULL; + elem->prev = attr_stack; + attr_stack = elem; + } + + /* + * Pop the "info" one that is always at the top of the stack. + */ + info = attr_stack; + attr_stack = info->prev; + + /* + * Pop the ones from directories that are not the prefix of + * the path we are checking. + */ + while (attr_stack && attr_stack->origin) { + int namelen = strlen(attr_stack->origin); + + elem = attr_stack; + if (namelen <= dirlen && + !strncmp(elem->origin, path, namelen)) + break; + + debug_pop(elem); + attr_stack = elem->prev; + free_attr_elem(elem); + } + + /* + * Read from parent directories and push them down + */ + while (1) { + char *cp; + + len = strlen(attr_stack->origin); + if (dirlen <= len) + break; + memcpy(pathbuf, path, dirlen); + memcpy(pathbuf + dirlen, "/", 2); + cp = strchr(pathbuf + len + 1, '/'); + strcpy(cp + 1, GITATTRIBUTES_FILE); + elem = read_attr_from_file(pathbuf); + *cp = '\0'; + elem->origin = strdup(pathbuf); + elem->prev = attr_stack; + attr_stack = elem; + debug_push(elem); + } + + /* + * Finally push the "info" one at the top of the stack. + */ + info->prev = attr_stack; + attr_stack = info; +} + +static int path_matches(const char *pathname, int pathlen, + const char *pattern, + const char *base, int baselen) +{ + if (!strchr(pattern, '/')) { + /* match basename */ + const char *basename = strrchr(pathname, '/'); + basename = basename ? basename + 1 : pathname; + return (fnmatch(pattern, basename, 0) == 0); + } + /* + * match with FNM_PATHNAME; the pattern has base implicitly + * in front of it. + */ + if (*pattern == '/') + pattern++; + if (pathlen < baselen || + (baselen && pathname[baselen - 1] != '/') || + strncmp(pathname, base, baselen)) + return 0; + return fnmatch(pattern, pathname + baselen, FNM_PATHNAME) == 0; +} + +/* + * I do not like this at all. Only because we allow individual + * attribute to be set or unset incrementally by individual + * lines in .gitattribute files, we need to do this triple + * loop which looks quite wasteful. + */ +static int fill(const char *path, int pathlen, + struct attr_stack *stk, struct git_attr_check *check, + int num, int rem) +{ + int i, j, k; + const char *base = stk->origin ? stk->origin : ""; + + for (i = stk->num_matches - 1; 0 < rem && 0 <= i; i--) { + struct match_attr *a = stk->attrs[i]; + if (path_matches(path, pathlen, + a->pattern, base, strlen(base))) { + for (j = 0; j < a->num_attr; j++) { + struct git_attr *attr = a->state[j].attr; + int set = !a->state[j].unset; + for (k = 0; k < num; k++) { + if (0 <= check[k].isset || + check[k].attr != attr) + continue; + check[k].isset = set; + rem--; + } + } + } + } + return rem; +} + +int git_checkattr(const char *path, int num, struct git_attr_check *check) +{ + struct attr_stack *stk; + const char *cp; + int dirlen, pathlen, i, rem; + + for (i = 0; i < num; i++) + check[i].isset = -1; + + pathlen = strlen(path); + cp = strrchr(path, '/'); + if (!cp) + dirlen = 0; + else + dirlen = cp - path; + prepare_attr_stack(path, dirlen); + rem = num; + for (stk = attr_stack; 0 < rem && stk; stk = stk->prev) + rem = fill(path, pathlen, stk, check, num, rem); + return 0; +} diff --git a/attr.h b/attr.h new file mode 100644 index 0000000000..1e5ab40694 --- /dev/null +++ b/attr.h @@ -0,0 +1,16 @@ +#ifndef ATTR_H +#define ATTR_H + +/* An attribute is a pointer to this opaque structure */ +struct git_attr; + +struct git_attr *git_attr(const char *, int); + +struct git_attr_check { + struct git_attr *attr; + int isset; +}; + +int git_checkattr(const char *path, int, struct git_attr_check *); + +#endif /* ATTR_H */ diff --git a/builtin-check-attr.c b/builtin-check-attr.c new file mode 100644 index 0000000000..47b07210d6 --- /dev/null +++ b/builtin-check-attr.c @@ -0,0 +1,49 @@ +#include "builtin.h" +#include "attr.h" +#include "quote.h" + +static const char check_attr_usage[] = +"git-check-attr attr... [--] pathname..."; + +int cmd_check_attr(int argc, const char **argv, const char *prefix) +{ + struct git_attr_check *check; + int cnt, i, doubledash; + + doubledash = -1; + for (i = 1; doubledash < 0 && i < argc; i++) { + if (!strcmp(argv[i], "--")) + doubledash = i; + } + + /* If there is no double dash, we handle only one attribute */ + if (doubledash < 0) { + cnt = 1; + doubledash = 1; + } else + cnt = doubledash - 1; + doubledash++; + + if (cnt <= 0 || argc < doubledash) + usage(check_attr_usage); + check = xcalloc(cnt, sizeof(*check)); + for (i = 0; i < cnt; i++) { + const char *name; + name = argv[i + 1]; + check[i].attr = git_attr(name, strlen(name)); + } + + for (i = doubledash; i < argc; i++) { + int j; + if (git_checkattr(argv[i], cnt, check)) + die("git_checkattr died"); + for (j = 0; j < cnt; j++) { + write_name_quoted("", 0, argv[i], 1, stdout); + printf(": %s: %s\n", argv[j+1], + (check[j].isset < 0) ? "unspecified" : + (check[j].isset == 0) ? "unset" : + "set"); + } + } + return 0; +} diff --git a/builtin.h b/builtin.h index af203e9e36..d3f3a7496e 100644 --- a/builtin.h +++ b/builtin.h @@ -22,6 +22,7 @@ extern int cmd_branch(int argc, const char **argv, const char *prefix); extern int cmd_bundle(int argc, const char **argv, const char *prefix); extern int cmd_cat_file(int argc, const char **argv, const char *prefix); extern int cmd_checkout_index(int argc, const char **argv, const char *prefix); +extern int cmd_check_attr(int argc, const char **argv, const char *prefix); extern int cmd_check_ref_format(int argc, const char **argv, const char *prefix); extern int cmd_cherry(int argc, const char **argv, const char *prefix); extern int cmd_cherry_pick(int argc, const char **argv, const char *prefix); diff --git a/cache.h b/cache.h index b1bd9e46c2..63af43fe5c 100644 --- a/cache.h +++ b/cache.h @@ -151,6 +151,8 @@ enum object_type { #define CONFIG_ENVIRONMENT "GIT_CONFIG" #define CONFIG_LOCAL_ENVIRONMENT "GIT_CONFIG_LOCAL" #define EXEC_PATH_ENVIRONMENT "GIT_EXEC_PATH" +#define GITATTRIBUTES_FILE ".gitattributes" +#define INFOATTRIBUTES_FILE "info/attributes" extern int is_bare_repository_cfg; extern int is_bare_repository(void); diff --git a/git.c b/git.c index 7def319e60..f20090721a 100644 --- a/git.c +++ b/git.c @@ -234,6 +234,7 @@ static void handle_internal_command(int argc, const char **argv, char **envp) { "cat-file", cmd_cat_file, RUN_SETUP }, { "checkout-index", cmd_checkout_index, RUN_SETUP }, { "check-ref-format", cmd_check_ref_format }, + { "check-attr", cmd_check_attr, RUN_SETUP | NOT_BARE }, { "cherry", cmd_cherry, RUN_SETUP }, { "cherry-pick", cmd_cherry_pick, RUN_SETUP | NOT_BARE }, { "commit-tree", cmd_commit_tree, RUN_SETUP },