From be6e0daee700b2ee28846b54ddfefe08cd4ec506 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Sun, 13 Dec 2020 00:25:28 +0000 Subject: [PATCH 1/2] abspath: add a function to resolve paths with missing components Currently, we have a function to resolve paths, strbuf_realpath. This function canonicalizes paths like realpath(3), but permits a trailing component to be absent from the file system. In other words, this is the behavior of the GNU realpath(1) without any arguments. In the future, we'll need this same behavior, except that we want to allow for any number of missing trailing components, which is the behavior of GNU realpath(1) with the -m option. This is useful because we'll want to canonicalize a path that may point to a not yet present path under the .git directory. For example, a user may want to know where an arbitrary ref would be stored if it existed in the file system. Let's refactor strbuf_realpath to move most of the code to an internal function and then pass it two flags to control its behavior. We'll add a strbuf_realpath_forgiving function that has our new behavior, and leave strbuf_realpath with the older, stricter behavior. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- abspath.c | 64 +++++++++++++++++++++++++++++++++++++++---------------- cache.h | 2 ++ 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/abspath.c b/abspath.c index 6f15a418bb..39e06b5848 100644 --- a/abspath.c +++ b/abspath.c @@ -67,19 +67,15 @@ static void get_root_part(struct strbuf *resolved, struct strbuf *remaining) #endif /* - * Return the real path (i.e., absolute path, with symlinks resolved - * and extra slashes removed) equivalent to the specified path. (If - * you want an absolute path but don't mind links, use - * absolute_path().) Places the resolved realpath in the provided strbuf. - * - * The directory part of path (i.e., everything up to the last - * dir_sep) must denote a valid, existing directory, but the last - * component need not exist. If die_on_error is set, then die with an - * informative error message if there is a problem. Otherwise, return - * NULL on errors (without generating any output). + * If set, any number of trailing components may be missing; otherwise, only one + * may be. */ -char *strbuf_realpath(struct strbuf *resolved, const char *path, - int die_on_error) +#define REALPATH_MANY_MISSING (1 << 0) +/* Should we die if there's an error? */ +#define REALPATH_DIE_ON_ERROR (1 << 1) + +static char *strbuf_realpath_1(struct strbuf *resolved, const char *path, + int flags) { struct strbuf remaining = STRBUF_INIT; struct strbuf next = STRBUF_INIT; @@ -89,7 +85,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, struct stat st; if (!*path) { - if (die_on_error) + if (flags & REALPATH_DIE_ON_ERROR) die("The empty string is not a valid path"); else goto error_out; @@ -101,7 +97,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, if (!resolved->len) { /* relative path; can use CWD as the initial resolved path */ if (strbuf_getcwd(resolved)) { - if (die_on_error) + if (flags & REALPATH_DIE_ON_ERROR) die_errno("unable to get current working directory"); else goto error_out; @@ -129,8 +125,9 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, if (lstat(resolved->buf, &st)) { /* error out unless this was the last component */ - if (errno != ENOENT || remaining.len) { - if (die_on_error) + if (errno != ENOENT || + (!(flags & REALPATH_MANY_MISSING) && remaining.len)) { + if (flags & REALPATH_DIE_ON_ERROR) die_errno("Invalid path '%s'", resolved->buf); else @@ -143,7 +140,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, if (num_symlinks++ > MAXSYMLINKS) { errno = ELOOP; - if (die_on_error) + if (flags & REALPATH_DIE_ON_ERROR) die("More than %d nested symlinks " "on path '%s'", MAXSYMLINKS, path); else @@ -153,7 +150,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, len = strbuf_readlink(&symlink, resolved->buf, st.st_size); if (len < 0) { - if (die_on_error) + if (flags & REALPATH_DIE_ON_ERROR) die_errno("Invalid symlink '%s'", resolved->buf); else @@ -202,6 +199,37 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, return retval; } +/* + * Return the real path (i.e., absolute path, with symlinks resolved + * and extra slashes removed) equivalent to the specified path. (If + * you want an absolute path but don't mind links, use + * absolute_path().) Places the resolved realpath in the provided strbuf. + * + * The directory part of path (i.e., everything up to the last + * dir_sep) must denote a valid, existing directory, but the last + * component need not exist. If die_on_error is set, then die with an + * informative error message if there is a problem. Otherwise, return + * NULL on errors (without generating any output). + */ +char *strbuf_realpath(struct strbuf *resolved, const char *path, + int die_on_error) +{ + return strbuf_realpath_1(resolved, path, + die_on_error ? REALPATH_DIE_ON_ERROR : 0); +} + +/* + * Just like strbuf_realpath, but allows an arbitrary number of path + * components to be missing. + */ +char *strbuf_realpath_forgiving(struct strbuf *resolved, const char *path, + int die_on_error) +{ + return strbuf_realpath_1(resolved, path, + ((die_on_error ? REALPATH_DIE_ON_ERROR : 0) | + REALPATH_MANY_MISSING)); +} + char *real_pathdup(const char *path, int die_on_error) { struct strbuf realpath = STRBUF_INIT; diff --git a/cache.h b/cache.h index 8d279bc110..487ebab8b0 100644 --- a/cache.h +++ b/cache.h @@ -1325,6 +1325,8 @@ static inline int is_absolute_path(const char *path) int is_directory(const char *); char *strbuf_realpath(struct strbuf *resolved, const char *path, int die_on_error); +char *strbuf_realpath_forgiving(struct strbuf *resolved, const char *path, + int die_on_error); char *real_pathdup(const char *path, int die_on_error); const char *absolute_path(const char *path); char *absolute_pathdup(const char *path); From fac60b89252e5865da077bb6ccaa9bef43efbfeb Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Sun, 13 Dec 2020 00:25:29 +0000 Subject: [PATCH 2/2] rev-parse: add option for absolute or relative path formatting git rev-parse has several options which print various paths. Some of these paths are printed relative to the current working directory, and some are absolute. Normally, this is not a problem, but there are times when one wants paths entirely in one format or another. This can be done trivially if the paths are canonical, but canonicalizing paths is not possible on some shell scripting environments which lack realpath(1) and also in Go, which lacks functions that properly canonicalize paths on Windows. To help out the scripter, let's provide an option which turns most of the paths printed by git rev-parse to be either relative to the current working directory or absolute and canonical. Document which options are affected and which are not so that users are not confused. This approach is cleaner and tidier than providing duplicates of existing options which are either relative or absolute. Note that if the user needs both forms, it is possible to pass an additional option in the middle of the command line which changes the behavior of subsequent operations. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano --- Documentation/git-rev-parse.txt | 74 +++++++++++++--------- builtin/rev-parse.c | 106 ++++++++++++++++++++++++++++---- t/t1500-rev-parse.sh | 57 ++++++++++++++++- 3 files changed, 194 insertions(+), 43 deletions(-) diff --git a/Documentation/git-rev-parse.txt b/Documentation/git-rev-parse.txt index 5013daa6ef..6b8ca085aa 100644 --- a/Documentation/git-rev-parse.txt +++ b/Documentation/git-rev-parse.txt @@ -212,6 +212,18 @@ Options for Files Only the names of the variables are listed, not their value, even if they are set. +--path-format=(absolute|relative):: + Controls the behavior of certain other options. If specified as absolute, the + paths printed by those options will be absolute and canonical. If specified as + relative, the paths will be relative to the current working directory if that + is possible. The default is option specific. ++ +This option may be specified multiple times and affects only the arguments that +follow it on the command line, either to the end of the command line or the next +instance of this option. + +The following options are modified by `--path-format`: + --git-dir:: Show `$GIT_DIR` if defined. Otherwise show the path to the .git directory. The path shown, when relative, is @@ -221,13 +233,42 @@ If `$GIT_DIR` is not defined and the current directory is not detected to lie in a Git repository or work tree print a message to stderr and exit with nonzero status. +--git-common-dir:: + Show `$GIT_COMMON_DIR` if defined, else `$GIT_DIR`. + +--resolve-git-dir :: + Check if is a valid repository or a gitfile that + points at a valid repository, and print the location of the + repository. If is a gitfile then the resolved path + to the real repository is printed. + +--git-path :: + Resolve "$GIT_DIR/" and takes other path relocation + variables such as $GIT_OBJECT_DIRECTORY, + $GIT_INDEX_FILE... into account. For example, if + $GIT_OBJECT_DIRECTORY is set to /foo/bar then "git rev-parse + --git-path objects/abc" returns /foo/bar/abc. + +--show-toplevel:: + Show the (by default, absolute) path of the top-level directory + of the working tree. If there is no working tree, report an error. + +--show-superproject-working-tree:: + Show the absolute path of the root of the superproject's + working tree (if exists) that uses the current repository as + its submodule. Outputs nothing if the current repository is + not used as a submodule by any project. + +--shared-index-path:: + Show the path to the shared index file in split index mode, or + empty if not in split-index mode. + +The following options are unaffected by `--path-format`: + --absolute-git-dir:: Like `--git-dir`, but its output is always the canonicalized absolute path. ---git-common-dir:: - Show `$GIT_COMMON_DIR` if defined, else `$GIT_DIR`. - --is-inside-git-dir:: When the current working directory is below the repository directory print "true", otherwise "false". @@ -242,19 +283,6 @@ print a message to stderr and exit with nonzero status. --is-shallow-repository:: When the repository is shallow print "true", otherwise "false". ---resolve-git-dir :: - Check if is a valid repository or a gitfile that - points at a valid repository, and print the location of the - repository. If is a gitfile then the resolved path - to the real repository is printed. - ---git-path :: - Resolve "$GIT_DIR/" and takes other path relocation - variables such as $GIT_OBJECT_DIRECTORY, - $GIT_INDEX_FILE... into account. For example, if - $GIT_OBJECT_DIRECTORY is set to /foo/bar then "git rev-parse - --git-path objects/abc" returns /foo/bar/abc. - --show-cdup:: When the command is invoked from a subdirectory, show the path of the top-level directory relative to the current @@ -265,20 +293,6 @@ print a message to stderr and exit with nonzero status. path of the current directory relative to the top-level directory. ---show-toplevel:: - Show the absolute path of the top-level directory of the working - tree. If there is no working tree, report an error. - ---show-superproject-working-tree:: - Show the absolute path of the root of the superproject's - working tree (if exists) that uses the current repository as - its submodule. Outputs nothing if the current repository is - not used as a submodule by any project. - ---shared-index-path:: - Show the path to the shared index file in split index mode, or - empty if not in split-index mode. - --show-object-format[=(storage|input|output)]:: Show the object format (hash algorithm) used for the repository for storage inside the `.git` directory, input, or output. For diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 69ba7326cf..85bad9052e 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -583,6 +583,75 @@ static void handle_ref_opt(const char *pattern, const char *prefix) clear_ref_exclusion(&ref_excludes); } +enum format_type { + /* We would like a relative path. */ + FORMAT_RELATIVE, + /* We would like a canonical absolute path. */ + FORMAT_CANONICAL, + /* We would like the default behavior. */ + FORMAT_DEFAULT, +}; + +enum default_type { + /* Our default is a relative path. */ + DEFAULT_RELATIVE, + /* Our default is a relative path if there's a shared root. */ + DEFAULT_RELATIVE_IF_SHARED, + /* Our default is a canonical absolute path. */ + DEFAULT_CANONICAL, + /* Our default is not to modify the item. */ + DEFAULT_UNMODIFIED, +}; + +static void print_path(const char *path, const char *prefix, enum format_type format, enum default_type def) +{ + char *cwd = NULL; + /* + * We don't ever produce a relative path if prefix is NULL, so set the + * prefix to the current directory so that we can produce a relative + * path whenever possible. If we're using RELATIVE_IF_SHARED mode, then + * we want an absolute path unless the two share a common prefix, so don't + * set it in that case, since doing so causes a relative path to always + * be produced if possible. + */ + if (!prefix && (format != FORMAT_DEFAULT || def != DEFAULT_RELATIVE_IF_SHARED)) + prefix = cwd = xgetcwd(); + if (format == FORMAT_DEFAULT && def == DEFAULT_UNMODIFIED) { + puts(path); + } else if (format == FORMAT_RELATIVE || + (format == FORMAT_DEFAULT && def == DEFAULT_RELATIVE)) { + /* + * In order for relative_path to work as expected, we need to + * make sure that both paths are absolute paths. If we don't, + * we can end up with an unexpected absolute path that the user + * didn't want. + */ + struct strbuf buf = STRBUF_INIT, realbuf = STRBUF_INIT, prefixbuf = STRBUF_INIT; + if (!is_absolute_path(path)) { + strbuf_realpath_forgiving(&realbuf, path, 1); + path = realbuf.buf; + } + if (!is_absolute_path(prefix)) { + strbuf_realpath_forgiving(&prefixbuf, prefix, 1); + prefix = prefixbuf.buf; + } + puts(relative_path(path, prefix, &buf)); + strbuf_release(&buf); + strbuf_release(&realbuf); + strbuf_release(&prefixbuf); + } else if (format == FORMAT_DEFAULT && def == DEFAULT_RELATIVE_IF_SHARED) { + struct strbuf buf = STRBUF_INIT; + puts(relative_path(path, prefix, &buf)); + strbuf_release(&buf); + } else { + struct strbuf buf = STRBUF_INIT; + strbuf_realpath_forgiving(&buf, path, 1); + puts(buf.buf); + strbuf_release(&buf); + } + free(cwd); +} + int cmd_rev_parse(int argc, const char **argv, const char *prefix) { int i, as_is = 0, verify = 0, quiet = 0, revs_count = 0, type = 0; @@ -596,6 +665,7 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix) struct strbuf buf = STRBUF_INIT; const int hexsz = the_hash_algo->hexsz; int seen_end_of_options = 0; + enum format_type format = FORMAT_DEFAULT; if (argc > 1 && !strcmp("--parseopt", argv[1])) return cmd_parseopt(argc - 1, argv + 1, prefix); @@ -668,8 +738,9 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix) if (!argv[i + 1]) die("--git-path requires an argument"); strbuf_reset(&buf); - puts(relative_path(git_path("%s", argv[i + 1]), - prefix, &buf)); + print_path(git_path("%s", argv[i + 1]), prefix, + format, + DEFAULT_RELATIVE_IF_SHARED); i++; continue; } @@ -687,6 +758,16 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix) show(arg); continue; } + if (opt_with_value(arg, "--path-format", &arg)) { + if (!strcmp(arg, "absolute")) { + format = FORMAT_CANONICAL; + } else if (!strcmp(arg, "relative")) { + format = FORMAT_RELATIVE; + } else { + die("unknown argument to --path-format: %s", arg); + } + continue; + } if (!strcmp(arg, "--default")) { def = argv[++i]; if (!def) @@ -807,7 +888,7 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix) if (!strcmp(arg, "--show-toplevel")) { const char *work_tree = get_git_work_tree(); if (work_tree) - puts(work_tree); + print_path(work_tree, prefix, format, DEFAULT_UNMODIFIED); else die("this operation must be run in a work tree"); continue; @@ -815,7 +896,7 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix) if (!strcmp(arg, "--show-superproject-working-tree")) { struct strbuf superproject = STRBUF_INIT; if (get_superproject_working_tree(&superproject)) - puts(superproject.buf); + print_path(superproject.buf, prefix, format, DEFAULT_UNMODIFIED); strbuf_release(&superproject); continue; } @@ -850,16 +931,18 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix) const char *gitdir = getenv(GIT_DIR_ENVIRONMENT); char *cwd; int len; + enum format_type wanted = format; if (arg[2] == 'g') { /* --git-dir */ if (gitdir) { - puts(gitdir); + print_path(gitdir, prefix, format, DEFAULT_UNMODIFIED); continue; } if (!prefix) { - puts(".git"); + print_path(".git", prefix, format, DEFAULT_UNMODIFIED); continue; } } else { /* --absolute-git-dir */ + wanted = FORMAT_CANONICAL; if (!gitdir && !prefix) gitdir = ".git"; if (gitdir) { @@ -872,14 +955,14 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix) } cwd = xgetcwd(); len = strlen(cwd); - printf("%s%s.git\n", cwd, len && cwd[len-1] != '/' ? "/" : ""); + strbuf_reset(&buf); + strbuf_addf(&buf, "%s%s.git", cwd, len && cwd[len-1] != '/' ? "/" : ""); free(cwd); + print_path(buf.buf, prefix, wanted, DEFAULT_CANONICAL); continue; } if (!strcmp(arg, "--git-common-dir")) { - strbuf_reset(&buf); - puts(relative_path(get_git_common_dir(), - prefix, &buf)); + print_path(get_git_common_dir(), prefix, format, DEFAULT_RELATIVE_IF_SHARED); continue; } if (!strcmp(arg, "--is-inside-git-dir")) { @@ -909,8 +992,7 @@ int cmd_rev_parse(int argc, const char **argv, const char *prefix) if (the_index.split_index) { const struct object_id *oid = &the_index.split_index->base_oid; const char *path = git_path("sharedindex.%s", oid_to_hex(oid)); - strbuf_reset(&buf); - puts(relative_path(path, prefix, &buf)); + print_path(path, prefix, format, DEFAULT_RELATIVE); } continue; } diff --git a/t/t1500-rev-parse.sh b/t/t1500-rev-parse.sh index 408b97d5af..51d7d40ec1 100755 --- a/t/t1500-rev-parse.sh +++ b/t/t1500-rev-parse.sh @@ -3,6 +3,16 @@ test_description='test git rev-parse' . ./test-lib.sh +test_one () { + dir="$1" && + expect="$2" && + shift && + shift && + echo "$expect" >expect && + git -C "$dir" rev-parse "$@" >actual && + test_cmp expect actual +} + # usage: [options] label is-bare is-inside-git is-inside-work prefix git-dir absolute-git-dir test_rev_parse () { d= @@ -60,7 +70,13 @@ ROOT=$(pwd) test_expect_success 'setup' ' mkdir -p sub/dir work && - cp -R .git repo.git + cp -R .git repo.git && + git checkout -B main && + test_commit abc && + git checkout -b side && + test_commit def && + git checkout main && + git worktree add worktree side ' test_rev_parse toplevel false false true '' .git "$ROOT/.git" @@ -88,6 +104,45 @@ test_rev_parse -C work -g ../repo.git -b t 'GIT_DIR=../repo.git, core.bare = tru test_rev_parse -C work -g ../repo.git -b u 'GIT_DIR=../repo.git, core.bare undefined' false false true '' +test_expect_success 'rev-parse --path-format=absolute' ' + test_one "." "$ROOT/.git" --path-format=absolute --git-dir && + test_one "." "$ROOT/.git" --path-format=absolute --git-common-dir && + test_one "sub/dir" "$ROOT/.git" --path-format=absolute --git-dir && + test_one "sub/dir" "$ROOT/.git" --path-format=absolute --git-common-dir && + test_one "worktree" "$ROOT/.git/worktrees/worktree" --path-format=absolute --git-dir && + test_one "worktree" "$ROOT/.git" --path-format=absolute --git-common-dir && + test_one "." "$ROOT" --path-format=absolute --show-toplevel && + test_one "." "$ROOT/.git/objects" --path-format=absolute --git-path objects && + test_one "." "$ROOT/.git/objects/foo/bar/baz" --path-format=absolute --git-path objects/foo/bar/baz +' + +test_expect_success 'rev-parse --path-format=relative' ' + test_one "." ".git" --path-format=relative --git-dir && + test_one "." ".git" --path-format=relative --git-common-dir && + test_one "sub/dir" "../../.git" --path-format=relative --git-dir && + test_one "sub/dir" "../../.git" --path-format=relative --git-common-dir && + test_one "worktree" "../.git/worktrees/worktree" --path-format=relative --git-dir && + test_one "worktree" "../.git" --path-format=relative --git-common-dir && + test_one "." "./" --path-format=relative --show-toplevel && + test_one "." ".git/objects" --path-format=relative --git-path objects && + test_one "." ".git/objects/foo/bar/baz" --path-format=relative --git-path objects/foo/bar/baz +' + +test_expect_success '--path-format=relative does not affect --absolute-git-dir' ' + git rev-parse --path-format=relative --absolute-git-dir >actual && + echo "$ROOT/.git" >expect && + test_cmp expect actual +' + +test_expect_success '--path-format can change in the middle of the command line' ' + git rev-parse --path-format=absolute --git-dir --path-format=relative --git-path objects/foo/bar >actual && + cat >expect <<-EOF && + $ROOT/.git + .git/objects/foo/bar + EOF + test_cmp expect actual +' + test_expect_success 'git-common-dir from worktree root' ' echo .git >expect && git rev-parse --git-common-dir >actual &&