1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-05 08:16:11 +02:00
git/list-objects.c
Derrick Stolee 4f6d26b167 list-objects: consume sparse tree walk
When creating a pack-file using 'git pack-objects --revs' we provide
a list of interesting and uninteresting commits. For example, a push
operation would make the local topic branch be interesting and the
known remote refs as uninteresting. We want to discover the set of
new objects to send to the server as a thin pack.

We walk these commits until we discover a frontier of commits such
that every commit walk starting at interesting commits ends in a root
commit or unintersting commit. We then need to discover which
non-commit objects are reachable from  uninteresting commits. This
commit walk is not changing during this series.

The mark_edges_uninteresting() method in list-objects.c iterates on
the commit list and does the following:

* If the commit is UNINTERSTING, then mark its root tree and every
  object it can reach as UNINTERESTING.

* If the commit is interesting, then mark the root tree of every
  UNINTERSTING parent (and all objects that tree can reach) as
  UNINTERSTING.

At the very end, we repeat the process on every commit directly
given to the revision walk from stdin. This helps ensure we properly
cover shallow commits that otherwise were not included in the
frontier.

The logic to recursively follow trees is in the
mark_tree_uninteresting() method in revision.c. The algorithm avoids
duplicate work by not recursing into trees that are already marked
UNINTERSTING.

Add a new 'sparse' option to the mark_edges_uninteresting() method
that performs this logic in a slightly different way. As we iterate
over the commits, we add all of the root trees to an oidset. Then,
call mark_trees_uninteresting_sparse() on that oidset. Note that we
include interesting trees in this process. The current implementation
of mark_trees_unintersting_sparse() will walk the same trees as
the old logic, but this will be replaced in a later change.

Add a '--sparse' flag in 'git pack-objects' to call this new logic.
Add a new test script t/t5322-pack-objects-sparse.sh that tests this
option. The tests currently demonstrate that the resulting object
list is the same as the old algorithm. This includes a case where
both algorithms pack an object that is not needed by a remote due to
limits on the explored set of trees. When the sparse algorithm is
changed in a later commit, we will add a test that demonstrates a
change of behavior in some cases.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-01-17 13:44:39 -08:00

416 lines
11 KiB
C

#include "cache.h"
#include "tag.h"
#include "commit.h"
#include "tree.h"
#include "blob.h"
#include "diff.h"
#include "tree-walk.h"
#include "revision.h"
#include "list-objects.h"
#include "list-objects-filter.h"
#include "list-objects-filter-options.h"
#include "packfile.h"
#include "object-store.h"
#include "trace.h"
struct traversal_context {
struct rev_info *revs;
show_object_fn show_object;
show_commit_fn show_commit;
void *show_data;
filter_object_fn filter_fn;
void *filter_data;
};
static void process_blob(struct traversal_context *ctx,
struct blob *blob,
struct strbuf *path,
const char *name)
{
struct object *obj = &blob->object;
size_t pathlen;
enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW;
if (!ctx->revs->blob_objects)
return;
if (!obj)
die("bad blob object");
if (obj->flags & (UNINTERESTING | SEEN))
return;
/*
* Pre-filter known-missing objects when explicitly requested.
* Otherwise, a missing object error message may be reported
* later (depending on other filtering criteria).
*
* Note that this "--exclude-promisor-objects" pre-filtering
* may cause the actual filter to report an incomplete list
* of missing objects.
*/
if (ctx->revs->exclude_promisor_objects &&
!has_object_file(&obj->oid) &&
is_promisor_object(&obj->oid))
return;
pathlen = path->len;
strbuf_addstr(path, name);
if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
r = ctx->filter_fn(LOFS_BLOB, obj,
path->buf, &path->buf[pathlen],
ctx->filter_data);
if (r & LOFR_MARK_SEEN)
obj->flags |= SEEN;
if (r & LOFR_DO_SHOW)
ctx->show_object(obj, path->buf, ctx->show_data);
strbuf_setlen(path, pathlen);
}
/*
* Processing a gitlink entry currently does nothing, since
* we do not recurse into the subproject.
*
* We *could* eventually add a flag that actually does that,
* which would involve:
* - is the subproject actually checked out?
* - if so, see if the subproject has already been added
* to the alternates list, and add it if not.
* - process the commit (or tag) the gitlink points to
* recursively.
*
* However, it's unclear whether there is really ever any
* reason to see superprojects and subprojects as such a
* "unified" object pool (potentially resulting in a totally
* humongous pack - avoiding which was the whole point of
* having gitlinks in the first place!).
*
* So for now, there is just a note that we *could* follow
* the link, and how to do it. Whether it necessarily makes
* any sense what-so-ever to ever do that is another issue.
*/
static void process_gitlink(struct traversal_context *ctx,
const unsigned char *sha1,
struct strbuf *path,
const char *name)
{
/* Nothing to do */
}
static void process_tree(struct traversal_context *ctx,
struct tree *tree,
struct strbuf *base,
const char *name);
static void process_tree_contents(struct traversal_context *ctx,
struct tree *tree,
struct strbuf *base)
{
struct tree_desc desc;
struct name_entry entry;
enum interesting match = ctx->revs->diffopt.pathspec.nr == 0 ?
all_entries_interesting : entry_not_interesting;
init_tree_desc(&desc, tree->buffer, tree->size);
while (tree_entry(&desc, &entry)) {
if (match != all_entries_interesting) {
match = tree_entry_interesting(&entry, base, 0,
&ctx->revs->diffopt.pathspec);
if (match == all_entries_not_interesting)
break;
if (match == entry_not_interesting)
continue;
}
if (S_ISDIR(entry.mode)) {
struct tree *t = lookup_tree(the_repository, entry.oid);
t->object.flags |= NOT_USER_GIVEN;
process_tree(ctx, t, base, entry.path);
}
else if (S_ISGITLINK(entry.mode))
process_gitlink(ctx, entry.oid->hash,
base, entry.path);
else {
struct blob *b = lookup_blob(the_repository, entry.oid);
b->object.flags |= NOT_USER_GIVEN;
process_blob(ctx, b, base, entry.path);
}
}
}
static void process_tree(struct traversal_context *ctx,
struct tree *tree,
struct strbuf *base,
const char *name)
{
struct object *obj = &tree->object;
struct rev_info *revs = ctx->revs;
int baselen = base->len;
enum list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_DO_SHOW;
int failed_parse;
if (!revs->tree_objects)
return;
if (!obj)
die("bad tree object");
if (obj->flags & (UNINTERESTING | SEEN))
return;
failed_parse = parse_tree_gently(tree, 1);
if (failed_parse) {
if (revs->ignore_missing_links)
return;
/*
* Pre-filter known-missing tree objects when explicitly
* requested. This may cause the actual filter to report
* an incomplete list of missing objects.
*/
if (revs->exclude_promisor_objects &&
is_promisor_object(&obj->oid))
return;
if (!revs->do_not_die_on_missing_tree)
die("bad tree object %s", oid_to_hex(&obj->oid));
}
strbuf_addstr(base, name);
if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn)
r = ctx->filter_fn(LOFS_BEGIN_TREE, obj,
base->buf, &base->buf[baselen],
ctx->filter_data);
if (r & LOFR_MARK_SEEN)
obj->flags |= SEEN;
if (r & LOFR_DO_SHOW)
ctx->show_object(obj, base->buf, ctx->show_data);
if (base->len)
strbuf_addch(base, '/');
if (r & LOFR_SKIP_TREE)
trace_printf("Skipping contents of tree %s...\n", base->buf);
else if (!failed_parse)
process_tree_contents(ctx, tree, base);
if ((obj->flags & NOT_USER_GIVEN) && ctx->filter_fn) {
r = ctx->filter_fn(LOFS_END_TREE, obj,
base->buf, &base->buf[baselen],
ctx->filter_data);
if (r & LOFR_MARK_SEEN)
obj->flags |= SEEN;
if (r & LOFR_DO_SHOW)
ctx->show_object(obj, base->buf, ctx->show_data);
}
strbuf_setlen(base, baselen);
free_tree_buffer(tree);
}
static void mark_edge_parents_uninteresting(struct commit *commit,
struct rev_info *revs,
show_edge_fn show_edge)
{
struct commit_list *parents;
for (parents = commit->parents; parents; parents = parents->next) {
struct commit *parent = parents->item;
if (!(parent->object.flags & UNINTERESTING))
continue;
mark_tree_uninteresting(revs->repo, get_commit_tree(parent));
if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
parent->object.flags |= SHOWN;
show_edge(parent);
}
}
}
static void add_edge_parents(struct commit *commit,
struct rev_info *revs,
show_edge_fn show_edge,
struct oidset *set)
{
struct commit_list *parents;
for (parents = commit->parents; parents; parents = parents->next) {
struct commit *parent = parents->item;
struct tree *tree = get_commit_tree(parent);
if (!tree)
continue;
oidset_insert(set, &tree->object.oid);
if (!(parent->object.flags & UNINTERESTING))
continue;
tree->object.flags |= UNINTERESTING;
if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
parent->object.flags |= SHOWN;
show_edge(parent);
}
}
}
void mark_edges_uninteresting(struct rev_info *revs,
show_edge_fn show_edge,
int sparse)
{
struct commit_list *list;
int i;
if (sparse) {
struct oidset set;
oidset_init(&set, 16);
for (list = revs->commits; list; list = list->next) {
struct commit *commit = list->item;
struct tree *tree = get_commit_tree(commit);
if (commit->object.flags & UNINTERESTING)
tree->object.flags |= UNINTERESTING;
oidset_insert(&set, &tree->object.oid);
add_edge_parents(commit, revs, show_edge, &set);
}
mark_trees_uninteresting_sparse(revs->repo, &set);
oidset_clear(&set);
} else {
for (list = revs->commits; list; list = list->next) {
struct commit *commit = list->item;
if (commit->object.flags & UNINTERESTING) {
mark_tree_uninteresting(revs->repo,
get_commit_tree(commit));
if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) {
commit->object.flags |= SHOWN;
show_edge(commit);
}
continue;
}
mark_edge_parents_uninteresting(commit, revs, show_edge);
}
}
if (revs->edge_hint_aggressive) {
for (i = 0; i < revs->cmdline.nr; i++) {
struct object *obj = revs->cmdline.rev[i].item;
struct commit *commit = (struct commit *)obj;
if (obj->type != OBJ_COMMIT || !(obj->flags & UNINTERESTING))
continue;
mark_tree_uninteresting(revs->repo,
get_commit_tree(commit));
if (!(obj->flags & SHOWN)) {
obj->flags |= SHOWN;
show_edge(commit);
}
}
}
}
static void add_pending_tree(struct rev_info *revs, struct tree *tree)
{
add_pending_object(revs, &tree->object, "");
}
static void traverse_trees_and_blobs(struct traversal_context *ctx,
struct strbuf *base)
{
int i;
assert(base->len == 0);
for (i = 0; i < ctx->revs->pending.nr; i++) {
struct object_array_entry *pending = ctx->revs->pending.objects + i;
struct object *obj = pending->item;
const char *name = pending->name;
const char *path = pending->path;
if (obj->flags & (UNINTERESTING | SEEN))
continue;
if (obj->type == OBJ_TAG) {
obj->flags |= SEEN;
ctx->show_object(obj, name, ctx->show_data);
continue;
}
if (!path)
path = "";
if (obj->type == OBJ_TREE) {
process_tree(ctx, (struct tree *)obj, base, path);
continue;
}
if (obj->type == OBJ_BLOB) {
process_blob(ctx, (struct blob *)obj, base, path);
continue;
}
die("unknown pending object %s (%s)",
oid_to_hex(&obj->oid), name);
}
object_array_clear(&ctx->revs->pending);
}
static void do_traverse(struct traversal_context *ctx)
{
struct commit *commit;
struct strbuf csp; /* callee's scratch pad */
strbuf_init(&csp, PATH_MAX);
while ((commit = get_revision(ctx->revs)) != NULL) {
/*
* an uninteresting boundary commit may not have its tree
* parsed yet, but we are not going to show them anyway
*/
if (get_commit_tree(commit)) {
struct tree *tree = get_commit_tree(commit);
tree->object.flags |= NOT_USER_GIVEN;
add_pending_tree(ctx->revs, tree);
}
ctx->show_commit(commit, ctx->show_data);
if (ctx->revs->tree_blobs_in_commit_order)
/*
* NEEDSWORK: Adding the tree and then flushing it here
* needs a reallocation for each commit. Can we pass the
* tree directory without allocation churn?
*/
traverse_trees_and_blobs(ctx, &csp);
}
traverse_trees_and_blobs(ctx, &csp);
strbuf_release(&csp);
}
void traverse_commit_list(struct rev_info *revs,
show_commit_fn show_commit,
show_object_fn show_object,
void *show_data)
{
struct traversal_context ctx;
ctx.revs = revs;
ctx.show_commit = show_commit;
ctx.show_object = show_object;
ctx.show_data = show_data;
ctx.filter_fn = NULL;
ctx.filter_data = NULL;
do_traverse(&ctx);
}
void traverse_commit_list_filtered(
struct list_objects_filter_options *filter_options,
struct rev_info *revs,
show_commit_fn show_commit,
show_object_fn show_object,
void *show_data,
struct oidset *omitted)
{
struct traversal_context ctx;
filter_free_fn filter_free_fn = NULL;
ctx.revs = revs;
ctx.show_object = show_object;
ctx.show_commit = show_commit;
ctx.show_data = show_data;
ctx.filter_fn = NULL;
ctx.filter_data = list_objects_filter__init(omitted, filter_options,
&ctx.filter_fn, &filter_free_fn);
do_traverse(&ctx);
if (ctx.filter_data && filter_free_fn)
filter_free_fn(ctx.filter_data);
}