1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-18 10:06:10 +02:00
git/commit-graph.h
Abhishek Kumar 1fdc383c5c commit-graph: use generation v2 only if entire chain does
Since there are released versions of Git that understand generation
numbers in the commit-graph's CDAT chunk but do not understand the GDAT
chunk, the following scenario is possible:

1. "New" Git writes a commit-graph with the GDAT chunk.
2. "Old" Git writes a split commit-graph on top without a GDAT chunk.

If each layer of split commit-graph is treated independently, as it was
the case before this commit, with Git inspecting only the current layer
for chunk_generation_data pointer, commits in the lower layer (one with
GDAT) whould have corrected commit date as their generation number,
while commits in the upper layer would have topological levels as their
generation. Corrected commit dates usually have much larger values than
topological levels. This means that if we take two commits, one from the
upper layer, and one reachable from it in the lower layer, then the
expectation that the generation of a parent is smaller than the
generation of a child would be violated.

It is difficult to expose this issue in a test. Since we _start_ with
artificially low generation numbers, any commit walk that prioritizes
generation numbers will walk all of the commits with high generation
number before walking the commits with low generation number. In all the
cases I tried, the commit-graph layers themselves "protect" any
incorrect behavior since none of the commits in the lower layer can
reach the commits in the upper layer.

This issue would manifest itself as a performance problem in this case,
especially with something like "git log --graph" since the low
generation numbers would cause the in-degree queue to walk all of the
commits in the lower layer before allowing the topo-order queue to write
anything to output (depending on the size of the upper layer).

Therefore, When writing the new layer in split commit-graph, we write a
GDAT chunk only if the topmost layer has a GDAT chunk. This guarantees
that if a layer has GDAT chunk, all lower layers must have a GDAT chunk
as well.

Rewriting layers follows similar approach: if the topmost layer below
the set of layers being rewritten (in the split commit-graph chain)
exists, and it does not contain GDAT chunk, then the result of rewrite
does not have GDAT chunks either.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-18 16:21:18 -08:00

161 lines
5.0 KiB
C

#ifndef COMMIT_GRAPH_H
#define COMMIT_GRAPH_H
#include "git-compat-util.h"
#include "object-store.h"
#include "oidset.h"
#define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH"
#define GIT_TEST_COMMIT_GRAPH_NO_GDAT "GIT_TEST_COMMIT_GRAPH_NO_GDAT"
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE"
#define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
/*
* This method is only used to enhance coverage of the commit-graph
* feature in the test suite with the GIT_TEST_COMMIT_GRAPH and
* GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS environment variables. Do not
* call this method oustide of a builtin, and only if you know what
* you are doing!
*/
void git_test_write_commit_graph_or_die(void);
struct commit;
struct bloom_filter_settings;
struct repository;
struct raw_object_store;
struct string_list;
char *get_commit_graph_filename(struct object_directory *odb);
char *get_commit_graph_chain_filename(struct object_directory *odb);
int open_commit_graph(const char *graph_file, int *fd, struct stat *st);
/*
* Given a commit struct, try to fill the commit struct info, including:
* 1. tree object
* 2. date
* 3. parents.
*
* Returns 1 if and only if the commit was found in the packed graph.
*
* See parse_commit_buffer() for the fallback after this call.
*/
int parse_commit_in_graph(struct repository *r, struct commit *item);
/*
* It is possible that we loaded commit contents from the commit buffer,
* but we also want to ensure the commit-graph content is correctly
* checked and filled. Fill the graph_pos and generation members of
* the given commit.
*/
void load_commit_graph_info(struct repository *r, struct commit *item);
struct tree *get_commit_tree_in_graph(struct repository *r,
const struct commit *c);
struct commit_graph {
const unsigned char *data;
size_t data_len;
unsigned char hash_len;
unsigned char num_chunks;
uint32_t num_commits;
struct object_id oid;
char *filename;
struct object_directory *odb;
uint32_t num_commits_in_base;
unsigned int read_generation_data;
struct commit_graph *base_graph;
const uint32_t *chunk_oid_fanout;
const unsigned char *chunk_oid_lookup;
const unsigned char *chunk_commit_data;
const unsigned char *chunk_generation_data;
const unsigned char *chunk_generation_data_overflow;
const unsigned char *chunk_extra_edges;
const unsigned char *chunk_base_graphs;
const unsigned char *chunk_bloom_indexes;
const unsigned char *chunk_bloom_data;
struct topo_level_slab *topo_levels;
struct bloom_filter_settings *bloom_filter_settings;
};
struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,
int fd, struct stat *st,
struct object_directory *odb);
struct commit_graph *read_commit_graph_one(struct repository *r,
struct object_directory *odb);
struct commit_graph *parse_commit_graph(struct repository *r,
void *graph_map, size_t graph_size);
/*
* Return 1 if and only if the repository has a commit-graph
* file and generation numbers are computed in that file.
*/
int generation_numbers_enabled(struct repository *r);
struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r);
enum commit_graph_write_flags {
COMMIT_GRAPH_WRITE_APPEND = (1 << 0),
COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1),
COMMIT_GRAPH_WRITE_SPLIT = (1 << 2),
COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 3),
COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 4),
};
enum commit_graph_split_flags {
COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0,
COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1,
COMMIT_GRAPH_SPLIT_REPLACE = 2
};
struct commit_graph_opts {
int size_multiple;
int max_commits;
timestamp_t expire_time;
enum commit_graph_split_flags split_flags;
int max_new_filters;
};
/*
* The write_commit_graph* methods return zero on success
* and a negative value on failure. Note that if the repository
* is not compatible with the commit-graph feature, then the
* methods will return 0 without writing a commit-graph.
*/
int write_commit_graph_reachable(struct object_directory *odb,
enum commit_graph_write_flags flags,
const struct commit_graph_opts *opts);
int write_commit_graph(struct object_directory *odb,
struct string_list *pack_indexes,
struct oidset *commits,
enum commit_graph_write_flags flags,
const struct commit_graph_opts *opts);
#define COMMIT_GRAPH_VERIFY_SHALLOW (1 << 0)
int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags);
void close_commit_graph(struct raw_object_store *);
void free_commit_graph(struct commit_graph *);
/*
* Disable further use of the commit graph in this process when parsing a
* "struct commit".
*/
void disable_commit_graph(struct repository *r);
struct commit_graph_data {
uint32_t graph_pos;
timestamp_t generation;
};
/*
* Commits should be parsed before accessing generation, graph positions.
*/
timestamp_t commit_graph_generation(const struct commit *);
uint32_t commit_graph_position(const struct commit *);
#endif