1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-08 07:26:07 +02:00
git/commit-graph.h
Abhishek Kumar e8b63005c4 commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to
distinguish between generation numbers v1 and v2 [1]. Thus, one of
pre-requistes before implementing generation number v2 was to
distinguish between graph versions in a backwards compatible manner.

We are going to introduce a new chunk called Generation DATa chunk (or
GDAT). GDAT will store corrected committer date offsets whereas CDAT
will still store topological level.

Old Git does not understand GDAT chunk and would ignore it, reading
topological levels from CDAT. New Git can parse GDAT and take advantage
of newer generation numbers, falling back to topological levels when
GDAT chunk is missing (as it would happen with a commit-graph written
by old Git).

We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT'
which forces commit-graph file to be written without generation data
chunk to emulate a commit-graph file written by old Git.

To minimize the space required to store corrrected commit date, Git
stores corrected commit date offsets into the commit-graph file, instea
of corrected commit dates. This saves us 4 bytes per commit, decreasing
the GDAT chunk size by half, but it's possible for the offset to
overflow the 4-bytes allocated for storage. As such overflows are and
should be exceedingly rare, we use the following overflow management
scheme:

We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV')
to store corrected commit dates for commits with offsets greater than
GENERATION_NUMBER_V2_OFFSET_MAX.

If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set
the MSB of the offset and the other bits store the position of corrected
commit date in GDOV chunk, similar to how Extra Edge List is maintained.

We test the overflow-related code with the following repo history:

           F - N - U
          /         \
U - N - U            N
         \          /
	  N - F - N

Where the commits denoted by U have committer date of zero seconds
since Unix epoch, the commits denoted by N have committer date of
1112354055 (default committer date for the test suite) seconds since
Unix epoch and the commits denoted by F have committer date of
(2 ^ 31 - 2) seconds since Unix epoch.

The largest offset observed is 2 ^ 31, just large enough to overflow.

[1]: https://lore.kernel.org/git/87a7gdspo4.fsf@evledraar.gmail.com/

Signed-off-by: Abhishek Kumar <abhishekkumar8222@gmail.com>
Reviewed-by: Taylor Blau <me@ttaylorr.com>
Reviewed-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-18 16:21:18 -08:00

160 lines
4.9 KiB
C

#ifndef COMMIT_GRAPH_H
#define COMMIT_GRAPH_H
#include "git-compat-util.h"
#include "object-store.h"
#include "oidset.h"
#define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH"
#define GIT_TEST_COMMIT_GRAPH_NO_GDAT "GIT_TEST_COMMIT_GRAPH_NO_GDAT"
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE"
#define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
/*
* This method is only used to enhance coverage of the commit-graph
* feature in the test suite with the GIT_TEST_COMMIT_GRAPH and
* GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS environment variables. Do not
* call this method oustide of a builtin, and only if you know what
* you are doing!
*/
void git_test_write_commit_graph_or_die(void);
struct commit;
struct bloom_filter_settings;
struct repository;
struct raw_object_store;
struct string_list;
char *get_commit_graph_filename(struct object_directory *odb);
char *get_commit_graph_chain_filename(struct object_directory *odb);
int open_commit_graph(const char *graph_file, int *fd, struct stat *st);
/*
* Given a commit struct, try to fill the commit struct info, including:
* 1. tree object
* 2. date
* 3. parents.
*
* Returns 1 if and only if the commit was found in the packed graph.
*
* See parse_commit_buffer() for the fallback after this call.
*/
int parse_commit_in_graph(struct repository *r, struct commit *item);
/*
* It is possible that we loaded commit contents from the commit buffer,
* but we also want to ensure the commit-graph content is correctly
* checked and filled. Fill the graph_pos and generation members of
* the given commit.
*/
void load_commit_graph_info(struct repository *r, struct commit *item);
struct tree *get_commit_tree_in_graph(struct repository *r,
const struct commit *c);
struct commit_graph {
const unsigned char *data;
size_t data_len;
unsigned char hash_len;
unsigned char num_chunks;
uint32_t num_commits;
struct object_id oid;
char *filename;
struct object_directory *odb;
uint32_t num_commits_in_base;
struct commit_graph *base_graph;
const uint32_t *chunk_oid_fanout;
const unsigned char *chunk_oid_lookup;
const unsigned char *chunk_commit_data;
const unsigned char *chunk_generation_data;
const unsigned char *chunk_generation_data_overflow;
const unsigned char *chunk_extra_edges;
const unsigned char *chunk_base_graphs;
const unsigned char *chunk_bloom_indexes;
const unsigned char *chunk_bloom_data;
struct topo_level_slab *topo_levels;
struct bloom_filter_settings *bloom_filter_settings;
};
struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,
int fd, struct stat *st,
struct object_directory *odb);
struct commit_graph *read_commit_graph_one(struct repository *r,
struct object_directory *odb);
struct commit_graph *parse_commit_graph(struct repository *r,
void *graph_map, size_t graph_size);
/*
* Return 1 if and only if the repository has a commit-graph
* file and generation numbers are computed in that file.
*/
int generation_numbers_enabled(struct repository *r);
struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r);
enum commit_graph_write_flags {
COMMIT_GRAPH_WRITE_APPEND = (1 << 0),
COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1),
COMMIT_GRAPH_WRITE_SPLIT = (1 << 2),
COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 3),
COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 4),
};
enum commit_graph_split_flags {
COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0,
COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1,
COMMIT_GRAPH_SPLIT_REPLACE = 2
};
struct commit_graph_opts {
int size_multiple;
int max_commits;
timestamp_t expire_time;
enum commit_graph_split_flags split_flags;
int max_new_filters;
};
/*
* The write_commit_graph* methods return zero on success
* and a negative value on failure. Note that if the repository
* is not compatible with the commit-graph feature, then the
* methods will return 0 without writing a commit-graph.
*/
int write_commit_graph_reachable(struct object_directory *odb,
enum commit_graph_write_flags flags,
const struct commit_graph_opts *opts);
int write_commit_graph(struct object_directory *odb,
struct string_list *pack_indexes,
struct oidset *commits,
enum commit_graph_write_flags flags,
const struct commit_graph_opts *opts);
#define COMMIT_GRAPH_VERIFY_SHALLOW (1 << 0)
int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags);
void close_commit_graph(struct raw_object_store *);
void free_commit_graph(struct commit_graph *);
/*
* Disable further use of the commit graph in this process when parsing a
* "struct commit".
*/
void disable_commit_graph(struct repository *r);
struct commit_graph_data {
uint32_t graph_pos;
timestamp_t generation;
};
/*
* Commits should be parsed before accessing generation, graph positions.
*/
timestamp_t commit_graph_generation(const struct commit *);
uint32_t commit_graph_position(const struct commit *);
#endif