1
0
mirror of https://github.com/git/git.git synced 2024-10-19 21:08:12 +02:00
git/t/helper/test-bloom.c
Taylor Blau 312cff5207 bloom: split 'get_bloom_filter()' in two
'get_bloom_filter' takes a flag to control whether it will compute a
Bloom filter if the requested one is missing. In the next patch, we'll
add yet another parameter to this method, which would force all but one
caller to specify an extra 'NULL' parameter at the end.

Instead of doing this, split 'get_bloom_filter' into two functions:
'get_bloom_filter' and 'get_or_compute_bloom_filter'. The former only
looks up a Bloom filter (and does not compute one if it's missing,
thus dropping the 'compute_if_not_present' flag). The latter does
compute missing Bloom filters, with an additional parameter to store
whether or not it needed to do so.

This simplifies many call-sites, since the majority of existing callers
to 'get_bloom_filter' do not want missing Bloom filters to be computed
(so they can drop the parameter entirely and use the simpler version of
the function).

While we're at it, instrument the new 'get_or_compute_bloom_filter()'
with counters in the 'write_commit_graph_context' struct which store
the number of filters that we did and didn't compute, as well as filters
that were truncated.

It would be nice to drop the 'compute_if_not_present' flag entirely,
since all remaining callers of 'get_or_compute_bloom_filter' pass it as
'1', but this will change in a future patch and hence cannot be removed.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-09-17 09:31:25 -07:00

95 lines
2.2 KiB
C

#include "git-compat-util.h"
#include "bloom.h"
#include "test-tool.h"
#include "commit.h"
/*
 * Bloom filter parameters used by this helper when hashing keys and
 * sizing the filter for "generate_filter"; initialized from
 * DEFAULT_BLOOM_FILTER_SETTINGS.
 */
static struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
/*
 * Hash `data` into a Bloom key, print every hash of that key on a single
 * "Hashes:" line (each formatted as 0x%08x and followed by '|'), and then
 * add the key to `filter`.
 *
 * Both the key and the filter update use the file-level `settings`.
 */
static void add_string_to_filter(const char *data, struct bloom_filter *filter) {
	struct bloom_key key;
	int i;

	fill_bloom_key(data, strlen(data), &key, &settings);
	printf("Hashes:");
	for (i = 0; i < settings.num_hashes; i++){
		printf("0x%08x|", key.hashes[i]);
	}
	printf("\n");
	add_key_to_filter(&key, filter, &settings);

	/*
	 * The key owns the hash array set up by fill_bloom_key(); release it
	 * here so that calling this once per argument does not leak.
	 */
	clear_bloom_key(&key);
}
/*
 * Write a textual dump of `filter` to stdout.
 *
 * A NULL filter prints "No filter." and nothing else. Otherwise the
 * length is printed on a "Filter_Length:" line, followed by a
 * "Filter_Data:" line holding each data byte as two hex digits with a
 * trailing '|'.
 */
static void print_bloom_filter(struct bloom_filter *filter) {
	int pos = 0;

	if (filter == NULL) {
		printf("No filter.\n");
		return;
	}

	printf("Filter_Length:%d\n", (int)filter->len);
	printf("Filter_Data:");
	while (pos < filter->len) {
		printf("%02x|", filter->data[pos]);
		pos++;
	}
	printf("\n");
}
/*
 * Print the Bloom filter belonging to the commit named by `commit_oid`.
 *
 * The git directory is set up first, then the commit is looked up in
 * the_repository and handed to get_or_compute_bloom_filter(). The
 * trailing arguments are 1 (the compute-if-missing flag) and NULL (no
 * interest in whether the filter had to be computed). Whatever that call
 * returns is passed straight to print_bloom_filter().
 */
static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
{
	struct commit *commit;

	setup_git_directory();
	commit = lookup_commit(the_repository, commit_oid);
	print_bloom_filter(get_or_compute_bloom_filter(the_repository, commit,
						       1, NULL));
}
/*
 * Usage text handed to usage() when the subcommand or one of its
 * required arguments is missing. Each line spells out the full
 * "test-tool bloom <subcommand>" invocation.
 */
static const char *bloom_usage = "\n"
" test-tool bloom get_murmur3 <string>\n"
" test-tool bloom generate_filter <string> [<string>...]\n"
" test-tool bloom get_filter_for_commit <commit-hex>\n";
/*
 * Entry point for "test-tool bloom".
 *
 * argv[1] selects the subcommand:
 *   get_murmur3 <string>
 *       print the seed-0 murmur3 hash of <string>
 *   generate_filter <string> [<string>...]
 *       add every <string> to a fresh filter (printing the hashes of
 *       each one), then print the resulting filter
 *   get_filter_for_commit <commit-hex>
 *       print the Bloom filter of the given commit
 *
 * A missing subcommand or missing required argument is reported through
 * usage(bloom_usage). An object id that cannot be parsed is reported
 * through die().
 *
 * Returns 0. NOTE: an unrecognized subcommand is not rejected; it matches
 * none of the branches and also returns 0.
 */
int cmd__bloom(int argc, const char **argv)
{
	const char *subcommand;

	if (argc < 2)
		usage(bloom_usage);
	subcommand = argv[1];

	if (!strcmp(subcommand, "get_murmur3")) {
		uint32_t hashed;

		if (argc < 3)
			usage(bloom_usage);
		hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
		printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
	} else if (!strcmp(subcommand, "generate_filter")) {
		struct bloom_filter filter;
		int i;

		/* Check the arguments before allocating anything. */
		if (argc < 3)
			usage(bloom_usage);

		/* Round the configured bit count up to whole words. */
		filter.len = (settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
		filter.data = xcalloc(filter.len, sizeof(unsigned char));

		for (i = 2; argv[i]; i++)
			add_string_to_filter(argv[i], &filter);
		print_bloom_filter(&filter);

		/* The filter data was allocated above and is owned here. */
		free(filter.data);
	} else if (!strcmp(subcommand, "get_filter_for_commit")) {
		struct object_id oid;
		const char *end;

		if (argc < 3)
			usage(bloom_usage);
		if (parse_oid_hex(argv[2], &oid, &end))
			die("cannot parse oid '%s'", argv[2]);
		init_bloom_filters();
		get_bloom_filter_for_commit(&oid);
	}

	return 0;
}