1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-28 08:06:23 +02:00

Merge branch 'ps/reftable-binsearch-updates' into next

Reftable code clean-up and some bugfixes.

* ps/reftable-binsearch-updates:
  reftable/block: avoid decoding keys when searching restart points
  reftable/record: extract function to decode key lengths
  reftable/block: fix error handling when searching restart points
  reftable/block: refactor binary search over restart points
  reftable/refname: refactor binary search over refnames
  reftable/basics: improve `binsearch()` test
  reftable/basics: fix return type of `binsearch()` to be `size_t`
This commit is contained in:
Junio C Hamano 2024-04-04 14:59:00 -07:00
commit 40e6d5a36b
7 changed files with 181 additions and 96 deletions

View File

@ -27,7 +27,7 @@ void put_be16(uint8_t *out, uint16_t i)
out[1] = (uint8_t)(i & 0xff); out[1] = (uint8_t)(i & 0xff);
} }
int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args) size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args)
{ {
size_t lo = 0; size_t lo = 0;
size_t hi = sz; size_t hi = sz;
@ -39,8 +39,11 @@ int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args)
*/ */
while (hi - lo > 1) { while (hi - lo > 1) {
size_t mid = lo + (hi - lo) / 2; size_t mid = lo + (hi - lo) / 2;
int ret = f(mid, args);
if (ret < 0)
return sz;
if (f(mid, args)) if (ret > 0)
hi = mid; hi = mid;
else else
lo = mid; lo = mid;

View File

@ -22,13 +22,14 @@ uint32_t get_be24(uint8_t *in);
void put_be16(uint8_t *out, uint16_t i); void put_be16(uint8_t *out, uint16_t i);
/* /*
* find smallest index i in [0, sz) at which f(i) is true, assuming * find smallest index i in [0, sz) at which `f(i) > 0`, assuming that f is
* that f is ascending. Return sz if f(i) is false for all indices. * ascending. Return sz if `f(i) == 0` for all indices. The search is aborted
* and `sz` is returned in case `f(i) < 0`.
* *
* Contrary to bsearch(3), this returns something useful if the argument is not * Contrary to bsearch(3), this returns something useful if the argument is not
* found. * found.
*/ */
int binsearch(size_t sz, int (*f)(size_t k, void *args), void *args); size_t binsearch(size_t sz, int (*f)(size_t k, void *args), void *args);
/* /*
* Frees a NULL terminated array of malloced strings. The array itself is also * Frees a NULL terminated array of malloced strings. The array itself is also

View File

@ -12,40 +12,47 @@ license that can be found in the LICENSE file or at
#include "test_framework.h" #include "test_framework.h"
#include "reftable-tests.h" #include "reftable-tests.h"
struct binsearch_args { struct integer_needle_lesseq_args {
int key; int needle;
int *arr; int *haystack;
}; };
static int binsearch_func(size_t i, void *void_args) static int integer_needle_lesseq(size_t i, void *_args)
{ {
struct binsearch_args *args = void_args; struct integer_needle_lesseq_args *args = _args;
return args->needle <= args->haystack[i];
return args->key < args->arr[i];
} }
static void test_binsearch(void) static void test_binsearch(void)
{ {
int arr[] = { 2, 4, 6, 8, 10 }; int haystack[] = { 2, 4, 6, 8, 10 };
size_t sz = ARRAY_SIZE(arr); struct {
struct binsearch_args args = { int needle;
.arr = arr, size_t expected_idx;
} testcases[] = {
{-9000, 0},
{-1, 0},
{0, 0},
{2, 0},
{3, 1},
{4, 1},
{7, 3},
{9, 4},
{10, 4},
{11, 5},
{9000, 5},
}; };
size_t i = 0;
int i = 0; for (i = 0; i < ARRAY_SIZE(testcases); i++) {
for (i = 1; i < 11; i++) { struct integer_needle_lesseq_args args = {
int res; .haystack = haystack,
args.key = i; .needle = testcases[i].needle,
res = binsearch(sz, &binsearch_func, &args); };
size_t idx;
if (res < sz) { idx = binsearch(ARRAY_SIZE(haystack), &integer_needle_lesseq, &args);
EXPECT(args.key < arr[res]); EXPECT(idx == testcases[i].expected_idx);
if (res > 0) {
EXPECT(args.key >= arr[res - 1]);
}
} else {
EXPECT(args.key == 10 || args.key == 11);
}
} }
} }

View File

@ -273,35 +273,46 @@ void block_reader_start(struct block_reader *br, struct block_iter *it)
it->next_off = br->header_off + 4; it->next_off = br->header_off + 4;
} }
struct restart_find_args { struct restart_needle_less_args {
int error; int error;
struct strbuf key; struct strbuf needle;
struct block_reader *r; struct block_reader *reader;
}; };
static int restart_key_less(size_t idx, void *args) static int restart_needle_less(size_t idx, void *_args)
{ {
struct restart_find_args *a = args; struct restart_needle_less_args *args = _args;
uint32_t off = block_reader_restart_offset(a->r, idx); uint32_t off = block_reader_restart_offset(args->reader, idx);
struct string_view in = { struct string_view in = {
.buf = a->r->block.data + off, .buf = args->reader->block.data + off,
.len = a->r->block_len - off, .len = args->reader->block_len - off,
}; };
uint64_t prefix_len, suffix_len;
uint8_t extra;
int n;
/* the restart key is verbatim in the block, so this could avoid the /*
alloc for decoding the key */ * Records at restart points are stored without prefix compression, so
struct strbuf rkey = STRBUF_INIT; * there is no need to fully decode the record key here. This removes
uint8_t unused_extra; * the need for allocating memory.
int n = reftable_decode_key(&rkey, &unused_extra, in); */
int result; n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
if (n < 0) { if (n < 0 || prefix_len) {
a->error = 1; args->error = 1;
return -1; return -1;
} }
result = strbuf_cmp(&a->key, &rkey); string_view_consume(&in, n);
strbuf_release(&rkey); if (suffix_len > in.len) {
return result < 0; args->error = 1;
return -1;
}
n = memcmp(args->needle.buf, in.buf,
args->needle.len < suffix_len ? args->needle.len : suffix_len);
if (n)
return n < 0;
return args->needle.len < suffix_len;
} }
void block_iter_copy_from(struct block_iter *dest, struct block_iter *src) void block_iter_copy_from(struct block_iter *dest, struct block_iter *src)
@ -376,20 +387,51 @@ void block_iter_close(struct block_iter *it)
int block_reader_seek(struct block_reader *br, struct block_iter *it, int block_reader_seek(struct block_reader *br, struct block_iter *it,
struct strbuf *want) struct strbuf *want)
{ {
struct restart_find_args args = { struct restart_needle_less_args args = {
.key = *want, .needle = *want,
.r = br, .reader = br,
}; };
struct block_iter next = BLOCK_ITER_INIT; struct block_iter next = BLOCK_ITER_INIT;
struct reftable_record rec; struct reftable_record rec;
int err = 0, i; int err = 0;
size_t i;
/*
* Perform a binary search over the block's restart points, which
* avoids doing a linear scan over the whole block. Like this, we
* identify the section of the block that should contain our key.
*
* Note that we explicitly search for the first restart point _greater_
* than the sought-after record, not _greater or equal_ to it. In case
* the sought-after record is located directly at the restart point we
* would otherwise start doing the linear search at the preceding
* restart point. While that works alright, we would end up scanning
* too many record.
*/
i = binsearch(br->restart_count, &restart_needle_less, &args);
if (args.error) { if (args.error) {
err = REFTABLE_FORMAT_ERROR; err = REFTABLE_FORMAT_ERROR;
goto done; goto done;
} }
i = binsearch(br->restart_count, &restart_key_less, &args); /*
* Now there are multiple cases:
*
* - `i == 0`: The wanted record is smaller than the record found at
* the first restart point. As the first restart point is the first
* record in the block, our wanted record cannot be located in this
* block at all. We still need to position the iterator so that the
* next call to `block_iter_next()` will yield an end-of-iterator
* signal.
*
* - `i == restart_count`: The wanted record was not found at any of
* the restart points. As there is no restart point at the end of
* the section the record may thus be contained in the last block.
*
* - `i > 0`: The wanted record must be contained in the section
* before the found restart point. We thus do a linear search
* starting from the preceding restart point.
*/
if (i > 0) if (i > 0)
it->next_off = block_reader_restart_offset(br, i - 1); it->next_off = block_reader_restart_offset(br, i - 1);
else else
@ -398,21 +440,34 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it,
reftable_record_init(&rec, block_reader_type(br)); reftable_record_init(&rec, block_reader_type(br));
/* We're looking for the last entry less/equal than the wanted key, so /*
we have to go one entry too far and then back up. * We're looking for the last entry less than the wanted key so that
*/ * the next call to `block_reader_next()` would yield the wanted
* record. We thus don't want to position our reader at the sought
* after record, but one before. To do so, we have to go one entry too
* far and then back up.
*/
while (1) { while (1) {
block_iter_copy_from(&next, it); block_iter_copy_from(&next, it);
err = block_iter_next(&next, &rec); err = block_iter_next(&next, &rec);
if (err < 0) if (err < 0)
goto done; goto done;
if (err > 0) {
reftable_record_key(&rec, &it->last_key);
if (err > 0 || strbuf_cmp(&it->last_key, want) >= 0) {
err = 0; err = 0;
goto done; goto done;
} }
/*
* Check whether the current key is greater or equal to the
* sought-after key. In case it is greater we know that the
* record does not exist in the block and can thus abort early.
* In case it is equal to the sought-after key we have found
* the desired record.
*/
reftable_record_key(&rec, &it->last_key);
if (strbuf_cmp(&it->last_key, want) >= 0)
goto done;
block_iter_copy_from(it, &next); block_iter_copy_from(it, &next);
} }

View File

@ -159,6 +159,30 @@ int reftable_encode_key(int *restart, struct string_view dest,
return start.len - dest.len; return start.len - dest.len;
} }
int reftable_decode_keylen(struct string_view in,
uint64_t *prefix_len,
uint64_t *suffix_len,
uint8_t *extra)
{
size_t start_len = in.len;
int n;
n = get_var_int(prefix_len, &in);
if (n < 0)
return -1;
string_view_consume(&in, n);
n = get_var_int(suffix_len, &in);
if (n <= 0)
return -1;
string_view_consume(&in, n);
*extra = (uint8_t)(*suffix_len & 0x7);
*suffix_len >>= 3;
return start_len - in.len;
}
int reftable_decode_key(struct strbuf *last_key, uint8_t *extra, int reftable_decode_key(struct strbuf *last_key, uint8_t *extra,
struct string_view in) struct string_view in)
{ {
@ -167,19 +191,11 @@ int reftable_decode_key(struct strbuf *last_key, uint8_t *extra,
uint64_t suffix_len = 0; uint64_t suffix_len = 0;
int n; int n;
n = get_var_int(&prefix_len, &in); n = reftable_decode_keylen(in, &prefix_len, &suffix_len, extra);
if (n < 0) if (n < 0)
return -1; return -1;
string_view_consume(&in, n); string_view_consume(&in, n);
n = get_var_int(&suffix_len, &in);
if (n <= 0)
return -1;
string_view_consume(&in, n);
*extra = (uint8_t)(suffix_len & 0x7);
suffix_len >>= 3;
if (in.len < suffix_len || if (in.len < suffix_len ||
prefix_len > last_key->len) prefix_len > last_key->len)
return -1; return -1;

View File

@ -86,6 +86,12 @@ int reftable_encode_key(int *is_restart, struct string_view dest,
struct strbuf prev_key, struct strbuf key, struct strbuf prev_key, struct strbuf key,
uint8_t extra); uint8_t extra);
/* Decode a record's key lengths. */
int reftable_decode_keylen(struct string_view in,
uint64_t *prefix_len,
uint64_t *suffix_len,
uint8_t *extra);
/* /*
* Decode into `last_key` and `extra` from `in`. `last_key` is expected to * Decode into `last_key` and `extra` from `in`. `last_key` is expected to
* contain the decoded key of the preceding record, if any. * contain the decoded key of the preceding record, if any.

View File

@ -12,15 +12,15 @@
#include "refname.h" #include "refname.h"
#include "reftable-iterator.h" #include "reftable-iterator.h"
struct find_arg { struct refname_needle_lesseq_args {
char **names; char **haystack;
const char *want; const char *needle;
}; };
static int find_name(size_t k, void *arg) static int refname_needle_lesseq(size_t k, void *_args)
{ {
struct find_arg *f_arg = arg; struct refname_needle_lesseq_args *args = _args;
return strcmp(f_arg->names[k], f_arg->want) >= 0; return strcmp(args->needle, args->haystack[k]) <= 0;
} }
static int modification_has_ref(struct modification *mod, const char *name) static int modification_has_ref(struct modification *mod, const char *name)
@ -29,25 +29,23 @@ static int modification_has_ref(struct modification *mod, const char *name)
int err = 0; int err = 0;
if (mod->add_len > 0) { if (mod->add_len > 0) {
struct find_arg arg = { struct refname_needle_lesseq_args args = {
.names = mod->add, .haystack = mod->add,
.want = name, .needle = name,
}; };
int idx = binsearch(mod->add_len, find_name, &arg); size_t idx = binsearch(mod->add_len, refname_needle_lesseq, &args);
if (idx < mod->add_len && !strcmp(mod->add[idx], name)) { if (idx < mod->add_len && !strcmp(mod->add[idx], name))
return 0; return 0;
}
} }
if (mod->del_len > 0) { if (mod->del_len > 0) {
struct find_arg arg = { struct refname_needle_lesseq_args args = {
.names = mod->del, .haystack = mod->del,
.want = name, .needle = name,
}; };
int idx = binsearch(mod->del_len, find_name, &arg); size_t idx = binsearch(mod->del_len, refname_needle_lesseq, &args);
if (idx < mod->del_len && !strcmp(mod->del[idx], name)) { if (idx < mod->del_len && !strcmp(mod->del[idx], name))
return 1; return 1;
}
} }
err = reftable_table_read_ref(&mod->tab, name, &ref); err = reftable_table_read_ref(&mod->tab, name, &ref);
@ -73,11 +71,11 @@ static int modification_has_ref_with_prefix(struct modification *mod,
int err = 0; int err = 0;
if (mod->add_len > 0) { if (mod->add_len > 0) {
struct find_arg arg = { struct refname_needle_lesseq_args args = {
.names = mod->add, .haystack = mod->add,
.want = prefix, .needle = prefix,
}; };
int idx = binsearch(mod->add_len, find_name, &arg); size_t idx = binsearch(mod->add_len, refname_needle_lesseq, &args);
if (idx < mod->add_len && if (idx < mod->add_len &&
!strncmp(prefix, mod->add[idx], strlen(prefix))) !strncmp(prefix, mod->add[idx], strlen(prefix)))
goto done; goto done;
@ -92,15 +90,14 @@ static int modification_has_ref_with_prefix(struct modification *mod,
goto done; goto done;
if (mod->del_len > 0) { if (mod->del_len > 0) {
struct find_arg arg = { struct refname_needle_lesseq_args args = {
.names = mod->del, .haystack = mod->del,
.want = ref.refname, .needle = ref.refname,
}; };
int idx = binsearch(mod->del_len, find_name, &arg); size_t idx = binsearch(mod->del_len, refname_needle_lesseq, &args);
if (idx < mod->del_len && if (idx < mod->del_len &&
!strcmp(ref.refname, mod->del[idx])) { !strcmp(ref.refname, mod->del[idx]))
continue; continue;
}
} }
if (strncmp(ref.refname, prefix, strlen(prefix))) { if (strncmp(ref.refname, prefix, strlen(prefix))) {