1
0
Fork 0
mirror of https://github.com/git/git.git synced 2024-05-19 01:16:08 +02:00

Merge branch 'ps/reftable-iteration-perf-part2'

The code to iterate over refs with the reftable backend has seen
some optimization.

* ps/reftable-iteration-perf-part2:
  refs/reftable: precompute prefix length
  reftable: allow inlining of a few functions
  reftable/record: decode keys in place
  reftable/record: reuse refname when copying
  reftable/record: reuse refname when decoding
  reftable/merged: avoid duplicate pqueue emptiness check
  reftable/merged: circumvent pqueue with single subiter
  reftable/merged: handle subiter cleanup on close only
  reftable/merged: remove unnecessary null check for subiters
  reftable/merged: make subiters own their records
  reftable/merged: advance subiter on subsequent iteration
  reftable/merged: make `merged_iter` structure private
  reftable/pq: use `size_t` to track iterator index
This commit is contained in:
Junio C Hamano 2024-03-14 14:05:23 -07:00
commit 448a74e151
14 changed files with 184 additions and 190 deletions

View File

@ -346,6 +346,7 @@ struct reftable_ref_iterator {
struct object_id oid;
const char *prefix;
size_t prefix_len;
unsigned int flags;
int err;
};
@ -374,8 +375,8 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator)
continue;
}
if (iter->prefix &&
strncmp(iter->prefix, iter->ref.refname, strlen(iter->prefix))) {
if (iter->prefix_len &&
strncmp(iter->prefix, iter->ref.refname, iter->prefix_len)) {
iter->err = 1;
break;
}
@ -484,6 +485,7 @@ static struct reftable_ref_iterator *ref_iterator_for_stack(struct reftable_ref_
iter = xcalloc(1, sizeof(*iter));
base_ref_iterator_init(&iter->base, &reftable_ref_iterator_vtable);
iter->prefix = prefix;
iter->prefix_len = prefix ? strlen(prefix) : 0;
iter->base.oid = &iter->oid;
iter->flags = flags;
iter->refs = refs;

View File

@ -291,9 +291,8 @@ static int restart_key_less(size_t idx, void *args)
/* the restart key is verbatim in the block, so this could avoid the
alloc for decoding the key */
struct strbuf rkey = STRBUF_INIT;
struct strbuf last_key = STRBUF_INIT;
uint8_t unused_extra;
int n = reftable_decode_key(&rkey, &unused_extra, last_key, in);
int n = reftable_decode_key(&rkey, &unused_extra, in);
int result;
if (n < 0) {
a->error = 1;
@ -326,35 +325,34 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
if (it->next_off >= it->br->block_len)
return 1;
n = reftable_decode_key(&it->key, &extra, it->last_key, in);
n = reftable_decode_key(&it->last_key, &extra, in);
if (n < 0)
return -1;
if (!it->key.len)
if (!it->last_key.len)
return REFTABLE_FORMAT_ERROR;
string_view_consume(&in, n);
n = reftable_record_decode(rec, it->key, extra, in, it->br->hash_size);
n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size);
if (n < 0)
return -1;
string_view_consume(&in, n);
strbuf_swap(&it->last_key, &it->key);
it->next_off += start.len - in.len;
return 0;
}
int block_reader_first_key(struct block_reader *br, struct strbuf *key)
{
struct strbuf empty = STRBUF_INIT;
int off = br->header_off + 4;
int off = br->header_off + 4, n;
struct string_view in = {
.buf = br->block.data + off,
.len = br->block_len - off,
};
uint8_t extra = 0;
int n = reftable_decode_key(key, &extra, empty, in);
strbuf_reset(key);
n = reftable_decode_key(key, &extra, in);
if (n < 0)
return n;
if (!key->len)
@ -371,7 +369,6 @@ int block_iter_seek(struct block_iter *it, struct strbuf *want)
void block_iter_close(struct block_iter *it)
{
strbuf_release(&it->last_key);
strbuf_release(&it->key);
}
int block_reader_seek(struct block_reader *br, struct block_iter *it,
@ -408,8 +405,8 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it,
if (err < 0)
goto done;
reftable_record_key(&rec, &it->key);
if (err > 0 || strbuf_cmp(&it->key, want) >= 0) {
reftable_record_key(&rec, &it->last_key);
if (err > 0 || strbuf_cmp(&it->last_key, want) >= 0) {
err = 0;
goto done;
}

View File

@ -84,12 +84,10 @@ struct block_iter {
/* key for last entry we read. */
struct strbuf last_key;
struct strbuf key;
};
#define BLOCK_ITER_INIT { \
.last_key = STRBUF_INIT, \
.key = STRBUF_INIT, \
}
/* initializes a block reader. */

View File

@ -16,11 +16,6 @@ license that can be found in the LICENSE file or at
#include "reader.h"
#include "reftable-error.h"
int iterator_is_null(struct reftable_iterator *it)
{
return !it->ops;
}
static void filtering_ref_iterator_close(void *iter_arg)
{
struct filtering_ref_iterator *fri = iter_arg;

View File

@ -16,10 +16,6 @@ license that can be found in the LICENSE file or at
#include "reftable-iterator.h"
#include "reftable-generic.h"
/* Returns true for a zeroed out iterator, such as the one returned from
* iterator_destroy. */
int iterator_is_null(struct reftable_iterator *it);
/* iterator that produces only ref records that point to `oid` */
struct filtering_ref_iterator {
int double_check;

View File

@ -17,23 +17,37 @@ license that can be found in the LICENSE file or at
#include "reftable-error.h"
#include "system.h"
struct merged_subiter {
struct reftable_iterator iter;
struct reftable_record rec;
};
struct merged_iter {
struct merged_subiter *subiters;
struct merged_iter_pqueue pq;
uint32_t hash_id;
size_t stack_len;
uint8_t typ;
int suppress_deletions;
ssize_t advance_index;
};
static int merged_iter_init(struct merged_iter *mi)
{
for (size_t i = 0; i < mi->stack_len; i++) {
struct pq_entry e = {
.index = i,
.rec = &mi->subiters[i].rec,
};
int err;
reftable_record_init(&e.rec, mi->typ);
err = iterator_next(&mi->stack[i], &e.rec);
reftable_record_init(&mi->subiters[i].rec, mi->typ);
err = iterator_next(&mi->subiters[i].iter,
&mi->subiters[i].rec);
if (err < 0)
return err;
if (err > 0) {
reftable_iterator_destroy(&mi->stack[i]);
reftable_record_release(&e.rec);
if (err > 0)
continue;
}
merged_iter_pqueue_add(&mi->pq, &e);
}
@ -46,54 +60,66 @@ static void merged_iter_close(void *p)
struct merged_iter *mi = p;
merged_iter_pqueue_release(&mi->pq);
for (size_t i = 0; i < mi->stack_len; i++)
reftable_iterator_destroy(&mi->stack[i]);
reftable_free(mi->stack);
}
static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi,
size_t idx)
{
struct pq_entry e = {
.index = idx,
};
int err;
reftable_record_init(&e.rec, mi->typ);
err = iterator_next(&mi->stack[idx], &e.rec);
if (err < 0)
return err;
if (err > 0) {
reftable_iterator_destroy(&mi->stack[idx]);
reftable_record_release(&e.rec);
return 0;
for (size_t i = 0; i < mi->stack_len; i++) {
reftable_iterator_destroy(&mi->subiters[i].iter);
reftable_record_release(&mi->subiters[i].rec);
}
merged_iter_pqueue_add(&mi->pq, &e);
return 0;
reftable_free(mi->subiters);
}
static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx)
{
if (iterator_is_null(&mi->stack[idx]))
return 0;
return merged_iter_advance_nonnull_subiter(mi, idx);
struct pq_entry e = {
.index = idx,
.rec = &mi->subiters[idx].rec,
};
int err;
err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec);
if (err)
return err;
merged_iter_pqueue_add(&mi->pq, &e);
return 0;
}
static int merged_iter_next_entry(struct merged_iter *mi,
struct reftable_record *rec)
{
struct pq_entry entry = { 0 };
int err = 0;
int err = 0, empty;
if (merged_iter_pqueue_is_empty(mi->pq))
empty = merged_iter_pqueue_is_empty(mi->pq);
if (mi->advance_index >= 0) {
/*
* When there are no pqueue entries then we only have a single
* subiter left. There is no need to use the pqueue in that
* case anymore as we know that the subiter will return entries
* in the correct order already.
*
* While this may sound like a very specific edge case, it may
* happen more frequently than you think. Most repositories
* will end up having a single large base table that contains
* most of the refs. It's thus likely that we exhaust all
* subiters but the one from that base ref.
*/
if (empty)
return iterator_next(&mi->subiters[mi->advance_index].iter,
rec);
err = merged_iter_advance_subiter(mi, mi->advance_index);
if (err < 0)
return err;
if (!err)
empty = 0;
mi->advance_index = -1;
}
if (empty)
return 1;
entry = merged_iter_pqueue_remove(&mi->pq);
err = merged_iter_advance_subiter(mi, entry.index);
if (err < 0)
return err;
/*
One can also use reftable as datacenter-local storage, where the ref
@ -107,54 +133,32 @@ static int merged_iter_next_entry(struct merged_iter *mi,
struct pq_entry top = merged_iter_pqueue_top(mi->pq);
int cmp;
/*
* When the next entry comes from the same queue as the current
* entry then it must by definition be larger. This avoids a
* comparison in the most common case.
*/
if (top.index == entry.index)
break;
cmp = reftable_record_cmp(&top.rec, &entry.rec);
cmp = reftable_record_cmp(top.rec, entry.rec);
if (cmp > 0)
break;
merged_iter_pqueue_remove(&mi->pq);
err = merged_iter_advance_subiter(mi, top.index);
if (err < 0)
goto done;
reftable_record_release(&top.rec);
return err;
}
reftable_record_release(rec);
*rec = entry.rec;
done:
if (err)
reftable_record_release(&entry.rec);
return err;
}
static int merged_iter_next(struct merged_iter *mi, struct reftable_record *rec)
{
while (1) {
int err = merged_iter_next_entry(mi, rec);
if (err == 0 && mi->suppress_deletions &&
reftable_record_is_deletion(rec)) {
continue;
}
return err;
}
mi->advance_index = entry.index;
SWAP(*rec, *entry.rec);
return 0;
}
static int merged_iter_next_void(void *p, struct reftable_record *rec)
{
struct merged_iter *mi = p;
if (merged_iter_pqueue_is_empty(mi->pq))
return 1;
return merged_iter_next(mi, rec);
while (1) {
int err = merged_iter_next_entry(mi, rec);
if (err)
return err;
if (mi->suppress_deletions && reftable_record_is_deletion(rec))
continue;
return 0;
}
}
static struct reftable_iterator_vtable merged_iter_vtable = {
@ -246,14 +250,15 @@ static int merged_table_seek_record(struct reftable_merged_table *mt,
.typ = reftable_record_type(rec),
.hash_id = mt->hash_id,
.suppress_deletions = mt->suppress_deletions,
.advance_index = -1,
};
struct merged_iter *p;
int err;
REFTABLE_CALLOC_ARRAY(merged.stack, mt->stack_len);
REFTABLE_CALLOC_ARRAY(merged.subiters, mt->stack_len);
for (size_t i = 0; i < mt->stack_len; i++) {
err = reftable_table_seek_record(&mt->stack[i],
&merged.stack[merged.stack_len], rec);
&merged.subiters[merged.stack_len].iter, rec);
if (err < 0)
goto out;
if (!err)

View File

@ -9,7 +9,7 @@ license that can be found in the LICENSE file or at
#ifndef MERGED_H
#define MERGED_H
#include "pq.h"
#include "system.h"
struct reftable_merged_table {
struct reftable_table *stack;
@ -24,15 +24,6 @@ struct reftable_merged_table {
uint64_t max;
};
struct merged_iter {
struct reftable_iterator *stack;
uint32_t hash_id;
size_t stack_len;
uint8_t typ;
int suppress_deletions;
struct merged_iter_pqueue pq;
};
void merged_table_release(struct reftable_merged_table *mt);
#endif

View File

@ -14,22 +14,12 @@ license that can be found in the LICENSE file or at
int pq_less(struct pq_entry *a, struct pq_entry *b)
{
int cmp = reftable_record_cmp(&a->rec, &b->rec);
int cmp = reftable_record_cmp(a->rec, b->rec);
if (cmp == 0)
return a->index > b->index;
return cmp < 0;
}
struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq)
{
return pq.heap[0];
}
int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq)
{
return pq.len == 0;
}
struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq)
{
int i = 0;
@ -82,10 +72,6 @@ void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry
void merged_iter_pqueue_release(struct merged_iter_pqueue *pq)
{
int i = 0;
for (i = 0; i < pq->len; i++) {
reftable_record_release(&pq->heap[i].rec);
}
FREE_AND_NULL(pq->heap);
pq->len = pq->cap = 0;
memset(pq, 0, sizeof(*pq));
}

View File

@ -12,8 +12,8 @@ license that can be found in the LICENSE file or at
#include "record.h"
struct pq_entry {
int index;
struct reftable_record rec;
size_t index;
struct reftable_record *rec;
};
struct merged_iter_pqueue {
@ -22,12 +22,20 @@ struct merged_iter_pqueue {
size_t cap;
};
struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq);
int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq);
void merged_iter_pqueue_check(struct merged_iter_pqueue pq);
struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq);
void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e);
void merged_iter_pqueue_release(struct merged_iter_pqueue *pq);
int pq_less(struct pq_entry *a, struct pq_entry *b);
static inline struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq)
{
return pq.heap[0];
}
static inline int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq)
{
return pq.len == 0;
}
#endif

View File

@ -27,48 +27,43 @@ void merged_iter_pqueue_check(struct merged_iter_pqueue pq)
static void test_pq(void)
{
char *names[54] = { NULL };
int N = ARRAY_SIZE(names) - 1;
struct merged_iter_pqueue pq = { NULL };
struct reftable_record recs[54];
int N = ARRAY_SIZE(recs) - 1, i;
char *last = NULL;
int i = 0;
for (i = 0; i < N; i++) {
char name[100];
snprintf(name, sizeof(name), "%02d", i);
names[i] = xstrdup(name);
struct strbuf refname = STRBUF_INIT;
strbuf_addf(&refname, "%02d", i);
reftable_record_init(&recs[i], BLOCK_TYPE_REF);
recs[i].u.ref.refname = strbuf_detach(&refname, NULL);
}
i = 1;
do {
struct pq_entry e = { .rec = { .type = BLOCK_TYPE_REF,
.u.ref = {
.refname = names[i],
} } };
struct pq_entry e = {
.rec = &recs[i],
};
merged_iter_pqueue_add(&pq, &e);
merged_iter_pqueue_check(pq);
i = (i * 7) % N;
} while (i != 1);
while (!merged_iter_pqueue_is_empty(pq)) {
struct pq_entry e = merged_iter_pqueue_remove(&pq);
struct reftable_record *rec = &e.rec;
merged_iter_pqueue_check(pq);
EXPECT(reftable_record_type(rec) == BLOCK_TYPE_REF);
if (last) {
EXPECT(strcmp(last, rec->u.ref.refname) < 0);
}
/* this is names[i], so don't dealloc. */
last = rec->u.ref.refname;
rec->u.ref.refname = NULL;
reftable_record_release(rec);
}
for (i = 0; i < N; i++) {
reftable_free(names[i]);
EXPECT(reftable_record_type(e.rec) == BLOCK_TYPE_REF);
if (last)
EXPECT(strcmp(last, e.rec->u.ref.refname) < 0);
last = e.rec->u.ref.refname;
}
for (i = 0; i < N; i++)
reftable_record_release(&recs[i]);
merged_iter_pqueue_release(&pq);
}

View File

@ -159,20 +159,19 @@ int reftable_encode_key(int *restart, struct string_view dest,
return start.len - dest.len;
}
int reftable_decode_key(struct strbuf *key, uint8_t *extra,
struct strbuf last_key, struct string_view in)
int reftable_decode_key(struct strbuf *last_key, uint8_t *extra,
struct string_view in)
{
int start_len = in.len;
uint64_t prefix_len = 0;
uint64_t suffix_len = 0;
int n = get_var_int(&prefix_len, &in);
int n;
n = get_var_int(&prefix_len, &in);
if (n < 0)
return -1;
string_view_consume(&in, n);
if (prefix_len > last_key.len)
return -1;
n = get_var_int(&suffix_len, &in);
if (n <= 0)
return -1;
@ -181,12 +180,12 @@ int reftable_decode_key(struct strbuf *key, uint8_t *extra,
*extra = (uint8_t)(suffix_len & 0x7);
suffix_len >>= 3;
if (in.len < suffix_len)
if (in.len < suffix_len ||
prefix_len > last_key->len)
return -1;
strbuf_reset(key);
strbuf_add(key, last_key.buf, prefix_len);
strbuf_add(key, in.buf, suffix_len);
strbuf_setlen(last_key, prefix_len);
strbuf_add(last_key, in.buf, suffix_len);
string_view_consume(&in, suffix_len);
return start_len - in.len;
@ -205,14 +204,26 @@ static void reftable_ref_record_copy_from(void *rec, const void *src_rec,
{
struct reftable_ref_record *ref = rec;
const struct reftable_ref_record *src = src_rec;
char *refname = NULL;
size_t refname_cap = 0;
assert(hash_size > 0);
/* This is simple and correct, but we could probably reuse the hash
* fields. */
SWAP(refname, ref->refname);
SWAP(refname_cap, ref->refname_cap);
reftable_ref_record_release(ref);
SWAP(ref->refname, refname);
SWAP(ref->refname_cap, refname_cap);
if (src->refname) {
ref->refname = xstrdup(src->refname);
size_t refname_len = strlen(src->refname);
REFTABLE_ALLOC_GROW(ref->refname, refname_len + 1,
ref->refname_cap);
memcpy(ref->refname, src->refname, refname_len);
ref->refname[refname_len] = 0;
}
ref->update_index = src->update_index;
ref->value_type = src->value_type;
switch (src->value_type) {
@ -368,16 +379,24 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key,
struct reftable_ref_record *r = rec;
struct string_view start = in;
uint64_t update_index = 0;
int n = get_var_int(&update_index, &in);
const char *refname = NULL;
size_t refname_cap = 0;
int n;
assert(hash_size > 0);
n = get_var_int(&update_index, &in);
if (n < 0)
return n;
string_view_consume(&in, n);
SWAP(refname, r->refname);
SWAP(refname_cap, r->refname_cap);
reftable_ref_record_release(r);
SWAP(r->refname, refname);
SWAP(r->refname_cap, refname_cap);
assert(hash_size > 0);
r->refname = reftable_malloc(key.len + 1);
REFTABLE_ALLOC_GROW(r->refname, key.len + 1, r->refname_cap);
memcpy(r->refname, key.buf, key.len);
r->refname[key.len] = 0;
@ -1157,11 +1176,6 @@ void reftable_record_key(struct reftable_record *rec, struct strbuf *dest)
reftable_record_vtable(rec)->key(reftable_record_data(rec), dest);
}
uint8_t reftable_record_type(struct reftable_record *rec)
{
return rec->type;
}
int reftable_record_encode(struct reftable_record *rec, struct string_view dest,
int hash_size)
{
@ -1283,12 +1297,6 @@ int reftable_log_record_is_deletion(const struct reftable_log_record *log)
return (log->value_type == REFTABLE_LOG_DELETION);
}
void string_view_consume(struct string_view *s, int n)
{
s->buf += n;
s->len -= n;
}
static void *reftable_record_data(struct reftable_record *rec)
{
switch (rec->type) {

View File

@ -25,7 +25,11 @@ struct string_view {
};
/* Advance `s.buf` by `n`, and decrease length. */
void string_view_consume(struct string_view *s, int n);
static inline void string_view_consume(struct string_view *s, int n)
{
s->buf += n;
s->len -= n;
}
/* utilities for de/encoding varints */
@ -81,9 +85,12 @@ int reftable_encode_key(int *is_restart, struct string_view dest,
struct strbuf prev_key, struct strbuf key,
uint8_t extra);
/* Decode into `key` and `extra` from `in` */
int reftable_decode_key(struct strbuf *key, uint8_t *extra,
struct strbuf last_key, struct string_view in);
/*
* Decode into `last_key` and `extra` from `in`. `last_key` is expected to
* contain the decoded key of the preceding record, if any.
*/
int reftable_decode_key(struct strbuf *last_key, uint8_t *extra,
struct string_view in);
/* reftable_index_record are used internally to speed up lookups. */
struct reftable_index_record {
@ -124,7 +131,6 @@ int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b);
int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size);
void reftable_record_print(struct reftable_record *rec, int hash_size);
void reftable_record_key(struct reftable_record *rec, struct strbuf *dest);
uint8_t reftable_record_type(struct reftable_record *rec);
void reftable_record_copy_from(struct reftable_record *rec,
struct reftable_record *src, int hash_size);
uint8_t reftable_record_val_type(struct reftable_record *rec);
@ -135,6 +141,11 @@ int reftable_record_decode(struct reftable_record *rec, struct strbuf key,
int hash_size);
int reftable_record_is_deletion(struct reftable_record *rec);
static inline uint8_t reftable_record_type(struct reftable_record *rec)
{
return rec->type;
}
/* frees and zeroes out the embedded record */
void reftable_record_release(struct reftable_record *rec);

View File

@ -295,7 +295,8 @@ static void test_key_roundtrip(void)
EXPECT(!restart);
EXPECT(n > 0);
m = reftable_decode_key(&roundtrip, &rt_extra, last_key, dest);
strbuf_addstr(&roundtrip, "refs/heads/master");
m = reftable_decode_key(&roundtrip, &rt_extra, dest);
EXPECT(n == m);
EXPECT(0 == strbuf_cmp(&key, &roundtrip));
EXPECT(rt_extra == extra);

View File

@ -22,6 +22,7 @@ license that can be found in the LICENSE file or at
/* reftable_ref_record holds a ref database entry target_value */
struct reftable_ref_record {
char *refname; /* Name of the ref, malloced. */
size_t refname_cap;
uint64_t update_index; /* Logical timestamp at which this value is
* written */