diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 2c88bbd448..74dab18eda 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -346,6 +346,7 @@ struct reftable_ref_iterator { struct object_id oid; const char *prefix; + size_t prefix_len; unsigned int flags; int err; }; @@ -374,8 +375,8 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) continue; } - if (iter->prefix && - strncmp(iter->prefix, iter->ref.refname, strlen(iter->prefix))) { + if (iter->prefix_len && + strncmp(iter->prefix, iter->ref.refname, iter->prefix_len)) { iter->err = 1; break; } @@ -484,6 +485,7 @@ static struct reftable_ref_iterator *ref_iterator_for_stack(struct reftable_ref_ iter = xcalloc(1, sizeof(*iter)); base_ref_iterator_init(&iter->base, &reftable_ref_iterator_vtable); iter->prefix = prefix; + iter->prefix_len = prefix ? strlen(prefix) : 0; iter->base.oid = &iter->oid; iter->flags = flags; iter->refs = refs; diff --git a/reftable/block.c b/reftable/block.c index 72eb73b380..ad9074dba6 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -291,9 +291,8 @@ static int restart_key_less(size_t idx, void *args) /* the restart key is verbatim in the block, so this could avoid the alloc for decoding the key */ struct strbuf rkey = STRBUF_INIT; - struct strbuf last_key = STRBUF_INIT; uint8_t unused_extra; - int n = reftable_decode_key(&rkey, &unused_extra, last_key, in); + int n = reftable_decode_key(&rkey, &unused_extra, in); int result; if (n < 0) { a->error = 1; @@ -326,35 +325,34 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec) if (it->next_off >= it->br->block_len) return 1; - n = reftable_decode_key(&it->key, &extra, it->last_key, in); + n = reftable_decode_key(&it->last_key, &extra, in); if (n < 0) return -1; - - if (!it->key.len) + if (!it->last_key.len) return REFTABLE_FORMAT_ERROR; string_view_consume(&in, n); - n = reftable_record_decode(rec, it->key, extra, in, it->br->hash_size); + n = reftable_record_decode(rec, it->last_key, extra, in, it->br->hash_size); if (n < 0) return -1; string_view_consume(&in, n); - strbuf_swap(&it->last_key, &it->key); it->next_off += start.len - in.len; return 0; } int block_reader_first_key(struct block_reader *br, struct strbuf *key) { - struct strbuf empty = STRBUF_INIT; - int off = br->header_off + 4; + int off = br->header_off + 4, n; struct string_view in = { .buf = br->block.data + off, .len = br->block_len - off, }; - uint8_t extra = 0; - int n = reftable_decode_key(key, &extra, empty, in); + + strbuf_reset(key); + + n = reftable_decode_key(key, &extra, in); if (n < 0) return n; if (!key->len) @@ -371,7 +369,6 @@ int block_iter_seek(struct block_iter *it, struct strbuf *want) void block_iter_close(struct block_iter *it) { strbuf_release(&it->last_key); - strbuf_release(&it->key); } int block_reader_seek(struct block_reader *br, struct block_iter *it, @@ -408,8 +405,8 @@ int block_reader_seek(struct block_reader *br, struct block_iter *it, if (err < 0) goto done; - reftable_record_key(&rec, &it->key); - if (err > 0 || strbuf_cmp(&it->key, want) >= 0) { + reftable_record_key(&rec, &it->last_key); + if (err > 0 || strbuf_cmp(&it->last_key, want) >= 0) { err = 0; goto done; } diff --git a/reftable/block.h b/reftable/block.h index 17481e6331..51699af233 100644 --- a/reftable/block.h +++ b/reftable/block.h @@ -84,12 +84,10 @@ struct block_iter { /* key for last entry we read. */ struct strbuf last_key; - struct strbuf key; }; #define BLOCK_ITER_INIT { \ .last_key = STRBUF_INIT, \ - .key = STRBUF_INIT, \ } /* initializes a block reader. */ diff --git a/reftable/iter.c b/reftable/iter.c index 8b5ebf6183..7aa30c4a51 100644 --- a/reftable/iter.c +++ b/reftable/iter.c @@ -16,11 +16,6 @@ license that can be found in the LICENSE file or at #include "reader.h" #include "reftable-error.h" -int iterator_is_null(struct reftable_iterator *it) -{ - return !it->ops; -} - static void filtering_ref_iterator_close(void *iter_arg) { struct filtering_ref_iterator *fri = iter_arg; diff --git a/reftable/iter.h b/reftable/iter.h index 47d67d84df..537431baba 100644 --- a/reftable/iter.h +++ b/reftable/iter.h @@ -16,10 +16,6 @@ license that can be found in the LICENSE file or at #include "reftable-iterator.h" #include "reftable-generic.h" -/* Returns true for a zeroed out iterator, such as the one returned from - * iterator_destroy. */ -int iterator_is_null(struct reftable_iterator *it); - /* iterator that produces only ref records that point to `oid` */ struct filtering_ref_iterator { int double_check; diff --git a/reftable/merged.c b/reftable/merged.c index 1aa6cd31b7..f85a24c678 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -17,23 +17,37 @@ license that can be found in the LICENSE file or at #include "reftable-error.h" #include "system.h" +struct merged_subiter { + struct reftable_iterator iter; + struct reftable_record rec; +}; + +struct merged_iter { + struct merged_subiter *subiters; + struct merged_iter_pqueue pq; + uint32_t hash_id; + size_t stack_len; + uint8_t typ; + int suppress_deletions; + ssize_t advance_index; +}; + static int merged_iter_init(struct merged_iter *mi) { for (size_t i = 0; i < mi->stack_len; i++) { struct pq_entry e = { .index = i, + .rec = &mi->subiters[i].rec, }; int err; - reftable_record_init(&e.rec, mi->typ); - err = iterator_next(&mi->stack[i], &e.rec); + reftable_record_init(&mi->subiters[i].rec, mi->typ); + err = iterator_next(&mi->subiters[i].iter, + &mi->subiters[i].rec); if (err < 0) return err; - if (err > 0) { - reftable_iterator_destroy(&mi->stack[i]); - reftable_record_release(&e.rec); + if (err > 0) continue; - } merged_iter_pqueue_add(&mi->pq, &e); } @@ -46,54 +60,66 @@ static void merged_iter_close(void *p) struct merged_iter *mi = p; merged_iter_pqueue_release(&mi->pq); - for (size_t i = 0; i < mi->stack_len; i++) - reftable_iterator_destroy(&mi->stack[i]); - reftable_free(mi->stack); -} - -static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, - size_t idx) -{ - struct pq_entry e = { - .index = idx, - }; - int err; - - reftable_record_init(&e.rec, mi->typ); - err = iterator_next(&mi->stack[idx], &e.rec); - if (err < 0) - return err; - - if (err > 0) { - reftable_iterator_destroy(&mi->stack[idx]); - reftable_record_release(&e.rec); - return 0; + for (size_t i = 0; i < mi->stack_len; i++) { + reftable_iterator_destroy(&mi->subiters[i].iter); + reftable_record_release(&mi->subiters[i].rec); } - - merged_iter_pqueue_add(&mi->pq, &e); - return 0; + reftable_free(mi->subiters); } static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx) { - if (iterator_is_null(&mi->stack[idx])) - return 0; - return merged_iter_advance_nonnull_subiter(mi, idx); + struct pq_entry e = { + .index = idx, + .rec = &mi->subiters[idx].rec, + }; + int err; + + err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec); + if (err) + return err; + + merged_iter_pqueue_add(&mi->pq, &e); + return 0; } static int merged_iter_next_entry(struct merged_iter *mi, struct reftable_record *rec) { struct pq_entry entry = { 0 }; - int err = 0; + int err = 0, empty; - if (merged_iter_pqueue_is_empty(mi->pq)) + empty = merged_iter_pqueue_is_empty(mi->pq); + + if (mi->advance_index >= 0) { + /* + * When there are no pqueue entries then we only have a single + * subiter left. There is no need to use the pqueue in that + * case anymore as we know that the subiter will return entries + * in the correct order already. + * + * While this may sound like a very specific edge case, it may + * happen more frequently than you think. Most repositories + * will end up having a single large base table that contains + * most of the refs. It's thus likely that we exhaust all + * subiters but the one from that base ref. + */ + if (empty) + return iterator_next(&mi->subiters[mi->advance_index].iter, + rec); + + err = merged_iter_advance_subiter(mi, mi->advance_index); + if (err < 0) + return err; + if (!err) + empty = 0; + mi->advance_index = -1; + } + + if (empty) return 1; entry = merged_iter_pqueue_remove(&mi->pq); - err = merged_iter_advance_subiter(mi, entry.index); - if (err < 0) - return err; /* One can also use reftable as datacenter-local storage, where the ref @@ -107,54 +133,32 @@ static int merged_iter_next_entry(struct merged_iter *mi, struct pq_entry top = merged_iter_pqueue_top(mi->pq); int cmp; - /* - * When the next entry comes from the same queue as the current - * entry then it must by definition be larger. This avoids a - * comparison in the most common case. - */ - if (top.index == entry.index) - break; - - cmp = reftable_record_cmp(&top.rec, &entry.rec); + cmp = reftable_record_cmp(top.rec, entry.rec); if (cmp > 0) break; merged_iter_pqueue_remove(&mi->pq); err = merged_iter_advance_subiter(mi, top.index); if (err < 0) - goto done; - reftable_record_release(&top.rec); + return err; } - reftable_record_release(rec); - *rec = entry.rec; - -done: - if (err) - reftable_record_release(&entry.rec); - return err; -} - -static int merged_iter_next(struct merged_iter *mi, struct reftable_record *rec) -{ - while (1) { - int err = merged_iter_next_entry(mi, rec); - if (err == 0 && mi->suppress_deletions && - reftable_record_is_deletion(rec)) { - continue; - } - - return err; - } + mi->advance_index = entry.index; + SWAP(*rec, *entry.rec); + return 0; } static int merged_iter_next_void(void *p, struct reftable_record *rec) { struct merged_iter *mi = p; - if (merged_iter_pqueue_is_empty(mi->pq)) - return 1; - - return merged_iter_next(mi, rec); + while (1) { + int err = merged_iter_next_entry(mi, rec); + if (err) + return err; + if (mi->suppress_deletions && reftable_record_is_deletion(rec)) + continue; + return 0; + } } static struct reftable_iterator_vtable merged_iter_vtable = { @@ -246,14 +250,15 @@ static int merged_table_seek_record(struct reftable_merged_table *mt, .typ = reftable_record_type(rec), .hash_id = mt->hash_id, .suppress_deletions = mt->suppress_deletions, + .advance_index = -1, }; struct merged_iter *p; int err; - REFTABLE_CALLOC_ARRAY(merged.stack, mt->stack_len); + REFTABLE_CALLOC_ARRAY(merged.subiters, mt->stack_len); for (size_t i = 0; i < mt->stack_len; i++) { err = reftable_table_seek_record(&mt->stack[i], - &merged.stack[merged.stack_len], rec); + &merged.subiters[merged.stack_len].iter, rec); if (err < 0) goto out; if (!err) diff --git a/reftable/merged.h b/reftable/merged.h index 7d9f95d27e..a2571dbc99 100644 --- a/reftable/merged.h +++ b/reftable/merged.h @@ -9,7 +9,7 @@ license that can be found in the LICENSE file or at #ifndef MERGED_H #define MERGED_H -#include "pq.h" +#include "system.h" struct reftable_merged_table { struct reftable_table *stack; @@ -24,15 +24,6 @@ struct reftable_merged_table { uint64_t max; }; -struct merged_iter { - struct reftable_iterator *stack; - uint32_t hash_id; - size_t stack_len; - uint8_t typ; - int suppress_deletions; - struct merged_iter_pqueue pq; -}; - void merged_table_release(struct reftable_merged_table *mt); #endif diff --git a/reftable/pq.c b/reftable/pq.c index e0ccce2b97..7fb45d8c60 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -14,22 +14,12 @@ license that can be found in the LICENSE file or at int pq_less(struct pq_entry *a, struct pq_entry *b) { - int cmp = reftable_record_cmp(&a->rec, &b->rec); + int cmp = reftable_record_cmp(a->rec, b->rec); if (cmp == 0) return a->index > b->index; return cmp < 0; } -struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq) -{ - return pq.heap[0]; -} - -int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq) -{ - return pq.len == 0; -} - struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq) { int i = 0; @@ -82,10 +72,6 @@ void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry void merged_iter_pqueue_release(struct merged_iter_pqueue *pq) { - int i = 0; - for (i = 0; i < pq->len; i++) { - reftable_record_release(&pq->heap[i].rec); - } FREE_AND_NULL(pq->heap); - pq->len = pq->cap = 0; + memset(pq, 0, sizeof(*pq)); } diff --git a/reftable/pq.h b/reftable/pq.h index e85bac9b52..f796c23179 100644 --- a/reftable/pq.h +++ b/reftable/pq.h @@ -12,8 +12,8 @@ license that can be found in the LICENSE file or at #include "record.h" struct pq_entry { - int index; - struct reftable_record rec; + size_t index; + struct reftable_record *rec; }; struct merged_iter_pqueue { @@ -22,12 +22,20 @@ struct merged_iter_pqueue { size_t cap; }; -struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq); -int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq); void merged_iter_pqueue_check(struct merged_iter_pqueue pq); struct pq_entry merged_iter_pqueue_remove(struct merged_iter_pqueue *pq); void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry *e); void merged_iter_pqueue_release(struct merged_iter_pqueue *pq); int pq_less(struct pq_entry *a, struct pq_entry *b); +static inline struct pq_entry merged_iter_pqueue_top(struct merged_iter_pqueue pq) +{ + return pq.heap[0]; +} + +static inline int merged_iter_pqueue_is_empty(struct merged_iter_pqueue pq) +{ + return pq.len == 0; +} + #endif diff --git a/reftable/pq_test.c b/reftable/pq_test.c index c202eff848..b7d3c80cc7 100644 --- a/reftable/pq_test.c +++ b/reftable/pq_test.c @@ -27,48 +27,43 @@ void merged_iter_pqueue_check(struct merged_iter_pqueue pq) static void test_pq(void) { - char *names[54] = { NULL }; - int N = ARRAY_SIZE(names) - 1; - struct merged_iter_pqueue pq = { NULL }; + struct reftable_record recs[54]; + int N = ARRAY_SIZE(recs) - 1, i; char *last = NULL; - int i = 0; for (i = 0; i < N; i++) { - char name[100]; - snprintf(name, sizeof(name), "%02d", i); - names[i] = xstrdup(name); + struct strbuf refname = STRBUF_INIT; + strbuf_addf(&refname, "%02d", i); + + reftable_record_init(&recs[i], BLOCK_TYPE_REF); + recs[i].u.ref.refname = strbuf_detach(&refname, NULL); } i = 1; do { - struct pq_entry e = { .rec = { .type = BLOCK_TYPE_REF, - .u.ref = { - .refname = names[i], - } } }; + struct pq_entry e = { + .rec = &recs[i], + }; + merged_iter_pqueue_add(&pq, &e); merged_iter_pqueue_check(pq); + i = (i * 7) % N; } while (i != 1); while (!merged_iter_pqueue_is_empty(pq)) { struct pq_entry e = merged_iter_pqueue_remove(&pq); - struct reftable_record *rec = &e.rec; merged_iter_pqueue_check(pq); - EXPECT(reftable_record_type(rec) == BLOCK_TYPE_REF); - if (last) { - EXPECT(strcmp(last, rec->u.ref.refname) < 0); - } - /* this is names[i], so don't dealloc. */ - last = rec->u.ref.refname; - rec->u.ref.refname = NULL; - reftable_record_release(rec); - } - for (i = 0; i < N; i++) { - reftable_free(names[i]); + EXPECT(reftable_record_type(e.rec) == BLOCK_TYPE_REF); + if (last) + EXPECT(strcmp(last, e.rec->u.ref.refname) < 0); + last = e.rec->u.ref.refname; } + for (i = 0; i < N; i++) + reftable_record_release(&recs[i]); merged_iter_pqueue_release(&pq); } diff --git a/reftable/record.c b/reftable/record.c index d6bb42e887..367de04600 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -159,20 +159,19 @@ int reftable_encode_key(int *restart, struct string_view dest, return start.len - dest.len; } -int reftable_decode_key(struct strbuf *key, uint8_t *extra, - struct strbuf last_key, struct string_view in) +int reftable_decode_key(struct strbuf *last_key, uint8_t *extra, + struct string_view in) { int start_len = in.len; uint64_t prefix_len = 0; uint64_t suffix_len = 0; - int n = get_var_int(&prefix_len, &in); + int n; + + n = get_var_int(&prefix_len, &in); if (n < 0) return -1; string_view_consume(&in, n); - if (prefix_len > last_key.len) - return -1; - n = get_var_int(&suffix_len, &in); if (n <= 0) return -1; @@ -181,12 +180,12 @@ int reftable_decode_key(struct strbuf *key, uint8_t *extra, *extra = (uint8_t)(suffix_len & 0x7); suffix_len >>= 3; - if (in.len < suffix_len) + if (in.len < suffix_len || + prefix_len > last_key->len) return -1; - strbuf_reset(key); - strbuf_add(key, last_key.buf, prefix_len); - strbuf_add(key, in.buf, suffix_len); + strbuf_setlen(last_key, prefix_len); + strbuf_add(last_key, in.buf, suffix_len); string_view_consume(&in, suffix_len); return start_len - in.len; @@ -205,14 +204,26 @@ static void reftable_ref_record_copy_from(void *rec, const void *src_rec, { struct reftable_ref_record *ref = rec; const struct reftable_ref_record *src = src_rec; + char *refname = NULL; + size_t refname_cap = 0; + assert(hash_size > 0); - /* This is simple and correct, but we could probably reuse the hash - * fields. */ + SWAP(refname, ref->refname); + SWAP(refname_cap, ref->refname_cap); reftable_ref_record_release(ref); + SWAP(ref->refname, refname); + SWAP(ref->refname_cap, refname_cap); + if (src->refname) { - ref->refname = xstrdup(src->refname); + size_t refname_len = strlen(src->refname); + + REFTABLE_ALLOC_GROW(ref->refname, refname_len + 1, + ref->refname_cap); + memcpy(ref->refname, src->refname, refname_len); + ref->refname[refname_len] = 0; } + ref->update_index = src->update_index; ref->value_type = src->value_type; switch (src->value_type) { @@ -368,16 +379,24 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key, struct reftable_ref_record *r = rec; struct string_view start = in; uint64_t update_index = 0; - int n = get_var_int(&update_index, &in); + const char *refname = NULL; + size_t refname_cap = 0; + int n; + + assert(hash_size > 0); + + n = get_var_int(&update_index, &in); if (n < 0) return n; string_view_consume(&in, n); + SWAP(refname, r->refname); + SWAP(refname_cap, r->refname_cap); reftable_ref_record_release(r); + SWAP(r->refname, refname); + SWAP(r->refname_cap, refname_cap); - assert(hash_size > 0); - - r->refname = reftable_malloc(key.len + 1); + REFTABLE_ALLOC_GROW(r->refname, key.len + 1, r->refname_cap); memcpy(r->refname, key.buf, key.len); r->refname[key.len] = 0; @@ -1157,11 +1176,6 @@ void reftable_record_key(struct reftable_record *rec, struct strbuf *dest) reftable_record_vtable(rec)->key(reftable_record_data(rec), dest); } -uint8_t reftable_record_type(struct reftable_record *rec) -{ - return rec->type; -} - int reftable_record_encode(struct reftable_record *rec, struct string_view dest, int hash_size) { @@ -1283,12 +1297,6 @@ int reftable_log_record_is_deletion(const struct reftable_log_record *log) return (log->value_type == REFTABLE_LOG_DELETION); } -void string_view_consume(struct string_view *s, int n) -{ - s->buf += n; - s->len -= n; -} - static void *reftable_record_data(struct reftable_record *rec) { switch (rec->type) { diff --git a/reftable/record.h b/reftable/record.h index a05e2be179..5e8304e052 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -25,7 +25,11 @@ struct string_view { }; /* Advance `s.buf` by `n`, and decrease length. */ -void string_view_consume(struct string_view *s, int n); +static inline void string_view_consume(struct string_view *s, int n) +{ + s->buf += n; + s->len -= n; +} /* utilities for de/encoding varints */ @@ -81,9 +85,12 @@ int reftable_encode_key(int *is_restart, struct string_view dest, struct strbuf prev_key, struct strbuf key, uint8_t extra); -/* Decode into `key` and `extra` from `in` */ -int reftable_decode_key(struct strbuf *key, uint8_t *extra, - struct strbuf last_key, struct string_view in); +/* + * Decode into `last_key` and `extra` from `in`. `last_key` is expected to + * contain the decoded key of the preceding record, if any. + */ +int reftable_decode_key(struct strbuf *last_key, uint8_t *extra, + struct string_view in); /* reftable_index_record are used internally to speed up lookups. */ struct reftable_index_record { @@ -124,7 +131,6 @@ int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b); int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size); void reftable_record_print(struct reftable_record *rec, int hash_size); void reftable_record_key(struct reftable_record *rec, struct strbuf *dest); -uint8_t reftable_record_type(struct reftable_record *rec); void reftable_record_copy_from(struct reftable_record *rec, struct reftable_record *src, int hash_size); uint8_t reftable_record_val_type(struct reftable_record *rec); @@ -135,6 +141,11 @@ int reftable_record_decode(struct reftable_record *rec, struct strbuf key, int hash_size); int reftable_record_is_deletion(struct reftable_record *rec); +static inline uint8_t reftable_record_type(struct reftable_record *rec) +{ + return rec->type; +} + /* frees and zeroes out the embedded record */ void reftable_record_release(struct reftable_record *rec); diff --git a/reftable/record_test.c b/reftable/record_test.c index a86cff5526..89209894d8 100644 --- a/reftable/record_test.c +++ b/reftable/record_test.c @@ -295,7 +295,8 @@ static void test_key_roundtrip(void) EXPECT(!restart); EXPECT(n > 0); - m = reftable_decode_key(&roundtrip, &rt_extra, last_key, dest); + strbuf_addstr(&roundtrip, "refs/heads/master"); + m = reftable_decode_key(&roundtrip, &rt_extra, dest); EXPECT(n == m); EXPECT(0 == strbuf_cmp(&key, &roundtrip)); EXPECT(rt_extra == extra); diff --git a/reftable/reftable-record.h b/reftable/reftable-record.h index bb6e99acd3..e657001d42 100644 --- a/reftable/reftable-record.h +++ b/reftable/reftable-record.h @@ -22,6 +22,7 @@ license that can be found in the LICENSE file or at /* reftable_ref_record holds a ref database entry target_value */ struct reftable_ref_record { char *refname; /* Name of the ref, malloced. */ + size_t refname_cap; uint64_t update_index; /* Logical timestamp at which this value is * written */