1
0
mirror of https://github.com/git/git.git synced 2024-09-28 16:13:01 +02:00
git/oidset.h
René Scharfe 8b2f8cbcb1 oidset: use khash
Reimplement oidset using khash.h in order to reduce its memory footprint
and make it faster.

Performance of a command that mainly checks for duplicate objects using
an oidset, with master and Clang 6.0.1:

  $ cmd="./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'"

  $ /usr/bin/time $cmd >/dev/null
  0.22user 0.03system 0:00.25elapsed 99%CPU (0avgtext+0avgdata 48484maxresident)k
  0inputs+0outputs (0major+11204minor)pagefaults 0swaps

  $ hyperfine "$cmd"
  Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'

    Time (mean ± σ):     250.0 ms ±   6.0 ms    [User: 225.9 ms, System: 23.6 ms]

    Range (min … max):   242.0 ms … 261.1 ms

And with this patch:

  $ /usr/bin/time $cmd >/dev/null
  0.14user 0.00system 0:00.15elapsed 100%CPU (0avgtext+0avgdata 41396maxresident)k
  0inputs+0outputs (0major+8318minor)pagefaults 0swaps

  $ hyperfine "$cmd"
  Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'

    Time (mean ± σ):     151.9 ms ±   4.9 ms    [User: 130.5 ms, System: 21.2 ms]

    Range (min … max):   148.2 ms … 170.4 ms

Initial-patch-by: Jeff King <peff@peff.net>
Signed-off-by: Rene Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-04 11:12:13 -07:00

104 lines
2.4 KiB
C

#ifndef OIDSET_H
#define OIDSET_H
#include "hashmap.h"
#include "khash.h"
/**
* This API is similar to sha1-array, in that it maintains a set of object ids
* in a memory-efficient way. The major differences are:
*
* 1. It uses a hash, so we can do online duplicate removal, rather than
* sort-and-uniq at the end. This can reduce memory footprint if you have
* a large list of oids with many duplicates.
*
* 2. The per-unique-oid memory footprint is slightly higher due to hash
* table overhead.
*/
/*
 * Hash callback handed to KHASH_INIT: computes the table hash of an
 * object id by delegating to sha1hash() on its raw hash bytes.
 */
static inline unsigned int oid_hash(struct object_id oid)
{
	const unsigned char *raw = oid.hash;

	return sha1hash(raw);
}
/*
 * Equality callback handed to KHASH_INIT: returns whatever oideq()
 * reports for the two object ids, which are received by value.
 */
static inline int oid_equal(struct object_id a, struct object_id b)
{
	const struct object_id *lhs = &a;
	const struct object_id *rhs = &b;

	return oideq(lhs, rhs);
}
/*
 * Instantiate a khash table named "oid", keyed by `struct object_id` and
 * using oid_hash()/oid_equal() as its hash and equality callbacks. This
 * provides the `kh_oid_t` type and helpers such as kh_resize_oid() used
 * below.
 *
 * NOTE(review): the `0` argument presumably selects khash's set-only mode
 * (no stored values), making the `int` value type unused -- confirm
 * against khash.h.
 */
KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal)
/**
 * A single oidset; should be zero-initialized (or use OIDSET_INIT).
 *
 * The khash table is embedded by value rather than referenced through a
 * pointer, so the struct itself holds the table header.
 */
struct oidset {
kh_oid_t set;
};
/**
 * Static initializer for a `struct oidset`: zero-initializes the embedded
 * khash table, i.e. the same state oidset_init() produces when called
 * with an initial_size of 0.
 */
#define OIDSET_INIT { { 0 } }
static inline void oidset_init(struct oidset *set, size_t initial_size)
{
memset(&set->set, 0, sizeof(set->set));
if (initial_size)
kh_resize_oid(&set->set, initial_size);
}
/**
 * Membership test: returns true iff `set` contains `oid`.
 *
 * Both arguments are const-qualified, so neither the set nor the oid is
 * modified through these pointers.
 */
int oidset_contains(const struct oidset *set, const struct object_id *oid);
/**
 * Insert the oid into the set; a copy is made, so "oid" does not need
 * to persist after this function is called.
 *
 * Returns 1 if the oid was already in the set, 0 otherwise. This can be
 * used to perform an efficient check-and-add in a single call, instead of
 * calling oidset_contains() followed by oidset_insert().
 */
int oidset_insert(struct oidset *set, const struct object_id *oid);
/**
 * Remove the oid from the set.
 *
 * Returns 1 if the oid was present in the set (and has now been removed),
 * 0 otherwise.
 */
int oidset_remove(struct oidset *set, const struct object_id *oid);
/**
 * Remove all entries from the oidset, freeing any resources associated
 * with it.
 *
 * NOTE(review): presumably the set is left in a state equivalent to
 * OIDSET_INIT and can be reused afterwards -- confirm in oidset.c.
 */
void oidset_clear(struct oidset *set);
/**
 * Iteration state for walking the entries of an oidset.
 *
 * Set up with oidset_iter_init() (or oidset_iter_first()), then call
 * oidset_iter_next() repeatedly until it returns NULL.
 */
struct oidset_iter {
/* Table being walked; points into the oidset given to oidset_iter_init(). */
kh_oid_t *set;
/* Index of the next table slot that oidset_iter_next() will examine. */
khiter_t iter;
};
static inline void oidset_iter_init(struct oidset *set,
struct oidset_iter *iter)
{
iter->set = &set->set;
iter->iter = kh_begin(iter->set);
}
static inline struct object_id *oidset_iter_next(struct oidset_iter *iter)
{
for (; iter->iter != kh_end(iter->set); iter->iter++) {
if (kh_exist(iter->set, iter->iter))
return &kh_key(iter->set, iter->iter++);
}
return NULL;
}
/**
 * Convenience wrapper that starts a walk over `set`: initializes `iter`
 * with oidset_iter_init() and immediately returns the result of the first
 * oidset_iter_next() call (NULL if that call finds no entry).
 */
static inline struct object_id *oidset_iter_first(struct oidset *set,
						  struct oidset_iter *iter)
{
	struct object_id *first;

	oidset_iter_init(set, iter);
	first = oidset_iter_next(iter);
	return first;
}
#endif /* OIDSET_H */