1
0
mirror of https://github.com/git/git.git synced 2024-10-20 07:38:51 +02:00
git/t/helper/test-hashmap.c

268 lines
6.2 KiB
C
Raw Normal View History

#include "git-compat-util.h"
#include "hashmap.h"
struct test_entry
{
struct hashmap_entry ent;
/* key and value as two \0-terminated strings */
char key[FLEX_ARRAY];
};
static const char *get_value(const struct test_entry *e)
{
return e->key + strlen(e->key) + 1;
}
static int test_entry_cmp(const void *cmp_data,
const void *entry,
const void *entry_or_key,
const void *keydata)
{
const int ignore_case = cmp_data ? *((int *)cmp_data) : 0;
const struct test_entry *e1 = entry;
const struct test_entry *e2 = entry_or_key;
const char *key = keydata;
if (ignore_case)
return strcasecmp(e1->key, key ? key : e2->key);
else
return strcmp(e1->key, key ? key : e2->key);
}
static struct test_entry *alloc_test_entry(int hash, char *key, int klen,
char *value, int vlen)
{
struct test_entry *entry = xmalloc(st_add4(sizeof(*entry), klen, vlen, 2));
hashmap_entry_init(entry, hash);
memcpy(entry->key, key, klen + 1);
memcpy(entry->key + klen + 1, value, vlen + 1);
return entry;
}
#define HASH_METHOD_FNV 0
#define HASH_METHOD_I 1
#define HASH_METHOD_IDIV10 2
#define HASH_METHOD_0 3
#define HASH_METHOD_X2 4
#define TEST_SPARSE 8
#define TEST_ADD 16
#define TEST_SIZE 100000
static unsigned int hash(unsigned int method, unsigned int i, const char *key)
{
unsigned int hash = 0;
switch (method & 3)
{
case HASH_METHOD_FNV:
hash = strhash(key);
break;
case HASH_METHOD_I:
hash = i;
break;
case HASH_METHOD_IDIV10:
hash = i / 10;
break;
case HASH_METHOD_0:
hash = 0;
break;
}
if (method & HASH_METHOD_X2)
hash = 2 * hash;
return hash;
}
/*
* Test performance of hashmap.[ch]
* Usage: time echo "perfhashmap method rounds" | test-hashmap
*/
static void perf_hashmap(unsigned int method, unsigned int rounds)
{
struct hashmap map;
char buf[16];
struct test_entry **entries;
unsigned int *hashes;
unsigned int i, j;
ALLOC_ARRAY(entries, TEST_SIZE);
ALLOC_ARRAY(hashes, TEST_SIZE);
for (i = 0; i < TEST_SIZE; i++) {
xsnprintf(buf, sizeof(buf), "%i", i);
entries[i] = alloc_test_entry(0, buf, strlen(buf), "", 0);
hashes[i] = hash(method, i, entries[i]->key);
}
if (method & TEST_ADD) {
/* test adding to the map */
for (j = 0; j < rounds; j++) {
hashmap_init(&map, test_entry_cmp, NULL, 0);
/* add entries */
for (i = 0; i < TEST_SIZE; i++) {
hashmap_entry_init(entries[i], hashes[i]);
hashmap_add(&map, entries[i]);
}
hashmap_free(&map, 0);
}
} else {
/* test map lookups */
hashmap_init(&map, test_entry_cmp, NULL, 0);
/* fill the map (sparsely if specified) */
j = (method & TEST_SPARSE) ? TEST_SIZE / 10 : TEST_SIZE;
for (i = 0; i < j; i++) {
hashmap_entry_init(entries[i], hashes[i]);
hashmap_add(&map, entries[i]);
}
for (j = 0; j < rounds; j++) {
for (i = 0; i < TEST_SIZE; i++) {
hashmap_get_from_hash(&map, hashes[i],
entries[i]->key);
}
}
hashmap_free(&map, 0);
}
}
#define DELIM " \t\r\n"
/*
* Read stdin line by line and print result of commands to stdout:
*
* hash key -> strhash(key) memhash(key) strihash(key) memihash(key)
* put key value -> NULL / old value
* get key -> NULL / value
* remove key -> NULL / old value
* iterate -> key1 value1\nkey2 value2\n...
* size -> tablesize numentries
*
* perfhashmap method rounds -> test hashmap.[ch] performance
*/
add an extra level of indirection to main() There are certain startup tasks that we expect every git process to do. In some cases this is just to improve the quality of the program (e.g., setting up gettext()). In others it is a requirement for using certain functions in libgit.a (e.g., system_path() expects that you have called git_extract_argv0_path()). Most commands are builtins and are covered by the git.c version of main(). However, there are still a few external commands that use their own main(). Each of these has to remember to include the correct startup sequence, and we are not always consistent. Rather than just fix the inconsistencies, let's make this harder to get wrong by providing a common main() that can run this standard startup. We basically have two options to do this: - the compat/mingw.h file already does something like this by adding a #define that replaces the definition of main with a wrapper that calls mingw_startup(). The upside is that the code in each program doesn't need to be changed at all; it's rewritten on the fly by the preprocessor. The downside is that it may make debugging of the startup sequence a bit more confusing, as the preprocessor is quietly inserting new code. - the builtin functions are all of the form cmd_foo(), and git.c's main() calls them. This is much more explicit, which may make things more obvious to somebody reading the code. It's also more flexible (because of course we have to figure out _which_ cmd_foo() to call). The downside is that each of the builtins must define cmd_foo(), instead of just main(). This patch chooses the latter option, preferring the more explicit approach, even though it is more invasive. We introduce a new file common-main.c, with the "real" main. It expects to call cmd_main() from whatever other objects it is linked against. We link common-main.o against anything that links against libgit.a, since we know that such programs will need to do this setup. Note that common-main.o can't actually go inside libgit.a, as the linker would not pick up its main() function automatically (it has no callers). The rest of the patch is just adjusting all of the various external programs (mostly in t/helper) to use cmd_main(). I've provided a global declaration for cmd_main(), which means that all of the programs also need to match its signature. In particular, many functions need to switch to "const char **" instead of "char **" for argv. This effect ripples out to a few other variables and functions, as well. This makes the patch even more invasive, but the end result is much better. We should be treating argv strings as const anyway, and now all programs conform to the same signature (which also matches the way builtins are defined). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-07-01 07:58:58 +02:00
int cmd_main(int argc, const char **argv)
{
char line[1024];
struct hashmap map;
int icase;
/* init hash map */
icase = argc > 1 && !strcmp("ignorecase", argv[1]);
hashmap_init(&map, test_entry_cmp, &icase, 0);
/* process commands from stdin */
while (fgets(line, sizeof(line), stdin)) {
char *cmd, *p1 = NULL, *p2 = NULL;
int l1 = 0, l2 = 0, hash = 0;
struct test_entry *entry;
/* break line into command and up to two parameters */
cmd = strtok(line, DELIM);
/* ignore empty lines */
if (!cmd || *cmd == '#')
continue;
p1 = strtok(NULL, DELIM);
if (p1) {
l1 = strlen(p1);
hash = icase ? strihash(p1) : strhash(p1);
p2 = strtok(NULL, DELIM);
if (p2)
l2 = strlen(p2);
}
if (!strcmp("hash", cmd) && l1) {
/* print results of different hash functions */
printf("%u %u %u %u\n", strhash(p1), memhash(p1, l1),
strihash(p1), memihash(p1, l1));
} else if (!strcmp("add", cmd) && l1 && l2) {
/* create entry with key = p1, value = p2 */
entry = alloc_test_entry(hash, p1, l1, p2, l2);
/* add to hashmap */
hashmap_add(&map, entry);
} else if (!strcmp("put", cmd) && l1 && l2) {
/* create entry with key = p1, value = p2 */
entry = alloc_test_entry(hash, p1, l1, p2, l2);
/* add / replace entry */
entry = hashmap_put(&map, entry);
/* print and free replaced entry, if any */
puts(entry ? get_value(entry) : "NULL");
free(entry);
} else if (!strcmp("get", cmd) && l1) {
/* lookup entry in hashmap */
entry = hashmap_get_from_hash(&map, hash, p1);
/* print result */
if (!entry)
puts("NULL");
while (entry) {
puts(get_value(entry));
entry = hashmap_get_next(&map, entry);
}
} else if (!strcmp("remove", cmd) && l1) {
/* setup static key */
struct hashmap_entry key;
hashmap_entry_init(&key, hash);
/* remove entry from hashmap */
entry = hashmap_remove(&map, &key, p1);
/* print result and free entry*/
puts(entry ? get_value(entry) : "NULL");
free(entry);
} else if (!strcmp("iterate", cmd)) {
struct hashmap_iter iter;
hashmap_iter_init(&map, &iter);
while ((entry = hashmap_iter_next(&iter)))
printf("%s %s\n", entry->key, get_value(entry));
} else if (!strcmp("size", cmd)) {
/* print table sizes */
hashmap: add API to disable item counting when threaded This is to address concerns raised by ThreadSanitizer on the mailing list about threaded unprotected R/W access to map.size with my previous "disallow rehash" change (0607e10009ee4e37cb49b4cec8d28a9dda1656a4). See: https://public-inbox.org/git/adb37b70139fd1e2bac18bfd22c8b96683ae18eb.1502780344.git.martin.agren@gmail.com/ Add API to hashmap to disable item counting and thus automatic rehashing. Also include API to later re-enable them. When item counting is disabled, the map.size field is invalid. So to prevent accidents, the field has been renamed and an accessor function hashmap_get_size() has been added. All direct references to this field have been been updated. And the name of the field changed to map.private_size to communicate this. Here is the relevant output from ThreadSanitizer showing the problem: WARNING: ThreadSanitizer: data race (pid=10554) Read of size 4 at 0x00000082d488 by thread T2 (mutexes: write M16): #0 hashmap_add hashmap.c:209 #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302 #2 handle_range_dir name-hash.c:347 #3 handle_range_1 name-hash.c:415 #4 lazy_dir_thread_proc name-hash.c:471 #5 <null> <null> Previous write of size 4 at 0x00000082d488 by thread T1 (mutexes: write M31): #0 hashmap_add hashmap.c:209 #1 hash_dir_entry_with_parent_and_prefix name-hash.c:302 #2 handle_range_dir name-hash.c:347 #3 handle_range_1 name-hash.c:415 #4 handle_range_dir name-hash.c:380 #5 handle_range_1 name-hash.c:415 #6 lazy_dir_thread_proc name-hash.c:471 #7 <null> <null> Martin gives instructions for running TSan on test t3008 in this post: https://public-inbox.org/git/CAN0heSoJDL9pWELD6ciLTmWf-a=oyxe4EXXOmCKvsG5MSuzxsA@mail.gmail.com/ Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-09-06 17:43:48 +02:00
printf("%u %u\n", map.tablesize,
hashmap_get_size(&map));
} else if (!strcmp("intern", cmd) && l1) {
/* test that strintern works */
const char *i1 = strintern(p1);
const char *i2 = strintern(p1);
if (strcmp(i1, p1))
printf("strintern(%s) returns %s\n", p1, i1);
else if (i1 == p1)
printf("strintern(%s) returns input pointer\n", p1);
else if (i1 != i2)
printf("strintern(%s) != strintern(%s)", i1, i2);
else
printf("%s\n", i1);
} else if (!strcmp("perfhashmap", cmd) && l1 && l2) {
perf_hashmap(atoi(p1), atoi(p2));
} else {
printf("Unknown command %s\n", cmd);
}
}
hashmap_free(&map, 1);
return 0;
}