From 99bf115c879af7e38ef0ca9596fc9db1d6598d5f Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:24 -0700 Subject: [PATCH 01/13] repository: introduce parsed objects field Convert the existing global cache for parsed objects (obj_hash) into repository-specific parsed object caches. Existing code that uses obj_hash is modified to use the parsed object cache of the_repository; future patches will use the parsed object caches of other repositories. Another future use case for a pool of objects is ease of memory management in revision walking: If we can free the rev-list related memory early in pack-objects (e.g. part of repack operation) then it could lower memory pressure significantly when running on large repos. While this has been discussed on the mailing list lately, this series doesn't implement this. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- object.c | 63 +++++++++++++++++++++++++++++++++------------------- object.h | 8 +++++++ repository.c | 7 ++++++ repository.h | 9 ++++++++ 4 files changed, 64 insertions(+), 23 deletions(-) diff --git a/object.c b/object.c index 5044d08e96..f7c624a7ba 100644 --- a/object.c +++ b/object.c @@ -8,17 +8,14 @@ #include "object-store.h" #include "packfile.h" -static struct object **obj_hash; -static int nr_objs, obj_hash_size; - unsigned int get_max_object_index(void) { - return obj_hash_size; + return the_repository->parsed_objects->obj_hash_size; } struct object *get_indexed_object(unsigned int idx) { - return obj_hash[idx]; + return the_repository->parsed_objects->obj_hash[idx]; } static const char *object_type_strings[] = { @@ -90,15 +87,16 @@ struct object *lookup_object(const unsigned char *sha1) unsigned int i, first; struct object *obj; - if (!obj_hash) + if (!the_repository->parsed_objects->obj_hash) return NULL; - first = i = hash_obj(sha1, obj_hash_size); - while ((obj = obj_hash[i]) != NULL) { + first = i = hash_obj(sha1, + the_repository->parsed_objects->obj_hash_size); + 
while ((obj = the_repository->parsed_objects->obj_hash[i]) != NULL) { if (!hashcmp(sha1, obj->oid.hash)) break; i++; - if (i == obj_hash_size) + if (i == the_repository->parsed_objects->obj_hash_size) i = 0; } if (obj && i != first) { @@ -107,7 +105,8 @@ struct object *lookup_object(const unsigned char *sha1) * that we do not need to walk the hash table the next * time we look for it. */ - SWAP(obj_hash[i], obj_hash[first]); + SWAP(the_repository->parsed_objects->obj_hash[i], + the_repository->parsed_objects->obj_hash[first]); } return obj; } @@ -124,19 +123,19 @@ static void grow_object_hash(void) * Note that this size must always be power-of-2 to match hash_obj * above. */ - int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size; + int new_hash_size = the_repository->parsed_objects->obj_hash_size < 32 ? 32 : 2 * the_repository->parsed_objects->obj_hash_size; struct object **new_hash; new_hash = xcalloc(new_hash_size, sizeof(struct object *)); - for (i = 0; i < obj_hash_size; i++) { - struct object *obj = obj_hash[i]; + for (i = 0; i < the_repository->parsed_objects->obj_hash_size; i++) { + struct object *obj = the_repository->parsed_objects->obj_hash[i]; if (!obj) continue; insert_obj_hash(obj, new_hash, new_hash_size); } - free(obj_hash); - obj_hash = new_hash; - obj_hash_size = new_hash_size; + free(the_repository->parsed_objects->obj_hash); + the_repository->parsed_objects->obj_hash = new_hash; + the_repository->parsed_objects->obj_hash_size = new_hash_size; } void *create_object(const unsigned char *sha1, void *o) @@ -147,11 +146,12 @@ void *create_object(const unsigned char *sha1, void *o) obj->flags = 0; hashcpy(obj->oid.hash, sha1); - if (obj_hash_size - 1 <= nr_objs * 2) + if (the_repository->parsed_objects->obj_hash_size - 1 <= the_repository->parsed_objects->nr_objs * 2) grow_object_hash(); - insert_obj_hash(obj, obj_hash, obj_hash_size); - nr_objs++; + insert_obj_hash(obj, the_repository->parsed_objects->obj_hash, + 
the_repository->parsed_objects->obj_hash_size); + the_repository->parsed_objects->nr_objs++; return obj; } @@ -431,8 +431,8 @@ void clear_object_flags(unsigned flags) { int i; - for (i=0; i < obj_hash_size; i++) { - struct object *obj = obj_hash[i]; + for (i=0; i < the_repository->parsed_objects->obj_hash_size; i++) { + struct object *obj = the_repository->parsed_objects->obj_hash[i]; if (obj) obj->flags &= ~flags; } @@ -442,13 +442,20 @@ void clear_commit_marks_all(unsigned int flags) { int i; - for (i = 0; i < obj_hash_size; i++) { - struct object *obj = obj_hash[i]; + for (i = 0; i < the_repository->parsed_objects->obj_hash_size; i++) { + struct object *obj = the_repository->parsed_objects->obj_hash[i]; if (obj && obj->type == OBJ_COMMIT) obj->flags &= ~flags; } } +struct parsed_object_pool *parsed_object_pool_new(void) +{ + struct parsed_object_pool *o = xmalloc(sizeof(*o)); + memset(o, 0, sizeof(*o)); + return o; +} + struct raw_object_store *raw_object_store_new(void) { struct raw_object_store *o = xmalloc(sizeof(*o)); @@ -488,3 +495,13 @@ void raw_object_store_clear(struct raw_object_store *o) close_all_packs(o); o->packed_git = NULL; } + +void parsed_object_pool_clear(struct parsed_object_pool *o) +{ + /* + * TOOD free objects in o->obj_hash. + * + * As objects are allocated in slabs (see alloc.c), we do + * not need to free each object, but each slab instead. 
+ */ +} diff --git a/object.h b/object.h index f13f85b2a9..cecda7da37 100644 --- a/object.h +++ b/object.h @@ -1,6 +1,14 @@ #ifndef OBJECT_H #define OBJECT_H +struct parsed_object_pool { + struct object **obj_hash; + int nr_objs, obj_hash_size; +}; + +struct parsed_object_pool *parsed_object_pool_new(void); +void parsed_object_pool_clear(struct parsed_object_pool *o); + struct object_list { struct object *item; struct object_list *next; diff --git a/repository.c b/repository.c index a4848c1bd0..c23404677e 100644 --- a/repository.c +++ b/repository.c @@ -2,6 +2,7 @@ #include "repository.h" #include "object-store.h" #include "config.h" +#include "object.h" #include "submodule-config.h" /* The main repository */ @@ -14,6 +15,8 @@ void initialize_the_repository(void) the_repo.index = &the_index; the_repo.objects = raw_object_store_new(); + the_repo.parsed_objects = parsed_object_pool_new(); + repo_set_hash_algo(&the_repo, GIT_HASH_SHA1); } @@ -143,6 +146,7 @@ static int repo_init(struct repository *repo, memset(repo, 0, sizeof(*repo)); repo->objects = raw_object_store_new(); + repo->parsed_objects = parsed_object_pool_new(); if (repo_init_gitdir(repo, gitdir)) goto error; @@ -226,6 +230,9 @@ void repo_clear(struct repository *repo) raw_object_store_clear(repo->objects); FREE_AND_NULL(repo->objects); + parsed_object_pool_clear(repo->parsed_objects); + FREE_AND_NULL(repo->parsed_objects); + if (repo->config) { git_configset_clear(repo->config); FREE_AND_NULL(repo->config); diff --git a/repository.h b/repository.h index e6e00f541b..6d19981990 100644 --- a/repository.h +++ b/repository.h @@ -26,6 +26,15 @@ struct repository { */ struct raw_object_store *objects; + /* + * All objects in this repository that have been parsed. 
This structure + * owns all objects it references, so users of "struct object *" + * generally do not need to free them; instead, when a repository is no + * longer used, call parsed_object_pool_clear() on this structure, which + * is called by the repository's repo_clear on its deconstruction. + */ + struct parsed_object_pool *parsed_objects; + /* The store in which the refs are held. */ struct ref_store *refs; From 68f95d382b51b134b138c91f94adb8d9ef2f557a Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:25 -0700 Subject: [PATCH 02/13] object: add repository argument to create_object Add a repository argument to allow the callers of create_object to be more specific about which repository to act on. This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Signed-off-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- blob.c | 4 +++- commit.c | 3 ++- object.c | 5 +++-- object.h | 3 ++- tag.c | 3 ++- tree.c | 3 ++- 6 files changed, 14 insertions(+), 7 deletions(-) diff --git a/blob.c b/blob.c index fa2ab4f7a7..85c2143f29 100644 --- a/blob.c +++ b/blob.c @@ -1,5 +1,6 @@ #include "cache.h" #include "blob.h" +#include "repository.h" const char *blob_type = "blob"; @@ -7,7 +8,8 @@ struct blob *lookup_blob(const struct object_id *oid) { struct object *obj = lookup_object(oid->hash); if (!obj) - return create_object(oid->hash, alloc_blob_node()); + return create_object(the_repository, oid->hash, + alloc_blob_node()); return object_as_type(obj, OBJ_BLOB, 0); } diff --git a/commit.c b/commit.c index ca474a7c11..9106acf0aa 100644 --- a/commit.c +++ b/commit.c @@ -50,7 +50,8 @@ struct commit *lookup_commit(const struct object_id *oid) { struct object *obj = lookup_object(oid->hash); if (!obj) - return create_object(oid->hash, alloc_commit_node()); + return create_object(the_repository, oid->hash, + alloc_commit_node()); return object_as_type(obj, 
OBJ_COMMIT, 0); } diff --git a/object.c b/object.c index f7c624a7ba..2de029275b 100644 --- a/object.c +++ b/object.c @@ -138,7 +138,7 @@ static void grow_object_hash(void) the_repository->parsed_objects->obj_hash_size = new_hash_size; } -void *create_object(const unsigned char *sha1, void *o) +void *create_object_the_repository(const unsigned char *sha1, void *o) { struct object *obj = o; @@ -178,7 +178,8 @@ struct object *lookup_unknown_object(const unsigned char *sha1) { struct object *obj = lookup_object(sha1); if (!obj) - obj = create_object(sha1, alloc_object_node()); + obj = create_object(the_repository, sha1, + alloc_object_node()); return obj; } diff --git a/object.h b/object.h index cecda7da37..2cb0b24108 100644 --- a/object.h +++ b/object.h @@ -93,7 +93,8 @@ extern struct object *get_indexed_object(unsigned int); */ struct object *lookup_object(const unsigned char *sha1); -extern void *create_object(const unsigned char *sha1, void *obj); +#define create_object(r, s, o) create_object_##r(s, o) +extern void *create_object_the_repository(const unsigned char *sha1, void *obj); void *object_as_type(struct object *obj, enum object_type type, int quiet); diff --git a/tag.c b/tag.c index 3d37c1bd25..7150b759d6 100644 --- a/tag.c +++ b/tag.c @@ -93,7 +93,8 @@ struct tag *lookup_tag(const struct object_id *oid) { struct object *obj = lookup_object(oid->hash); if (!obj) - return create_object(oid->hash, alloc_tag_node()); + return create_object(the_repository, oid->hash, + alloc_tag_node()); return object_as_type(obj, OBJ_TAG, 0); } diff --git a/tree.c b/tree.c index 1c68ea586b..63730e3fb4 100644 --- a/tree.c +++ b/tree.c @@ -196,7 +196,8 @@ struct tree *lookup_tree(const struct object_id *oid) { struct object *obj = lookup_object(oid->hash); if (!obj) - return create_object(oid->hash, alloc_tree_node()); + return create_object(the_repository, oid->hash, + alloc_tree_node()); return object_as_type(obj, OBJ_TREE, 0); } From c077a4526ba456ada28d16db9c945afd9a4a57de 
Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Tue, 8 May 2018 12:37:26 -0700 Subject: [PATCH 03/13] object: add repository argument to grow_object_hash Add a repository argument to allow the caller of grow_object_hash to be more specific about which repository to handle. This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Signed-off-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- object.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/object.c b/object.c index 2de029275b..91edc30770 100644 --- a/object.c +++ b/object.c @@ -116,7 +116,8 @@ struct object *lookup_object(const unsigned char *sha1) * power of 2 (but at least 32). Copy the existing values to the new * hash map. */ -static void grow_object_hash(void) +#define grow_object_hash(r) grow_object_hash_##r() +static void grow_object_hash_the_repository(void) { int i; /* @@ -147,7 +148,7 @@ void *create_object_the_repository(const unsigned char *sha1, void *o) hashcpy(obj->oid.hash, sha1); if (the_repository->parsed_objects->obj_hash_size - 1 <= the_repository->parsed_objects->nr_objs * 2) - grow_object_hash(); + grow_object_hash(the_repository); insert_obj_hash(obj, the_repository->parsed_objects->obj_hash, the_repository->parsed_objects->obj_hash_size); From f0de1d62ae5982630cfb2c713ddcda853b9ee0cf Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:27 -0700 Subject: [PATCH 04/13] alloc: add repository argument to alloc_blob_node This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. 
Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- alloc.c | 2 +- blob.c | 2 +- cache.h | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/alloc.c b/alloc.c index 12afadfacd..6c5c376a25 100644 --- a/alloc.c +++ b/alloc.c @@ -49,7 +49,7 @@ static inline void *alloc_node(struct alloc_state *s, size_t node_size) } static struct alloc_state blob_state; -void *alloc_blob_node(void) +void *alloc_blob_node_the_repository(void) { struct blob *b = alloc_node(&blob_state, sizeof(struct blob)); b->object.type = OBJ_BLOB; diff --git a/blob.c b/blob.c index 85c2143f29..9e64f30189 100644 --- a/blob.c +++ b/blob.c @@ -9,7 +9,7 @@ struct blob *lookup_blob(const struct object_id *oid) struct object *obj = lookup_object(oid->hash); if (!obj) return create_object(the_repository, oid->hash, - alloc_blob_node()); + alloc_blob_node(the_repository)); return object_as_type(obj, OBJ_BLOB, 0); } diff --git a/cache.h b/cache.h index 3a4d80e92b..2258e61127 100644 --- a/cache.h +++ b/cache.h @@ -1764,7 +1764,8 @@ int decode_85(char *dst, const char *line, int linelen); void encode_85(char *buf, const unsigned char *data, int bytes); /* alloc.c */ -extern void *alloc_blob_node(void); +#define alloc_blob_node(r) alloc_blob_node_##r() +extern void *alloc_blob_node_the_repository(void); extern void *alloc_tree_node(void); extern void *alloc_commit_node(void); extern void *alloc_tag_node(void); From cf7203bdc65f11de89d27f52115cb98052c52ce8 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:28 -0700 Subject: [PATCH 05/13] alloc: add repository argument to alloc_tree_node This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. 
Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- alloc.c | 2 +- cache.h | 3 ++- tree.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/alloc.c b/alloc.c index 6c5c376a25..2c8d143075 100644 --- a/alloc.c +++ b/alloc.c @@ -57,7 +57,7 @@ void *alloc_blob_node_the_repository(void) } static struct alloc_state tree_state; -void *alloc_tree_node(void) +void *alloc_tree_node_the_repository(void) { struct tree *t = alloc_node(&tree_state, sizeof(struct tree)); t->object.type = OBJ_TREE; diff --git a/cache.h b/cache.h index 2258e61127..1717d07a2c 100644 --- a/cache.h +++ b/cache.h @@ -1766,7 +1766,8 @@ void encode_85(char *buf, const unsigned char *data, int bytes); /* alloc.c */ #define alloc_blob_node(r) alloc_blob_node_##r() extern void *alloc_blob_node_the_repository(void); -extern void *alloc_tree_node(void); +#define alloc_tree_node(r) alloc_tree_node_##r() +extern void *alloc_tree_node_the_repository(void); extern void *alloc_commit_node(void); extern void *alloc_tag_node(void); extern void *alloc_object_node(void); diff --git a/tree.c b/tree.c index 63730e3fb4..58cf19b4fa 100644 --- a/tree.c +++ b/tree.c @@ -197,7 +197,7 @@ struct tree *lookup_tree(const struct object_id *oid) struct object *obj = lookup_object(oid->hash); if (!obj) return create_object(the_repository, oid->hash, - alloc_tree_node()); + alloc_tree_node(the_repository)); return object_as_type(obj, OBJ_TREE, 0); } From 8ba0e5ec57e4a7da18f735416e3028a9a8b8b1ad Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:29 -0700 Subject: [PATCH 06/13] alloc: add repository argument to alloc_commit_node This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. 
Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- alloc.c | 2 +- blame.c | 2 +- cache.h | 3 ++- commit.c | 2 +- merge-recursive.c | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/alloc.c b/alloc.c index 2c8d143075..9e2b897ec1 100644 --- a/alloc.c +++ b/alloc.c @@ -88,7 +88,7 @@ unsigned int alloc_commit_index(void) return count++; } -void *alloc_commit_node(void) +void *alloc_commit_node_the_repository(void) { struct commit *c = alloc_node(&commit_state, sizeof(struct commit)); c->object.type = OBJ_COMMIT; diff --git a/blame.c b/blame.c index dfa24473dc..ba9b18e754 100644 --- a/blame.c +++ b/blame.c @@ -161,7 +161,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, read_cache(); time(&now); - commit = alloc_commit_node(); + commit = alloc_commit_node(the_repository); commit->object.parsed = 1; commit->date = now; parent_tail = &commit->parents; diff --git a/cache.h b/cache.h index 1717d07a2c..bf6e8c87d8 100644 --- a/cache.h +++ b/cache.h @@ -1768,7 +1768,8 @@ void encode_85(char *buf, const unsigned char *data, int bytes); extern void *alloc_blob_node_the_repository(void); #define alloc_tree_node(r) alloc_tree_node_##r() extern void *alloc_tree_node_the_repository(void); -extern void *alloc_commit_node(void); +#define alloc_commit_node(r) alloc_commit_node_##r() +extern void *alloc_commit_node_the_repository(void); extern void *alloc_tag_node(void); extern void *alloc_object_node(void); extern void alloc_report(void); diff --git a/commit.c b/commit.c index 9106acf0aa..a9a43e79ba 100644 --- a/commit.c +++ b/commit.c @@ -51,7 +51,7 @@ struct commit *lookup_commit(const struct object_id *oid) struct object *obj = lookup_object(oid->hash); if (!obj) return create_object(the_repository, oid->hash, - alloc_commit_node()); + alloc_commit_node(the_repository)); return object_as_type(obj, OBJ_COMMIT, 0); } diff --git a/merge-recursive.c b/merge-recursive.c index 0c0d48624d..6dac890864 100644 --- a/merge-recursive.c 
+++ b/merge-recursive.c @@ -98,7 +98,7 @@ static struct tree *shift_tree_object(struct tree *one, struct tree *two, static struct commit *make_virtual_commit(struct tree *tree, const char *comment) { - struct commit *commit = alloc_commit_node(); + struct commit *commit = alloc_commit_node(the_repository); set_merge_remote_desc(commit, comment, (struct object *)commit); commit->tree = tree; From a0bd9086bb66fa8cb84bd4fac6441699121e1327 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:30 -0700 Subject: [PATCH 07/13] alloc: add repository argument to alloc_tag_node This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- alloc.c | 2 +- cache.h | 3 ++- tag.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/alloc.c b/alloc.c index 9e2b897ec1..290250e359 100644 --- a/alloc.c +++ b/alloc.c @@ -65,7 +65,7 @@ void *alloc_tree_node_the_repository(void) } static struct alloc_state tag_state; -void *alloc_tag_node(void) +void *alloc_tag_node_the_repository(void) { struct tag *t = alloc_node(&tag_state, sizeof(struct tag)); t->object.type = OBJ_TAG; diff --git a/cache.h b/cache.h index bf6e8c87d8..32f340cde5 100644 --- a/cache.h +++ b/cache.h @@ -1770,7 +1770,8 @@ extern void *alloc_blob_node_the_repository(void); extern void *alloc_tree_node_the_repository(void); #define alloc_commit_node(r) alloc_commit_node_##r() extern void *alloc_commit_node_the_repository(void); -extern void *alloc_tag_node(void); +#define alloc_tag_node(r) alloc_tag_node_##r() +extern void *alloc_tag_node_the_repository(void); extern void *alloc_object_node(void); extern void alloc_report(void); extern unsigned int alloc_commit_index(void); diff --git a/tag.c b/tag.c index 7150b759d6..02ef4eaafc 100644 --- a/tag.c +++ b/tag.c @@ -94,7 
+94,7 @@ struct tag *lookup_tag(const struct object_id *oid) struct object *obj = lookup_object(oid->hash); if (!obj) return create_object(the_repository, oid->hash, - alloc_tag_node()); + alloc_tag_node(the_repository)); return object_as_type(obj, OBJ_TAG, 0); } From 13e3fdcb767fe860953f8c27eb1985bd5d15674d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:31 -0700 Subject: [PATCH 08/13] alloc: add repository argument to alloc_object_node This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- alloc.c | 2 +- cache.h | 3 ++- object.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/alloc.c b/alloc.c index 290250e359..f031ce422d 100644 --- a/alloc.c +++ b/alloc.c @@ -73,7 +73,7 @@ void *alloc_tag_node_the_repository(void) } static struct alloc_state object_state; -void *alloc_object_node(void) +void *alloc_object_node_the_repository(void) { struct object *obj = alloc_node(&object_state, sizeof(union any_object)); obj->type = OBJ_NONE; diff --git a/cache.h b/cache.h index 32f340cde5..2d60359a96 100644 --- a/cache.h +++ b/cache.h @@ -1772,7 +1772,8 @@ extern void *alloc_tree_node_the_repository(void); extern void *alloc_commit_node_the_repository(void); #define alloc_tag_node(r) alloc_tag_node_##r() extern void *alloc_tag_node_the_repository(void); -extern void *alloc_object_node(void); +#define alloc_object_node(r) alloc_object_node_##r() +extern void *alloc_object_node_the_repository(void); extern void alloc_report(void); extern unsigned int alloc_commit_index(void); diff --git a/object.c b/object.c index 91edc30770..b8c3f923c5 100644 --- a/object.c +++ b/object.c @@ -180,7 +180,7 @@ struct object *lookup_unknown_object(const unsigned char *sha1) struct object *obj = lookup_object(sha1); if (!obj) obj 
= create_object(the_repository, sha1, - alloc_object_node()); + alloc_object_node(the_repository)); return obj; } From 17bfe87369a726ebf3b1156ffd38c014bbd67e88 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:32 -0700 Subject: [PATCH 09/13] alloc: add repository argument to alloc_report This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- alloc.c | 2 +- cache.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/alloc.c b/alloc.c index f031ce422d..28b85b2214 100644 --- a/alloc.c +++ b/alloc.c @@ -105,7 +105,7 @@ static void report(const char *name, unsigned int count, size_t size) #define REPORT(name, type) \ report(#name, name##_state.count, name##_state.count * sizeof(type) >> 10) -void alloc_report(void) +void alloc_report_the_repository(void) { REPORT(blob, struct blob); REPORT(tree, struct tree); diff --git a/cache.h b/cache.h index 2d60359a96..01cc207d21 100644 --- a/cache.h +++ b/cache.h @@ -1774,7 +1774,8 @@ extern void *alloc_commit_node_the_repository(void); extern void *alloc_tag_node_the_repository(void); #define alloc_object_node(r) alloc_object_node_##r() extern void *alloc_object_node_the_repository(void); -extern void alloc_report(void); +#define alloc_report(r) alloc_report_##r() +extern void alloc_report_the_repository(void); extern unsigned int alloc_commit_index(void); /* pkt-line.c */ From dd5d9deb0155a27cb2d74d1a0faf0af84ef5f355 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:33 -0700 Subject: [PATCH 10/13] alloc: add repository argument to alloc_commit_index This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. 
Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- alloc.c | 4 ++-- cache.h | 3 ++- object.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/alloc.c b/alloc.c index 28b85b2214..277dadd221 100644 --- a/alloc.c +++ b/alloc.c @@ -82,7 +82,7 @@ void *alloc_object_node_the_repository(void) static struct alloc_state commit_state; -unsigned int alloc_commit_index(void) +unsigned int alloc_commit_index_the_repository(void) { static unsigned int count; return count++; @@ -92,7 +92,7 @@ void *alloc_commit_node_the_repository(void) { struct commit *c = alloc_node(&commit_state, sizeof(struct commit)); c->object.type = OBJ_COMMIT; - c->index = alloc_commit_index(); + c->index = alloc_commit_index(the_repository); return c; } diff --git a/cache.h b/cache.h index 01cc207d21..0e6c5dd563 100644 --- a/cache.h +++ b/cache.h @@ -1776,7 +1776,8 @@ extern void *alloc_tag_node_the_repository(void); extern void *alloc_object_node_the_repository(void); #define alloc_report(r) alloc_report_##r() extern void alloc_report_the_repository(void); -extern unsigned int alloc_commit_index(void); +#define alloc_commit_index(r) alloc_commit_index_##r() +extern unsigned int alloc_commit_index_the_repository(void); /* pkt-line.c */ void packet_trace_identity(const char *prog); diff --git a/object.c b/object.c index b8c3f923c5..a365a91085 100644 --- a/object.c +++ b/object.c @@ -162,7 +162,7 @@ void *object_as_type(struct object *obj, enum object_type type, int quiet) return obj; else if (obj->type == OBJ_NONE) { if (type == OBJ_COMMIT) - ((struct commit *)obj)->index = alloc_commit_index(); + ((struct commit *)obj)->index = alloc_commit_index(the_repository); obj->type = type; return obj; } From 346a817a7271b58811a8004fa225ab88bc94e9c3 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:34 -0700 Subject: [PATCH 11/13] object: allow grow_object_hash to 
handle arbitrary repositories Reviewed-by: Jonathan Tan Signed-off-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- object.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/object.c b/object.c index a365a91085..0fcd6f6df4 100644 --- a/object.c +++ b/object.c @@ -116,27 +116,27 @@ struct object *lookup_object(const unsigned char *sha1) * power of 2 (but at least 32). Copy the existing values to the new * hash map. */ -#define grow_object_hash(r) grow_object_hash_##r() -static void grow_object_hash_the_repository(void) +static void grow_object_hash(struct repository *r) { int i; /* * Note that this size must always be power-of-2 to match hash_obj * above. */ - int new_hash_size = the_repository->parsed_objects->obj_hash_size < 32 ? 32 : 2 * the_repository->parsed_objects->obj_hash_size; + int new_hash_size = r->parsed_objects->obj_hash_size < 32 ? 32 : 2 * r->parsed_objects->obj_hash_size; struct object **new_hash; new_hash = xcalloc(new_hash_size, sizeof(struct object *)); - for (i = 0; i < the_repository->parsed_objects->obj_hash_size; i++) { - struct object *obj = the_repository->parsed_objects->obj_hash[i]; + for (i = 0; i < r->parsed_objects->obj_hash_size; i++) { + struct object *obj = r->parsed_objects->obj_hash[i]; + if (!obj) continue; insert_obj_hash(obj, new_hash, new_hash_size); } - free(the_repository->parsed_objects->obj_hash); - the_repository->parsed_objects->obj_hash = new_hash; - the_repository->parsed_objects->obj_hash_size = new_hash_size; + free(r->parsed_objects->obj_hash); + r->parsed_objects->obj_hash = new_hash; + r->parsed_objects->obj_hash_size = new_hash_size; } void *create_object_the_repository(const unsigned char *sha1, void *o) From 341e45e46bba094ef1274957ef5891f43e91b344 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:35 -0700 Subject: [PATCH 12/13] object: allow create_object to handle arbitrary repositories Reviewed-by: Jonathan Tan 
Signed-off-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- object.c | 12 ++++++------ object.h | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/object.c b/object.c index 0fcd6f6df4..49b952e929 100644 --- a/object.c +++ b/object.c @@ -139,7 +139,7 @@ static void grow_object_hash(struct repository *r) r->parsed_objects->obj_hash_size = new_hash_size; } -void *create_object_the_repository(const unsigned char *sha1, void *o) +void *create_object(struct repository *r, const unsigned char *sha1, void *o) { struct object *obj = o; @@ -147,12 +147,12 @@ void *create_object_the_repository(const unsigned char *sha1, void *o) obj->flags = 0; hashcpy(obj->oid.hash, sha1); - if (the_repository->parsed_objects->obj_hash_size - 1 <= the_repository->parsed_objects->nr_objs * 2) - grow_object_hash(the_repository); + if (r->parsed_objects->obj_hash_size - 1 <= r->parsed_objects->nr_objs * 2) + grow_object_hash(r); - insert_obj_hash(obj, the_repository->parsed_objects->obj_hash, - the_repository->parsed_objects->obj_hash_size); - the_repository->parsed_objects->nr_objs++; + insert_obj_hash(obj, r->parsed_objects->obj_hash, + r->parsed_objects->obj_hash_size); + r->parsed_objects->nr_objs++; return obj; } diff --git a/object.h b/object.h index 2cb0b24108..b41d7a3acc 100644 --- a/object.h +++ b/object.h @@ -93,8 +93,7 @@ extern struct object *get_indexed_object(unsigned int); */ struct object *lookup_object(const unsigned char *sha1); -#define create_object(r, s, o) create_object_##r(s, o) -extern void *create_object_the_repository(const unsigned char *sha1, void *obj); +extern void *create_object(struct repository *r, const unsigned char *sha1, void *obj); void *object_as_type(struct object *obj, enum object_type type, int quiet); From 14ba97f81c7b94e10d591b363688a073023f332d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 15 May 2018 14:48:42 -0700 Subject: [PATCH 13/13] alloc: allow arbitrary repositories for alloc 
functions We have to convert all of the alloc functions at once, because alloc_report uses a funky macro for reporting. It is better for the sake of mechanical conversion to convert multiple functions at once rather than changing the structure of the reporting function. We record all memory allocation in alloc.c, and free them in clear_alloc_state, which is called for all repositories except the_repository. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- alloc.c | 65 ++++++++++++++++++++++++++++++----------------- alloc.h | 19 ++++++++++++++ blame.c | 1 + blob.c | 1 + cache.h | 16 ------------ commit.c | 12 +++++++++ commit.h | 6 +++++ merge-recursive.c | 1 + object.c | 42 ++++++++++++++++++++++++++++-- object.h | 8 ++++++ tag.c | 9 +++++++ tag.h | 1 + tree.c | 1 + 13 files changed, 140 insertions(+), 42 deletions(-) create mode 100644 alloc.h diff --git a/alloc.c b/alloc.c index 277dadd221..714df63316 100644 --- a/alloc.c +++ b/alloc.c @@ -4,8 +4,7 @@ * Copyright (C) 2006 Linus Torvalds * * The standard malloc/free wastes too much space for objects, partly because - * it maintains all the allocation infrastructure (which isn't needed, since - * we never free an object descriptor anyway), but even more because it ends + * it maintains all the allocation infrastructure, but even more because it ends * up with maximal alignment because it doesn't know what the object alignment * for the new allocation is. 
*/ @@ -15,6 +14,7 @@ #include "tree.h" #include "commit.h" #include "tag.h" +#include "alloc.h" #define BLOCKING 1024 @@ -30,8 +30,27 @@ struct alloc_state { int count; /* total number of nodes allocated */ int nr; /* number of nodes left in current allocation */ void *p; /* first free node in current allocation */ + + /* bookkeeping of allocations */ + void **slabs; + int slab_nr, slab_alloc; }; +void *allocate_alloc_state(void) +{ + return xcalloc(1, sizeof(struct alloc_state)); +} + +void clear_alloc_state(struct alloc_state *s) +{ + while (s->slab_nr > 0) { + s->slab_nr--; + free(s->slabs[s->slab_nr]); + } + + FREE_AND_NULL(s->slabs); +} + static inline void *alloc_node(struct alloc_state *s, size_t node_size) { void *ret; @@ -39,60 +58,57 @@ static inline void *alloc_node(struct alloc_state *s, size_t node_size) if (!s->nr) { s->nr = BLOCKING; s->p = xmalloc(BLOCKING * node_size); + + ALLOC_GROW(s->slabs, s->slab_nr + 1, s->slab_alloc); + s->slabs[s->slab_nr++] = s->p; } s->nr--; s->count++; ret = s->p; s->p = (char *)s->p + node_size; memset(ret, 0, node_size); + return ret; } -static struct alloc_state blob_state; -void *alloc_blob_node_the_repository(void) +void *alloc_blob_node(struct repository *r) { - struct blob *b = alloc_node(&blob_state, sizeof(struct blob)); + struct blob *b = alloc_node(r->parsed_objects->blob_state, sizeof(struct blob)); b->object.type = OBJ_BLOB; return b; } -static struct alloc_state tree_state; -void *alloc_tree_node_the_repository(void) +void *alloc_tree_node(struct repository *r) { - struct tree *t = alloc_node(&tree_state, sizeof(struct tree)); + struct tree *t = alloc_node(r->parsed_objects->tree_state, sizeof(struct tree)); t->object.type = OBJ_TREE; return t; } -static struct alloc_state tag_state; -void *alloc_tag_node_the_repository(void) +void *alloc_tag_node(struct repository *r) { - struct tag *t = alloc_node(&tag_state, sizeof(struct tag)); + struct tag *t = alloc_node(r->parsed_objects->tag_state, sizeof(struct 
tag)); t->object.type = OBJ_TAG; return t; } -static struct alloc_state object_state; -void *alloc_object_node_the_repository(void) +void *alloc_object_node(struct repository *r) { - struct object *obj = alloc_node(&object_state, sizeof(union any_object)); + struct object *obj = alloc_node(r->parsed_objects->object_state, sizeof(union any_object)); obj->type = OBJ_NONE; return obj; } -static struct alloc_state commit_state; - -unsigned int alloc_commit_index_the_repository(void) +unsigned int alloc_commit_index(struct repository *r) { - static unsigned int count; - return count++; + return r->parsed_objects->commit_count++; } -void *alloc_commit_node_the_repository(void) +void *alloc_commit_node(struct repository *r) { - struct commit *c = alloc_node(&commit_state, sizeof(struct commit)); + struct commit *c = alloc_node(r->parsed_objects->commit_state, sizeof(struct commit)); c->object.type = OBJ_COMMIT; - c->index = alloc_commit_index(the_repository); + c->index = alloc_commit_index(r); return c; } @@ -103,9 +119,10 @@ static void report(const char *name, unsigned int count, size_t size) } #define REPORT(name, type) \ - report(#name, name##_state.count, name##_state.count * sizeof(type) >> 10) + report(#name, r->parsed_objects->name##_state->count, \ + r->parsed_objects->name##_state->count * sizeof(type) >> 10) -void alloc_report_the_repository(void) +void alloc_report(struct repository *r) { REPORT(blob, struct blob); REPORT(tree, struct tree); diff --git a/alloc.h b/alloc.h new file mode 100644 index 0000000000..3e4e828db4 --- /dev/null +++ b/alloc.h @@ -0,0 +1,19 @@ +#ifndef ALLOC_H +#define ALLOC_H + +struct tree; +struct commit; +struct tag; + +void *alloc_blob_node(struct repository *r); +void *alloc_tree_node(struct repository *r); +void *alloc_commit_node(struct repository *r); +void *alloc_tag_node(struct repository *r); +void *alloc_object_node(struct repository *r); +void alloc_report(struct repository *r); +unsigned int alloc_commit_index(struct 
repository *r); + +void *allocate_alloc_state(void); +void clear_alloc_state(struct alloc_state *s); + +#endif diff --git a/blame.c b/blame.c index ba9b18e754..3a11f1ce52 100644 --- a/blame.c +++ b/blame.c @@ -6,6 +6,7 @@ #include "diffcore.h" #include "tag.h" #include "blame.h" +#include "alloc.h" void blame_origin_decref(struct blame_origin *o) { diff --git a/blob.c b/blob.c index 9e64f30189..458dafa811 100644 --- a/blob.c +++ b/blob.c @@ -1,6 +1,7 @@ #include "cache.h" #include "blob.h" #include "repository.h" +#include "alloc.h" const char *blob_type = "blob"; diff --git a/cache.h b/cache.h index 0e6c5dd563..c75559b7d3 100644 --- a/cache.h +++ b/cache.h @@ -1763,22 +1763,6 @@ extern const char *excludes_file; int decode_85(char *dst, const char *line, int linelen); void encode_85(char *buf, const unsigned char *data, int bytes); -/* alloc.c */ -#define alloc_blob_node(r) alloc_blob_node_##r() -extern void *alloc_blob_node_the_repository(void); -#define alloc_tree_node(r) alloc_tree_node_##r() -extern void *alloc_tree_node_the_repository(void); -#define alloc_commit_node(r) alloc_commit_node_##r() -extern void *alloc_commit_node_the_repository(void); -#define alloc_tag_node(r) alloc_tag_node_##r() -extern void *alloc_tag_node_the_repository(void); -#define alloc_object_node(r) alloc_object_node_##r() -extern void *alloc_object_node_the_repository(void); -#define alloc_report(r) alloc_report_##r() -extern void alloc_report_the_repository(void); -#define alloc_commit_index(r) alloc_commit_index_##r() -extern unsigned int alloc_commit_index_the_repository(void); - /* pkt-line.c */ void packet_trace_identity(const char *prog); diff --git a/commit.c b/commit.c index a9a43e79ba..5eb4d2f08f 100644 --- a/commit.c +++ b/commit.c @@ -6,6 +6,7 @@ #include "diff.h" #include "revision.h" #include "notes.h" +#include "alloc.h" #include "gpg-interface.h" #include "mergesort.h" #include "commit-slab.h" @@ -296,6 +297,17 @@ void free_commit_buffer(struct commit *commit) } } 
+void release_commit_memory(struct commit *c) +{ + c->tree = NULL; + c->index = 0; + free_commit_buffer(c); + free_commit_list(c->parents); + /* TODO: what about commit->util? */ + + c->object.parsed = 0; +} + const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep) { struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit); diff --git a/commit.h b/commit.h index 0fb8271665..2d764ab7d8 100644 --- a/commit.h +++ b/commit.h @@ -99,6 +99,12 @@ void unuse_commit_buffer(const struct commit *, const void *buffer); */ void free_commit_buffer(struct commit *); +/* + * Release memory related to a commit, including the parent list and + * any cached object buffer. + */ +void release_commit_memory(struct commit *c); + /* * Disassociate any cached object buffer from the commit, but do not free it. * The buffer (or NULL, if none) is returned. diff --git a/merge-recursive.c b/merge-recursive.c index 6dac890864..cbded673c2 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -15,6 +15,7 @@ #include "diff.h" #include "diffcore.h" #include "tag.h" +#include "alloc.h" #include "unpack-trees.h" #include "string-list.h" #include "xdiff-interface.h" diff --git a/object.c b/object.c index 49b952e929..8e29f63bf2 100644 --- a/object.c +++ b/object.c @@ -5,6 +5,7 @@ #include "tree.h" #include "commit.h" #include "tag.h" +#include "alloc.h" #include "object-store.h" #include "packfile.h" @@ -455,6 +456,13 @@ struct parsed_object_pool *parsed_object_pool_new(void) { struct parsed_object_pool *o = xmalloc(sizeof(*o)); memset(o, 0, sizeof(*o)); + + o->blob_state = allocate_alloc_state(); + o->tree_state = allocate_alloc_state(); + o->commit_state = allocate_alloc_state(); + o->tag_state = allocate_alloc_state(); + o->object_state = allocate_alloc_state(); + return o; } @@ -501,9 +509,39 @@ void raw_object_store_clear(struct raw_object_store *o) void parsed_object_pool_clear(struct parsed_object_pool *o) { /* - * TOOD free objects in o->obj_hash. 
- * * As objects are allocated in slabs (see alloc.c), we do * not need to free each object, but each slab instead. + * + * Before doing so, we need to free any additional memory + * the objects may hold. */ + unsigned i; + + for (i = 0; i < o->obj_hash_size; i++) { + struct object *obj = o->obj_hash[i]; + + if (!obj) + continue; + + if (obj->type == OBJ_TREE) + free_tree_buffer((struct tree*)obj); + else if (obj->type == OBJ_COMMIT) + release_commit_memory((struct commit*)obj); + else if (obj->type == OBJ_TAG) + release_tag_memory((struct tag*)obj); + } + + FREE_AND_NULL(o->obj_hash); + o->obj_hash_size = 0; + + clear_alloc_state(o->blob_state); + clear_alloc_state(o->tree_state); + clear_alloc_state(o->commit_state); + clear_alloc_state(o->tag_state); + clear_alloc_state(o->object_state); + FREE_AND_NULL(o->blob_state); + FREE_AND_NULL(o->tree_state); + FREE_AND_NULL(o->commit_state); + FREE_AND_NULL(o->tag_state); + FREE_AND_NULL(o->object_state); } diff --git a/object.h b/object.h index b41d7a3acc..7916edb4ed 100644 --- a/object.h +++ b/object.h @@ -4,6 +4,14 @@ struct parsed_object_pool { struct object **obj_hash; int nr_objs, obj_hash_size; + + /* TODO: migrate alloc_states to mem-pool? 
*/ + struct alloc_state *blob_state; + struct alloc_state *tree_state; + struct alloc_state *commit_state; + struct alloc_state *tag_state; + struct alloc_state *object_state; + unsigned commit_count; }; struct parsed_object_pool *parsed_object_pool_new(void); diff --git a/tag.c b/tag.c index 02ef4eaafc..7c12426b4e 100644 --- a/tag.c +++ b/tag.c @@ -3,6 +3,7 @@ #include "commit.h" #include "tree.h" #include "blob.h" +#include "alloc.h" #include "gpg-interface.h" const char *tag_type = "tag"; @@ -115,6 +116,14 @@ static timestamp_t parse_tag_date(const char *buf, const char *tail) return parse_timestamp(dateptr, NULL, 10); } +void release_tag_memory(struct tag *t) +{ + free(t->tag); + t->tagged = NULL; + t->object.parsed = 0; + t->date = 0; +} + int parse_tag_buffer(struct tag *item, const void *data, unsigned long size) { struct object_id oid; diff --git a/tag.h b/tag.h index d469534e82..9057d76a50 100644 --- a/tag.h +++ b/tag.h @@ -15,6 +15,7 @@ struct tag { extern struct tag *lookup_tag(const struct object_id *oid); extern int parse_tag_buffer(struct tag *item, const void *data, unsigned long size); extern int parse_tag(struct tag *item); +extern void release_tag_memory(struct tag *t); extern struct object *deref_tag(struct object *, const char *, int); extern struct object *deref_tag_noverify(struct object *); extern int gpg_verify_tag(const struct object_id *oid, diff --git a/tree.c b/tree.c index 58cf19b4fa..8f8ef3189a 100644 --- a/tree.c +++ b/tree.c @@ -5,6 +5,7 @@ #include "blob.h" #include "commit.h" #include "tag.h" +#include "alloc.h" #include "tree-walk.h" const char *tree_type = "tree";