Merge branch 'jk/oid-array-cleanup'

Code clean-up. * jk/oid-array-cleanup: commit-graph: use size_t for array allocation and indexing commit-graph: replace packed_oid_list with oid_array commit-graph: drop count_distinct_commits() function oid-array: provide a for-loop iterator oid-array: make sort function public cache.h: move hash/oid functions to hash.h t0064: make duplicate tests more robust t0064: drop sha1 mention from filename oid-array.h: drop sha1 mention from header guard
2024-05-09 14:56:07 +02:00 · 2020-12-17 15:06:40 -08:00 · 2020-12-17 15:06:40 -08:00 · e5ace7167a
parent 21127fa982 3361390cbe
commit e5ace7167a
6 changed files with 157 additions and 199 deletions
--- a/cache.h
+++ b/cache.h
@ -1123,100 +1123,6 @@ const char *repo_find_unique_abbrev(struct repository *r, const struct object_id
 int repo_find_unique_abbrev_r(struct repository *r, char *hex, const struct object_id *oid, int len);
 #define find_unique_abbrev_r(hex, oid, len) repo_find_unique_abbrev_r(the_repository, hex, oid, len)

-extern const struct object_id null_oid;
-
-static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
-{
-	/*
-	 * Teach the compiler that there are only two possibilities of hash size
-	 * here, so that it can optimize for this case as much as possible.
-	 */
-	if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
-		return memcmp(sha1, sha2, GIT_MAX_RAWSZ);
-	return memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
-}
-
-static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
-{
-	return hashcmp(oid1->hash, oid2->hash);
-}
-
-static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
-{
-	/*
-	 * We write this here instead of deferring to hashcmp so that the
-	 * compiler can properly inline it and avoid calling memcmp.
-	 */
-	if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
-		return !memcmp(sha1, sha2, GIT_MAX_RAWSZ);
-	return !memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
-}
-
-static inline int oideq(const struct object_id *oid1, const struct object_id *oid2)
-{
-	return hasheq(oid1->hash, oid2->hash);
-}
-
-static inline int is_null_oid(const struct object_id *oid)
-{
-	return oideq(oid, &null_oid);
-}
-
-static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src)
-{
-	memcpy(sha_dst, sha_src, the_hash_algo->rawsz);
-}
-
-static inline void oidcpy(struct object_id *dst, const struct object_id *src)
-{
-	memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ);
-}
-
-static inline struct object_id *oiddup(const struct object_id *src)
-{
-	struct object_id *dst = xmalloc(sizeof(struct object_id));
-	oidcpy(dst, src);
-	return dst;
-}
-
-static inline void hashclr(unsigned char *hash)
-{
-	memset(hash, 0, the_hash_algo->rawsz);
-}
-
-static inline void oidclr(struct object_id *oid)
-{
-	memset(oid->hash, 0, GIT_MAX_RAWSZ);
-}
-
-static inline void oidread(struct object_id *oid, const unsigned char *hash)
-{
-	memcpy(oid->hash, hash, the_hash_algo->rawsz);
-}
-
-static inline int is_empty_blob_sha1(const unsigned char *sha1)
-{
-	return hasheq(sha1, the_hash_algo->empty_blob->hash);
-}
-
-static inline int is_empty_blob_oid(const struct object_id *oid)
-{
-	return oideq(oid, the_hash_algo->empty_blob);
-}
-
-static inline int is_empty_tree_sha1(const unsigned char *sha1)
-{
-	return hasheq(sha1, the_hash_algo->empty_tree->hash);
-}
-
-static inline int is_empty_tree_oid(const struct object_id *oid)
-{
-	return oideq(oid, the_hash_algo->empty_tree);
-}
-
-const char *empty_tree_oid_hex(void);
-const char *empty_blob_oid_hex(void);
-
 /* set default permissions by passing mode arguments to open(2) */
 int git_mkstemps_mode(char *pattern, int suffix_len, int mode);
 int git_mkstemp_mode(char *pattern, int mode);
--- a/commit-graph.c
+++ b/commit-graph.c
@ -932,21 +932,15 @@ struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit

 struct packed_commit_list {
 	struct commit **list;
-	int nr;
-	int alloc;
-};
-
-struct packed_oid_list {
-	struct object_id *list;
-	int nr;
-	int alloc;
+	size_t nr;
+	size_t alloc;
 };

 struct write_commit_graph_context {
 	struct repository *r;
 	struct object_directory *odb;
 	char *graph_name;
-	struct packed_oid_list oids;
+	struct oid_array oids;
 	struct packed_commit_list commits;
 	int num_extra_edges;
 	unsigned long approx_nr_objects;
@ -1240,13 +1234,6 @@ static int write_graph_chunk_bloom_data(struct hashfile *f,
 	return 0;
 }

-static int oid_compare(const void *_a, const void *_b)
-{
-	const struct object_id *a = (const struct object_id *)_a;
-	const struct object_id *b = (const struct object_id *)_b;
-	return oidcmp(a, b);
-}
-
 static int add_packed_commits(const struct object_id *oid,
 			      struct packed_git *pack,
 			      uint32_t pos,
@ -1267,10 +1254,7 @@ static int add_packed_commits(const struct object_id *oid,
 	if (type != OBJ_COMMIT)
 		return 0;

-	ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
-	oidcpy(&(ctx->oids.list[ctx->oids.nr]), oid);
-	ctx->oids.nr++;
-
+	oid_array_append(&ctx->oids, oid);
 	set_commit_pos(ctx->r, oid);

 	return 0;
@ -1281,9 +1265,7 @@ static void add_missing_parents(struct write_commit_graph_context *ctx, struct c
 	struct commit_list *parent;
 	for (parent = commit->parents; parent; parent = parent->next) {
 		if (!(parent->item->object.flags & REACHABLE)) {
-			ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
-			oidcpy(&ctx->oids.list[ctx->oids.nr], &(parent->item->object.oid));
-			ctx->oids.nr++;
+			oid_array_append(&ctx->oids, &parent->item->object.oid);
 			parent->item->object.flags |= REACHABLE;
 		}
 	}
@ -1302,7 +1284,7 @@ static void close_reachable(struct write_commit_graph_context *ctx)
 					ctx->oids.nr);
 	for (i = 0; i < ctx->oids.nr; i++) {
 		display_progress(ctx->progress, i + 1);
-		commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
+		commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
 		if (commit)
 			commit->object.flags |= REACHABLE;
 	}
@ -1319,7 +1301,7 @@ static void close_reachable(struct write_commit_graph_context *ctx)
 					0);
 	for (i = 0; i < ctx->oids.nr; i++) {
 		display_progress(ctx->progress, i + 1);
-		commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
+		commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);

 		if (!commit)
 			continue;
@ -1339,7 +1321,7 @@ static void close_reachable(struct write_commit_graph_context *ctx)
 					ctx->oids.nr);
 	for (i = 0; i < ctx->oids.nr; i++) {
 		display_progress(ctx->progress, i + 1);
-		commit = lookup_commit(ctx->r, &ctx->oids.list[i]);
+		commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);

 		if (commit)
 			commit->object.flags &= ~REACHABLE;
@ -1567,9 +1549,7 @@ static int fill_oids_from_commits(struct write_commit_graph_context *ctx,

 	oidset_iter_init(commits, &iter);
 	while ((oid = oidset_iter_next(&iter))) {
-		ALLOC_GROW(ctx->oids.list, ctx->oids.nr + 1, ctx->oids.alloc);
-		oidcpy(&ctx->oids.list[ctx->oids.nr], oid);
-		ctx->oids.nr++;
+		oid_array_append(&ctx->oids, oid);
 	}

 	return 0;
@ -1588,35 +1568,6 @@ static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
 	stop_progress(&ctx->progress);
 }

-static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx)
-{
-	uint32_t i, count_distinct = 1;
-
-	if (ctx->report_progress)
-		ctx->progress = start_delayed_progress(
-			_("Counting distinct commits in commit graph"),
-			ctx->oids.nr);
-	display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */
-	QSORT(ctx->oids.list, ctx->oids.nr, oid_compare);
-
-	for (i = 1; i < ctx->oids.nr; i++) {
-		display_progress(ctx->progress, i + 1);
-		if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) {
-			if (ctx->split) {
-				struct commit *c = lookup_commit(ctx->r, &ctx->oids.list[i]);
-
-				if (!c || commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH)
-					continue;
-			}
-
-			count_distinct++;
-		}
-	}
-	stop_progress(&ctx->progress);
-
-	return count_distinct;
-}
-
 static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
 {
 	uint32_t i;
@ -1628,15 +1579,14 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
 		ctx->progress = start_delayed_progress(
 			_("Finding extra edges in commit graph"),
 			ctx->oids.nr);
-	for (i = 0; i < ctx->oids.nr; i++) {
+	oid_array_sort(&ctx->oids);
+	for (i = 0; i < ctx->oids.nr; i = oid_array_next_unique(&ctx->oids, i)) {
 		unsigned int num_parents;

 		display_progress(ctx->progress, i + 1);
-		if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i]))
-			continue;

 		ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc);
-		ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]);
+		ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.oid[i]);

 		if (ctx->split && flags != COMMIT_GRAPH_SPLIT_REPLACE &&
 		    commit_graph_position(ctx->commits.list[ctx->commits.nr]) != COMMIT_NOT_FROM_GRAPH)
@ -2155,7 +2105,7 @@ int write_commit_graph(struct object_directory *odb,
 		       const struct commit_graph_opts *opts)
 {
 	struct write_commit_graph_context *ctx;
-	uint32_t i, count_distinct = 0;
+	uint32_t i;
 	int res = 0;
 	int replace = 0;
 	struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
@ -2227,26 +2177,16 @@ int write_commit_graph(struct object_directory *odb,
 	}

 	ctx->approx_nr_objects = approximate_object_count();
-	ctx->oids.alloc = ctx->approx_nr_objects / 32;

-	if (ctx->split && opts && ctx->oids.alloc > opts->max_commits)
-		ctx->oids.alloc = opts->max_commits;
-
-	if (ctx->append) {
+	if (ctx->append)
 		prepare_commit_graph_one(ctx->r, ctx->odb);
-		if (ctx->r->objects->commit_graph)
-			ctx->oids.alloc += ctx->r->objects->commit_graph->num_commits;
-	}
-
-	if (ctx->oids.alloc < 1024)
-		ctx->oids.alloc = 1024;
-	ALLOC_ARRAY(ctx->oids.list, ctx->oids.alloc);

 	if (ctx->append && ctx->r->objects->commit_graph) {
 		struct commit_graph *g = ctx->r->objects->commit_graph;
 		for (i = 0; i < g->num_commits; i++) {
-			const unsigned char *hash = g->chunk_oid_lookup + g->hash_len * i;
-			hashcpy(ctx->oids.list[ctx->oids.nr++].hash, hash);
+			struct object_id oid;
+			hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * i);
+			oid_array_append(&ctx->oids, &oid);
 		}
 	}

@ -2268,17 +2208,6 @@ int write_commit_graph(struct object_directory *odb,

 	close_reachable(ctx);

-	count_distinct = count_distinct_commits(ctx);
-
-	if (count_distinct >= GRAPH_EDGE_LAST_MASK) {
-		error(_("the commit graph format cannot write %d commits"), count_distinct);
-		res = -1;
-		goto cleanup;
-	}
-
-	ctx->commits.alloc = count_distinct;
-	ALLOC_ARRAY(ctx->commits.list, ctx->commits.alloc);
-
 	copy_oids_to_commits(ctx);

 	if (ctx->commits.nr >= GRAPH_EDGE_LAST_MASK) {
@ -2313,7 +2242,7 @@ int write_commit_graph(struct object_directory *odb,
 cleanup:
 	free(ctx->graph_name);
 	free(ctx->commits.list);
-	free(ctx->oids.list);
+	oid_array_clear(&ctx->oids);

 	if (ctx->commit_graph_filenames_after) {
 		for (i = 0; i < ctx->num_commit_graphs_after; i++) {
--- a/hash.h
+++ b/hash.h
@ -2,6 +2,7 @@
 #define HASH_H

 #include "git-compat-util.h"
+#include "repository.h"

 #if defined(SHA1_PPC)
 #include "ppc/sha1.h"
@ -184,4 +185,98 @@ struct object_id {

 #define the_hash_algo the_repository->hash_algo

+extern const struct object_id null_oid;
+
+static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2)
+{
+	/*
+	 * Teach the compiler that there are only two possibilities of hash size
+	 * here, so that it can optimize for this case as much as possible.
+	 */
+	if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
+		return memcmp(sha1, sha2, GIT_MAX_RAWSZ);
+	return memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
+}
+
+static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2)
+{
+	return hashcmp(oid1->hash, oid2->hash);
+}
+
+static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2)
+{
+	/*
+	 * We write this here instead of deferring to hashcmp so that the
+	 * compiler can properly inline it and avoid calling memcmp.
+	 */
+	if (the_hash_algo->rawsz == GIT_MAX_RAWSZ)
+		return !memcmp(sha1, sha2, GIT_MAX_RAWSZ);
+	return !memcmp(sha1, sha2, GIT_SHA1_RAWSZ);
+}
+
+static inline int oideq(const struct object_id *oid1, const struct object_id *oid2)
+{
+	return hasheq(oid1->hash, oid2->hash);
+}
+
+static inline int is_null_oid(const struct object_id *oid)
+{
+	return oideq(oid, &null_oid);
+}
+
+static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src)
+{
+	memcpy(sha_dst, sha_src, the_hash_algo->rawsz);
+}
+
+static inline void oidcpy(struct object_id *dst, const struct object_id *src)
+{
+	memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ);
+}
+
+static inline struct object_id *oiddup(const struct object_id *src)
+{
+	struct object_id *dst = xmalloc(sizeof(struct object_id));
+	oidcpy(dst, src);
+	return dst;
+}
+
+static inline void hashclr(unsigned char *hash)
+{
+	memset(hash, 0, the_hash_algo->rawsz);
+}
+
+static inline void oidclr(struct object_id *oid)
+{
+	memset(oid->hash, 0, GIT_MAX_RAWSZ);
+}
+
+static inline void oidread(struct object_id *oid, const unsigned char *hash)
+{
+	memcpy(oid->hash, hash, the_hash_algo->rawsz);
+}
+
+static inline int is_empty_blob_sha1(const unsigned char *sha1)
+{
+	return hasheq(sha1, the_hash_algo->empty_blob->hash);
+}
+
+static inline int is_empty_blob_oid(const struct object_id *oid)
+{
+	return oideq(oid, the_hash_algo->empty_blob);
+}
+
+static inline int is_empty_tree_sha1(const unsigned char *sha1)
+{
+	return hasheq(sha1, the_hash_algo->empty_tree->hash);
+}
+
+static inline int is_empty_tree_oid(const struct object_id *oid)
+{
+	return oideq(oid, the_hash_algo->empty_tree);
+}
+
+const char *empty_tree_oid_hex(void);
+const char *empty_blob_oid_hex(void);
+
 #endif
--- a/oid-array.c
+++ b/oid-array.c
@ -14,8 +14,10 @@ static int void_hashcmp(const void *a, const void *b)
 	return oidcmp(a, b);
 }

-static void oid_array_sort(struct oid_array *array)
+void oid_array_sort(struct oid_array *array)
 {
+	if (array->sorted)
+		return;
 	QSORT(array->oid, array->nr, void_hashcmp);
 	array->sorted = 1;
 }
@ -28,8 +30,7 @@ static const unsigned char *sha1_access(size_t index, void *table)

 int oid_array_lookup(struct oid_array *array, const struct object_id *oid)
 {
-	if (!array->sorted)
-		oid_array_sort(array);
+	oid_array_sort(array);
 	return sha1_pos(oid->hash, array->oid, array->nr, sha1_access);
 }

@ -64,14 +65,10 @@ int oid_array_for_each_unique(struct oid_array *array,
 {
 	size_t i;

-	if (!array->sorted)
-		oid_array_sort(array);
+	oid_array_sort(array);

-	for (i = 0; i < array->nr; i++) {
-		int ret;
-		if (i > 0 && oideq(array->oid + i, array->oid + i - 1))
-			continue;
-		ret = fn(array->oid + i, data);
+	for (i = 0; i < array->nr; i = oid_array_next_unique(array, i)) {
+		int ret = fn(array->oid + i, data);
 		if (ret)
 			return ret;
 	}
--- a/oid-array.h
+++ b/oid-array.h
@ -1,5 +1,7 @@
-#ifndef SHA1_ARRAY_H
-#define SHA1_ARRAY_H
+#ifndef OID_ARRAY_H
+#define OID_ARRAY_H
+
+#include "hash.h"

 /**
 * The API provides storage and manipulation of sets of object identifiers.
@ -106,4 +108,30 @@ void oid_array_filter(struct oid_array *array,
 		      for_each_oid_fn want,
 		      void *cbdata);

-#endif /* SHA1_ARRAY_H */
+/**
+ * Sort the array in order of ascending object id.
+ */
+void oid_array_sort(struct oid_array *array);
+
+/**
+ * Find the next unique oid in the array after position "cur".
+ * The array must be sorted for this to work. You can iterate
+ * over unique elements like this:
+ *
+ *   size_t i;
+ *   oid_array_sort(array);
+ *   for (i = 0; i < array->nr; i = oid_array_next_unique(array, i))
+ *	printf("%s", oid_to_hex(array->oids[i]);
+ *
+ * Non-unique iteration can just increment with "i++" to visit each element.
+ */
+static inline size_t oid_array_next_unique(struct oid_array *array, size_t cur)
+{
+	do {
+		cur++;
+	} while (cur < array->nr &&
+		 oideq(array->oid + cur, array->oid + cur - 1));
+	return cur;
+}
+
+#endif /* OID_ARRAY_H */
--- a/t/t0064-sha1-array.sh
+++ b/t/t0064-sha1-array.sh
@ -1,6 +1,6 @@
 #!/bin/sh

-test_description='basic tests for the SHA1 array implementation'
+test_description='basic tests for the oid array implementation'
 . ./test-lib.sh

 echoid () {
@ -25,6 +25,7 @@ test_expect_success 'ordered enumeration' '
 test_expect_success 'ordered enumeration with duplicate suppression' '
 	echoid "" 44 55 88 aa >expect &&
 	{
+		echoid append 88 44 aa 55 &&
 		echoid append 88 44 aa 55 &&
 		echoid append 88 44 aa 55 &&
 		echo for_each_unique
@ -52,17 +53,19 @@ test_expect_success 'lookup non-existing entry' '

 test_expect_success 'lookup with duplicates' '
 	{
+		echoid append 88 44 aa 55 &&
 		echoid append 88 44 aa 55 &&
 		echoid append 88 44 aa 55 &&
 		echoid lookup 55
 	} | test-tool oid-array >actual &&
 	n=$(cat actual) &&
-	test "$n" -ge 2 &&
-	test "$n" -le 3
+	test "$n" -ge 3 &&
+	test "$n" -le 5
 '

 test_expect_success 'lookup non-existing entry with duplicates' '
 	{
+		echoid append 88 44 aa 55 &&
 		echoid append 88 44 aa 55 &&
 		echoid append 88 44 aa 55 &&
 		echoid lookup 66