Merge branch 'ds/bsearch-hash'

Code to find the length to uniquely abbreviate object names based on packfile content, which is a relatively recent addtion, has been optimized to use the same fan-out table. * ds/bsearch-hash: sha1_name: use bsearch_pack() in unique_in_pack() sha1_name: use bsearch_pack() for abbreviations packfile: define and use bsearch_pack() sha1_name: convert struct min_abbrev_data to object_id
2024-11-18 02:53:55 +01:00 · 2018-04-10 16:28:22 +09:00 · 2018-04-10 16:28:22 +09:00 · 51f813c6b3
commit 51f813c6b3
parent 57e4b1c67a 902f5a2119
3 changed files with 42 additions and 57 deletions
--- a/packfile.c
+++ b/packfile.c
@ -1654,6 +1654,29 @@ out:
 	return data;
 }

+int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result)
+{
+	const unsigned char *index_fanout = p->index_data;
+	const unsigned char *index_lookup;
+	int index_lookup_width;
+
+	if (!index_fanout)
+		BUG("bsearch_pack called without a valid pack-index");
+
+	index_lookup = index_fanout + 4 * 256;
+	if (p->index_version == 1) {
+		index_lookup_width = 24;
+		index_lookup += 4;
+	} else {
+		index_lookup_width = 20;
+		index_fanout += 8;
+		index_lookup += 8;
+	}
+
+	return bsearch_hash(oid->hash, (const uint32_t*)index_fanout,
+			    index_lookup, index_lookup_width, result);
+}
+
 const unsigned char *nth_packed_object_sha1(struct packed_git *p,
 					    uint32_t n)
 {
@ -1720,30 +1743,17 @@ off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
 off_t find_pack_entry_one(const unsigned char *sha1,
 				  struct packed_git *p)
 {
-	const uint32_t *level1_ofs = p->index_data;
 	const unsigned char *index = p->index_data;
-	unsigned stride;
+	struct object_id oid;
 	uint32_t result;

 	if (!index) {
 		if (open_pack_index(p))
 			return 0;
-		level1_ofs = p->index_data;
-		index = p->index_data;
-	}
-	if (p->index_version > 1) {
-		level1_ofs += 2;
-		index += 8;
-	}
-	index += 4 * 256;
-	if (p->index_version > 1) {
-		stride = 20;
-	} else {
-		stride = 24;
-		index += 4;
 	}

-	if (bsearch_hash(sha1, level1_ofs, index, stride, &result))
+	hashcpy(oid.hash, sha1);
+	if (bsearch_pack(&oid, p, &result))
 		return nth_packed_object_offset(p, result);
 	return 0;
 }
--- a/packfile.h
+++ b/packfile.h
@ -78,6 +78,14 @@ extern struct packed_git *add_packed_git(const char *path, size_t path_len, int
 */
 extern void check_pack_index_ptr(const struct packed_git *p, const void *ptr);

+/*
+ * Perform binary search on a pack-index for a given oid. Packfile is expected to
+ * have a valid pack-index.
+ *
+ * See 'bsearch_hash' for more information.
+ */
+int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result);
+
 /*
 * Return the SHA-1 of the nth object within the specified packfile.
 * Open the index if it is not already open.  The return value points
--- a/sha1_name.c
+++ b/sha1_name.c
@ -150,31 +150,14 @@ static int match_sha(unsigned len, const unsigned char *a, const unsigned char *
 static void unique_in_pack(struct packed_git *p,
 			   struct disambiguate_state *ds)
 {
-	uint32_t num, last, i, first = 0;
+	uint32_t num, i, first = 0;
 	const struct object_id *current = NULL;

 	if (open_pack_index(p) || !p->num_objects)
 		return;

 	num = p->num_objects;
-	last = num;
-	while (first < last) {
-		uint32_t mid = first + (last - first) / 2;
-		const unsigned char *current;
-		int cmp;
-
-		current = nth_packed_object_sha1(p, mid);
-		cmp = hashcmp(ds->bin_pfx.hash, current);
-		if (!cmp) {
-			first = mid;
-			break;
-		}
-		if (cmp > 0) {
-			first = mid+1;
-			continue;
-		}
-		last = mid;
-	}
+	bsearch_pack(&ds->bin_pfx, p, &first);

 	/*
 	 * At this point, "first" is the location of the lowest object
@ -480,7 +463,7 @@ struct min_abbrev_data {
 	unsigned int init_len;
 	unsigned int cur_len;
 	char *hex;
-	const unsigned char *hash;
+	const struct object_id *oid;
 };

 static inline char get_hex_char_from_oid(const struct object_id *oid,
@ -512,32 +495,16 @@ static void find_abbrev_len_for_pack(struct packed_git *p,
 				     struct min_abbrev_data *mad)
 {
 	int match = 0;
-	uint32_t num, last, first = 0;
+	uint32_t num, first = 0;
 	struct object_id oid;
+	const struct object_id *mad_oid;

 	if (open_pack_index(p) || !p->num_objects)
 		return;

 	num = p->num_objects;
-	last = num;
-	while (first < last) {
-		uint32_t mid = first + (last - first) / 2;
-		const unsigned char *current;
-		int cmp;
-
-		current = nth_packed_object_sha1(p, mid);
-		cmp = hashcmp(mad->hash, current);
-		if (!cmp) {
-			match = 1;
-			first = mid;
-			break;
-		}
-		if (cmp > 0) {
-			first = mid + 1;
-			continue;
-		}
-		last = mid;
-	}
+	mad_oid = mad->oid;
+	match = bsearch_pack(mad_oid, p, &first);

 	/*
 	 * first is now the position in the packfile where we would insert
@ -603,7 +570,7 @@ int find_unique_abbrev_r(char *hex, const struct object_id *oid, int len)
 	mad.init_len = len;
 	mad.cur_len = len;
 	mad.hex = hex;
-	mad.hash = oid->hash;
+	mad.oid = oid;

 	find_abbrev_len_packed(&mad);