git/t/helper/test-mergesort.c

#include "test-tool.h"
#include "cache.h"
#include "mem-pool.h"
#include "mergesort.h"
#include "strbuf.h"

/*
 * Advance the MINSTD pseudo-random number generator whose state is kept
 * in *state by one step and return the new state.  The 64-bit product
 * keeps the multiplication from wrapping before the modulus is applied.
 */
static uint32_t minstd_rand(uint32_t *state)
{
	const uint64_t multiplier = 48271;
	const uint64_t modulus = 2147483647;
	uint64_t product = *state * multiplier;

	*state = product % modulus;
	return *state;
}

/* One line of input, kept as a node of a singly linked list. */
struct line {
	char *text;		/* NUL-terminated contents of the line */
	struct line *next;	/* following node, or NULL at the end of the list */
};

/* Instantiate sort_lines(), a static sort for struct line lists linked through "next". */
DEFINE_LIST_SORT(static, sort_lines, struct line, next);

/*
 * Comparison callback handed to sort_lines(): orders two lines by the
 * strcmp() result of their text.
 */
static int compare_strings(const struct line *x, const struct line *y)
{
	const char *lhs = x->text;
	const char *rhs = y->text;

	return strcmp(lhs, rhs);
}

/*
 * Read all of standard input, sort its lines with sort_lines() and
 * print them to standard output, one per line.
 *
 * Returns 0 on success and 1 if standard input could not be read.
 */
static int sort_stdin(void)
{
	struct line *lines;
	struct line **tail = &lines;
	struct strbuf sb = STRBUF_INIT;
	struct mem_pool lines_pool;
	char *p;

	if (strbuf_read(&sb, 0, 0) < 0) {
		fprintf(stderr, "error: could not read standard input\n");
		strbuf_release(&sb);
		return 1;
	}

	/*
	 * Split by newline, but don't create an item
	 * for the empty string after the last separator.
	 */
	if (sb.len && sb.buf[sb.len - 1] == '\n')
		strbuf_setlen(&sb, sb.len - 1);

	mem_pool_init(&lines_pool, 0);
	p = sb.buf;
	for (;;) {
		/* Each node points into sb; the newline is overwritten with NUL. */
		char *eol = strchr(p, '\n');
		struct line *line = mem_pool_alloc(&lines_pool, sizeof(*line));
		line->text = p;
		*tail = line;
		tail = &line->next;
		if (!eol)
			break;
		*eol = '\0';
		p = eol + 1;
	}
	*tail = NULL;

	sort_lines(&lines, compare_strings);

	while (lines) {
		puts(lines->text);
		lines = lines->next;
	}

	/*
	 * The nodes were allocated from lines_pool and their text points
	 * into sb, so release both only after everything has been printed.
	 */
	mem_pool_discard(&lines_pool, 0);
	strbuf_release(&sb);
	return 0;
}

/* Fill arr[0..n) with the repeating ramp 0, 1, ..., m - 1, 0, 1, ... */
static void dist_sawtooth(int *arr, int n, int m)
{
	int pos = 0;

	while (pos < n) {
		arr[pos] = pos % m;
		pos++;
	}
}

/*
 * Fill arr[0..n) with minstd_rand() values reduced modulo m.  The
 * generator always starts from state 1, so the output is repeatable.
 */
static void dist_rand(int *arr, int n, int m)
{
	uint32_t state = 1;
	int pos;

	for (pos = 0; pos < n; pos++) {
		uint32_t sample = minstd_rand(&state);

		arr[pos] = sample % m;
	}
}

/* Fill arr[0..n) so that element i holds (i * m + i) modulo n. */
static void dist_stagger(int *arr, int n, int m)
{
	int pos;

	for (pos = 0; pos < n; pos++) {
		int scaled = pos * m + pos;

		arr[pos] = scaled % n;
	}
}

/* Fill arr[0..n) with a ramp 0, 1, 2, ... that levels off at m. */
static void dist_plateau(int *arr, int n, int m)
{
	int pos;

	for (pos = 0; pos < n; pos++) {
		if (pos < m)
			arr[pos] = pos;
		else
			arr[pos] = m;
	}
}

/*
 * Fill arr[0..n) by interleaving two increasing sequences: the even
 * numbers 2, 4, 6, ... and the odd numbers 3, 5, 7, ...  For each
 * element a minstd_rand() draw (generator started from state 1)
 * modulo m picks the sequence: nonzero takes the next even number,
 * zero takes the next odd one.
 */
static void dist_shuffle(int *arr, int n, int m)
{
	uint32_t state = 1;
	int even = 0, odd = 1;
	int pos;

	for (pos = 0; pos < n; pos++) {
		if (minstd_rand(&state) % m) {
			even += 2;
			arr[pos] = even;
		} else {
			odd += 2;
			arr[pos] = odd;
		}
	}
}

/* Build a table entry from a distribution's bare name and its dist_<name>() function. */
#define DIST(id) { .name = #id, .fn = dist_##id }

/*
 * All known distributions.  Entries are looked up by name in
 * get_dist_by_name() and iterated over by run_tests().
 */
static struct dist {
	const char *name;
	void (*fn)(int *arr, int n, int m);
} dist[] = {
	DIST(sawtooth),
	DIST(rand),
	DIST(stagger),
	DIST(plateau),
	DIST(shuffle),
};

static const struct dist *get_dist_by_name(const char *name)
{
	int i;
	for (i = 0; i < ARRAY_SIZE(dist); i++) {
	       if (!strcmp(dist[i].name, name))
		       return &dist[i];
	}
	return NULL;
}

static void mode_copy(int *arr, int n)
{
	/* nothing */
}

/* Mode "reverse": reverse the order of arr[0..n) in place. */
static void mode_reverse(int *arr, int n)
{
	int lo = 0;
	int hi = n - 1;

	/* Swap the outermost pair and move inwards until the ends meet. */
	while (lo < hi) {
		SWAP(arr[lo], arr[hi]);
		lo++;
		hi--;
	}
}

/* Mode "reverse_1st_half": reverse only the first n / 2 elements of arr. */
static void mode_reverse_1st_half(int *arr, int n)
{
	int half = n / 2;

	mode_reverse(arr, half);
}

/*
 * Mode "reverse_2nd_half": reverse the elements from index n / 2 up to
 * the end of arr, leaving the first n / 2 elements alone.
 */
static void mode_reverse_2nd_half(int *arr, int n)
{
	int first = n / 2;
	int *second_half = arr + first;

	mode_reverse(second_half, n - first);
}

/*
 * qsort-style comparison of two ints passed by address.  Returns -1, 0
 * or 1; it compares instead of subtracting, so it cannot overflow.
 */
static int compare_ints(const void *av, const void *bv)
{
	int lhs = *(const int *)av;
	int rhs = *(const int *)bv;

	if (lhs < rhs)
		return -1;
	if (lhs > rhs)
		return 1;
	return 0;
}

/* Mode "sort": sort arr[0..n) with QSORT(), using compare_ints() as the ordering. */
static void mode_sort(int *arr, int n)
{
	QSORT(arr, n, compare_ints);
}

/* Mode "dither": add the repeating offsets 0, 1, 2, 3, 4 to the elements of arr. */
static void mode_dither(int *arr, int n)
{
	int pos;

	for (pos = 0; pos < n; pos++) {
		int offset = pos % 5;

		arr[pos] = arr[pos] + offset;
	}
}

/*
 * Rearrange arr[0..n) so that the elements from even positions come
 * first, followed by those from odd positions, each group keeping its
 * relative order.  tmp must have room for n ints; it receives a copy of
 * the original contents.
 */
static void unriffle(int *arr, int n, int *tmp)
{
	int parity, src;
	int dst = 0;

	COPY_ARRAY(tmp, arr, n);
	for (parity = 0; parity < 2; parity++) {
		for (src = parity; src < n; src += 2)
			arr[dst++] = tmp[src];
	}
}

/*
 * Apply unriffle() to arr[0..n), then recurse into the first n / 2
 * elements and into the remaining ones.  Ranges of fewer than two
 * elements are left alone.  tmp is scratch space for unriffle().
 */
static void unriffle_recursively(int *arr, int n, int *tmp)
{
	int half;

	if (n <= 1)
		return;

	half = n / 2;
	unriffle(arr, n, tmp);
	unriffle_recursively(arr, half, tmp);
	unriffle_recursively(arr + half, n - half, tmp);
}

/*
 * Mode "unriffle": run unriffle_recursively() over arr[0..n), using a
 * temporary n-element scratch array that is freed before returning.
 */
static void mode_unriffle(int *arr, int n)
{
	int *scratch;

	ALLOC_ARRAY(scratch, n);
	unriffle_recursively(arr, n, scratch);
	free(scratch);
}

/*
 * Return the largest power of two that is strictly less than n.
 * For n <= 1 the result is 1.
 */
static unsigned int prev_pow2(unsigned int n)
{
	unsigned int result;

	for (result = 1; result * 2 < n; result *= 2)
		; /* keep doubling while the next power still fits below n */
	return result;
}

/*
 * Variant of unriffle_recursively() that splits at prev_pow2(n) instead
 * of at the midpoint.  With "rest" being the number of elements beyond
 * the split, the 2 * rest elements ending at the end of the range are
 * unriffled, then both sides of the split are processed recursively.
 * Ranges of fewer than two elements are left alone.
 */
static void unriffle_recursively_skewed(int *arr, int n, int *tmp)
{
	int pow2, rest;

	if (n <= 1)
		return;

	pow2 = prev_pow2(n);
	rest = n - pow2;
	unriffle(arr + pow2 - rest, rest * 2, tmp);
	unriffle_recursively_skewed(arr, pow2, tmp);
	unriffle_recursively_skewed(arr + pow2, rest, tmp);
}

/*
 * Mode "unriffle_skewed": run unriffle_recursively_skewed() over
 * arr[0..n), using a temporary n-element scratch array that is freed
 * before returning.
 */
static void mode_unriffle_skewed(int *arr, int n)
{
	int *scratch;

	ALLOC_ARRAY(scratch, n);
	unriffle_recursively_skewed(arr, n, scratch);
	free(scratch);
}

/* Build a table entry from a mode's bare name and its mode_<name>() function. */
#define MODE(id) { .name = #id, .fn = mode_##id }

/*
 * All known modes, i.e. in-place transformations applied to an array
 * after a distribution has filled it.  Entries are looked up by name in
 * get_mode_by_name() and iterated over by run_tests().
 */
static struct mode {
	const char *name;
	void (*fn)(int *arr, int n);
} mode[] = {
	MODE(copy),
	MODE(reverse),
	MODE(reverse_1st_half),
	MODE(reverse_2nd_half),
	MODE(sort),
	MODE(dither),
	MODE(unriffle),
	MODE(unriffle_skewed),
};

static const struct mode *get_mode_by_name(const char *name)
{
	int i;
	for (i = 0; i < ARRAY_SIZE(mode); i++) {
	       if (!strcmp(mode[i].name, name))
		       return &mode[i];
	}
	return NULL;
}

/*
 * Parse arg as a complete base-10 integer that fits in an int.
 * On success store the value in *out and return 0.  Return -1 and leave
 * *out untouched if arg is empty, has trailing characters or is out of
 * range for an int.
 */
static int parse_int_arg(const char *arg, int *out)
{
	char *end;
	long value = strtol(arg, &end, 10);

	if (end == arg || *end || value != (int)value)
		return -1;
	*out = (int)value;
	return 0;
}

/*
 * Implementation of "generate <distribution> <mode> <n> <m>": fill an
 * array of n ints using the named distribution with parameter m, apply
 * the named mode to it and print each element as eight hex digits on a
 * line of its own.
 *
 * argv[0..3] are the distribution name, the mode name, n and m.
 * Returns 0 on success and 1 if the argument count is wrong, a name is
 * unknown, or n or m is not a valid number (n must not be negative and
 * m must be at least 1).
 */
static int generate(int argc, const char **argv)
{
	const struct dist *dist = NULL;
	const struct mode *mode = NULL;
	int i, n, m, *arr;

	if (argc != 4)
		return 1;

	dist = get_dist_by_name(argv[0]);
	mode = get_mode_by_name(argv[1]);
	if (!dist || !mode)
		return 1;
	if (parse_int_arg(argv[2], &n) || parse_int_arg(argv[3], &m))
		return 1;
	/*
	 * A negative count cannot be allocated, and several distributions
	 * use m as a divisor, so zero would divide by zero.
	 */
	if (n < 0 || m < 1)
		return 1;

	ALLOC_ARRAY(arr, n);
	dist->fn(arr, n, m);
	mode->fn(arr, n);
	for (i = 0; i < n; i++)
		printf("%08x\n", arr[i]);
	free(arr);
	return 0;
}

/*
 * Operation counters for a sort_numbers() run.  test() resets them
 * before sorting and prints them afterwards.
 */
static struct stats {
	int get_next, set_next, compare;
} stats;

/* List node used by test(): a value together with its position in the unsorted input. */
struct number {
	int value, rank;	/* rank lets test() detect unstable ordering of equal values */
	struct number *next;
};

/*
 * Instantiate sort_numbers() for struct number lists linked through
 * "next".  The two trailing expressions bump stats.get_next and
 * stats.set_next; presumably the macro evaluates them whenever the sort
 * reads or writes a next pointer -- confirm against mergesort.h.
 */
DEFINE_LIST_SORT_DEBUG(static, sort_numbers, struct number, next,
		       stats.get_next++, stats.set_next++);

/*
 * Comparison callback handed to sort_numbers(): orders nodes by value
 * and returns -1, 0 or 1.  Every call is counted in stats.compare.
 */
static int compare_numbers(const struct number *an, const struct number *bn)
{
	stats.compare++;
	if (an->value < bn->value)
		return -1;
	return an->value > bn->value;
}

/* Free every node of the given list; a NULL list is fine. */
static void clear_numbers(struct number *list)
{
	struct number *node, *following;

	for (node = list; node; node = following) {
		/* Fetch the successor before the node is released. */
		following = node->next;
		free(node);
	}
}

/*
 * Run one test case: build n values with the given distribution
 * (parameter m) and mode, copy them into a linked list, sort the list
 * with sort_numbers() and compare the outcome with the same values
 * sorted by QSORT().
 *
 * One result line is printed with the case parameters, the counters
 * from stats and a verdict.  Returns 0 if the list has the right
 * length, is sorted and equal values kept their input order; returns -1
 * otherwise.
 */
static int test(const struct dist *dist, const struct mode *mode, int n, int m)
{
	int *values;
	size_t pos;
	struct number *node, *head, **link;
	int sorted = 1;
	int stable = 1;
	const char *verdict;
	int result = -1;

	ALLOC_ARRAY(values, n);
	dist->fn(values, n, m);
	mode->fn(values, n);

	/* Mirror the array as a list, remembering each value's position. */
	link = &head;
	for (pos = 0; pos < n; pos++) {
		node = xmalloc(sizeof(*node));
		node->value = values[pos];
		node->rank = pos;
		*link = node;
		link = &node->next;
	}
	*link = NULL;

	stats.get_next = stats.set_next = stats.compare = 0;
	sort_numbers(&head, compare_numbers);

	/* Sort the array as the reference and walk both in lockstep. */
	QSORT(values, n, compare_ints);
	pos = 0;
	node = head;
	while (pos < n && node) {
		struct number *succ = node->next;

		if (values[pos] != node->value)
			sorted = 0;
		/* Equal neighbours must still be in their input order. */
		if (succ && node->value == succ->value &&
		    node->rank >= succ->rank)
			stable = 0;
		pos++;
		node = succ;
	}

	if (pos < n) {
		verdict = "too short";
	} else if (node) {
		verdict = "too long";
	} else if (!sorted) {
		verdict = "not sorted";
	} else if (!stable) {
		verdict = "unstable";
	} else {
		verdict = "OK";
		result = 0;
	}

	printf("%-9s %-16s %8d %8d %8d %8d %8d %s\n",
	       dist->name, mode->name, n, m, stats.get_next, stats.set_next,
	       stats.compare, verdict);

	clear_numbers(head);
	free(values);

	return result;
}

/*
 * A version of the qsort certification program from "Engineering a Sort
 * Function" by Bentley and McIlroy, Software—Practice and Experience,
 * Volume 23, Issue 11, 1249–1265 (November 1993).
 */
static int run_tests(int argc, const char **argv)
{
	const char *argv_default[] = { "100", "1023", "1024", "1025" };
	if (!argc)
		return run_tests(ARRAY_SIZE(argv_default), argv_default);
	printf("%-9s %-16s %8s %8s %8s %8s %8s %s\n",
	       "distribut", "mode", "n", "m", "get_next", "set_next",
	       "compare", "verdict");
	while (argc--) {
		int i, j, m, n = strtol(*argv++, NULL, 10);
		for (i = 0; i < ARRAY_SIZE(dist); i++) {
			for (j = 0; j < ARRAY_SIZE(mode); j++) {
				for (m = 1; m < 2 * n; m *= 2) {
					if (test(&dist[i], &mode[j], n, m))
						return 1;
				}
			}
		}
	}
	return 0;
}

/*
 * Entry point of "test-tool mergesort".  Dispatches on argv[1] to the
 * "generate", "sort" and "test" subcommands and returns their result.
 * For anything else a usage message listing the known distributions and
 * modes is written to stderr and 129 is returned.
 */
int cmd__mergesort(int argc, const char **argv)
{
	int i;
	const char *sep;

	if (argc > 1) {
		const char *subcommand = argv[1];

		if (argc == 6 && !strcmp(subcommand, "generate"))
			return generate(argc - 2, argv + 2);
		if (argc == 2 && !strcmp(subcommand, "sort"))
			return sort_stdin();
		if (!strcmp(subcommand, "test"))
			return run_tests(argc - 2, argv + 2);
	}

	fputs("usage: test-tool mergesort generate <distribution> <mode> <n> <m>\n"
	      "   or: test-tool mergesort sort\n"
	      "   or: test-tool mergesort test [<n>...]\n"
	      "\n", stderr);

	/* The first item carries the label, later ones a comma separator. */
	sep = "distributions: ";
	for (i = 0; i < ARRAY_SIZE(dist); i++) {
		fprintf(stderr, "%s%s", sep, dist[i].name);
		sep = ", ";
	}
	fputs("\n", stderr);

	sep = "modes: ";
	for (i = 0; i < ARRAY_SIZE(mode); i++) {
		fprintf(stderr, "%s%s", sep, mode[i].name);
		sep = ", ";
	}
	fputs("\n", stderr);

	return 129;
}
-												t/helper: merge (unused) test-mergesort into test-tool

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2018-03-24 08:44:46 +01:00
+								#include "test-tool.h"
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
+								#include "cache.h"
-												treewide: be explicit about dependence on mem-pool.h

Signed-off-by: Elijah Newren <newren@gmail.com>
Acked-by: Calvin Wan <calvinwan@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2023-04-11 09:41:47 +02:00
+								#include "mem-pool.h"
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
+								#include "mergesort.h"
-												treewide: be explicit about dependence on strbuf.h

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2023-04-22 22:17:08 +02:00
+								#include "strbuf.h"
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
-												test-mergesort: use repeatable random numbers

Use MINSTD to generate pseudo-random numbers consistently instead of
using rand(3), whose output can vary from system to system, and reset
its seed before filling in the test values.  This gives repeatable
results across versions and systems, which simplifies sharing and
comparing of results between developers.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-08 06:04:42 +02:00
+								static uint32_t minstd_rand(uint32_t *state)
 								{
 									*state = (uint64_t)*state * 48271 % 2147483647;
 									return *state;
 								}
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
+								struct line {
 									char *text;
 									struct line *next;
 								};
-												test-mergesort: use DEFINE_LIST_SORT

Build a typed sort function for the mergesort performance test tool
using DEFINE_LIST_SORT instead of calling llist_mergesort().  This gets
rid of the next pointer accessor functions and improves the performance
at the cost of a slightly higher object text size.

Before:
0071.12: llist_mergesort() unsorted    0.24(0.22+0.01)
0071.14: llist_mergesort() sorted      0.12(0.10+0.01)
0071.16: llist_mergesort() reversed    0.12(0.10+0.01)

__TEXT	__DATA	__OBJC	others	dec	hex
6407	276	0	24701	31384	7a98	t/helper/test-mergesort.o

With this patch:
0071.12: DEFINE_LIST_SORT unsorted     0.22(0.21+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.11(0.10+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.11(0.10+0.01)

__TEXT	__DATA	__OBJC	others	dec	hex
6615	276	0	25832	32723	7fd3	t/helper/test-mergesort.o

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-07-16 18:57:18 +02:00
+								DEFINE_LIST_SORT(static, sort_lines, struct line, next);
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
-												test-mergesort: use DEFINE_LIST_SORT

Build a typed sort function for the mergesort performance test tool
using DEFINE_LIST_SORT instead of calling llist_mergesort().  This gets
rid of the next pointer accessor functions and improves the performance
at the cost of a slightly higher object text size.

Before:
0071.12: llist_mergesort() unsorted    0.24(0.22+0.01)
0071.14: llist_mergesort() sorted      0.12(0.10+0.01)
0071.16: llist_mergesort() reversed    0.12(0.10+0.01)

__TEXT	__DATA	__OBJC	others	dec	hex
6407	276	0	24701	31384	7a98	t/helper/test-mergesort.o

With this patch:
0071.12: DEFINE_LIST_SORT unsorted     0.22(0.21+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.11(0.10+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.11(0.10+0.01)

__TEXT	__DATA	__OBJC	others	dec	hex
6615	276	0	25832	32723	7fd3	t/helper/test-mergesort.o

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-07-16 18:57:18 +02:00
+								static int compare_strings(const struct line *x, const struct line *y)
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
+								{
 									return strcmp(x->text, y->text);
 								}
-												test-mergesort: add sort subcommand

Give the code for sorting a text file its own sub-command.  This allows
extending the helper, which we'll do in the following patches.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:11:19 +02:00
+								static int sort_stdin(void)
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
+								{
-												test-mergesort: read sort input all at once

The sort subcommand of test-mergesort is used to test the performance of
sorting linked lists.  It reads lines from stdin, sorts them and prints
the result to stdout.  Two heap allocations are done per line: One for
the linked list item and one for the actual line string.  That imposes a
significant amount of allocation overhead.

Reduce it by doing the same as the sort subcommand of test-string-list,
namely to read the whole input file into a single buffer and then split
it in-place.

Note that t/perf/run can't be used directly to compare two versions of
test-mergesort because it always runs the helpers from the checked-out
version.  So I hand-merged the results of separate runs before and with
this patch:

macOS 12.5.1 on M1:
0071.12: DEFINE_LIST_SORT unsorted     0.23(0.20+0.01)     0.22(0.20+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.12(0.10+0.01)     0.10(0.08+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.12(0.10+0.01)     0.10(0.08+0.01)

Git SDK 64-bit on Windows 11 21H2 on Ryzen 7 5800H:
0071.12: DEFINE_LIST_SORT unsorted     0.71(0.00+0.03)     0.54(0.00+0.06)
0071.14: DEFINE_LIST_SORT sorted       0.42(0.00+0.04)     0.21(0.03+0.03)
0071.16: DEFINE_LIST_SORT reversed     0.42(0.06+0.01)     0.21(0.01+0.04)

Debian bullseye on WSL2 on the same system:
0071.12: DEFINE_LIST_SORT unsorted     0.41(0.39+0.02)     0.29(0.27+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.11(0.08+0.02)     0.07(0.06+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.11(0.08+0.02)     0.07(0.04+0.03)

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-08-28 12:34:05 +02:00
+									struct line *lines;
 									struct line **tail = &lines;
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
+									struct strbuf sb = STRBUF_INIT;
-												test-mergesort: use mem_pool for sort input

The previous patch almost halved the number of heap allocations for the
sort subcommand.  Reduce it further by using a mem_pool for the line
objects.

Note that t/perf/run can't be used directly to compare two versions of
test-mergesort because it always runs the helpers from the checked-out
version.  So I hand-merged the results of separate runs before and with
this patch:

macOS 12.5.1 on M1:
0071.12: DEFINE_LIST_SORT unsorted     0.22(0.20+0.01)     0.21(0.19+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.10(0.08+0.01)     0.10(0.08+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.10(0.08+0.01)     0.10(0.08+0.01)

Git SDK 64-bit on Windows 11 21H2 on Ryzen 7 5800H:
0071.12: DEFINE_LIST_SORT unsorted     0.54(0.00+0.06)     0.44(0.01+0.06)
0071.14: DEFINE_LIST_SORT sorted       0.21(0.03+0.03)     0.19(0.04+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.21(0.01+0.04)     0.19(0.04+0.04)

Debian bullseye on WSL2 on the same system:
0071.12: DEFINE_LIST_SORT unsorted     0.29(0.27+0.01)     0.22(0.19+0.02)
0071.14: DEFINE_LIST_SORT sorted       0.07(0.06+0.01)     0.06(0.04+0.02)
0071.16: DEFINE_LIST_SORT reversed     0.07(0.04+0.03)     0.06(0.04+0.02)

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-08-28 12:34:47 +02:00
+									struct mem_pool lines_pool;
-												test-mergesort: read sort input all at once

The sort subcommand of test-mergesort is used to test the performance of
sorting linked lists.  It reads lines from stdin, sorts them and prints
the result to stdout.  Two heap allocations are done per line: One for
the linked list item and one for the actual line string.  That imposes a
significant amount of allocation overhead.

Reduce it by doing the same as the sort subcommand of test-string-list,
namely to read the whole input file into a single buffer and then split
it in-place.

Note that t/perf/run can't be used directly to compare two versions of
test-mergesort because it always runs the helpers from the checked-out
version.  So I hand-merged the results of separate runs before and with
this patch:

macOS 12.5.1 on M1:
0071.12: DEFINE_LIST_SORT unsorted     0.23(0.20+0.01)     0.22(0.20+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.12(0.10+0.01)     0.10(0.08+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.12(0.10+0.01)     0.10(0.08+0.01)

Git SDK 64-bit on Windows 11 21H2 on Ryzen 7 5800H:
0071.12: DEFINE_LIST_SORT unsorted     0.71(0.00+0.03)     0.54(0.00+0.06)
0071.14: DEFINE_LIST_SORT sorted       0.42(0.00+0.04)     0.21(0.03+0.03)
0071.16: DEFINE_LIST_SORT reversed     0.42(0.06+0.01)     0.21(0.01+0.04)

Debian bullseye on WSL2 on the same system:
0071.12: DEFINE_LIST_SORT unsorted     0.41(0.39+0.02)     0.29(0.27+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.11(0.08+0.02)     0.07(0.06+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.11(0.08+0.02)     0.07(0.04+0.03)

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-08-28 12:34:05 +02:00
+									char *p;
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
-												test-mergesort: read sort input all at once

The sort subcommand of test-mergesort is used to test the performance of
sorting linked lists.  It reads lines from stdin, sorts them and prints
the result to stdout.  Two heap allocations are done per line: One for
the linked list item and one for the actual line string.  That imposes a
significant amount of allocation overhead.

Reduce it by doing the same as the sort subcommand of test-string-list,
namely to read the whole input file into a single buffer and then split
it in-place.

Note that t/perf/run can't be used directly to compare two versions of
test-mergesort because it always runs the helpers from the checked-out
version.  So I hand-merged the results of separate runs before and with
this patch:

macOS 12.5.1 on M1:
0071.12: DEFINE_LIST_SORT unsorted     0.23(0.20+0.01)     0.22(0.20+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.12(0.10+0.01)     0.10(0.08+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.12(0.10+0.01)     0.10(0.08+0.01)

Git SDK 64-bit on Windows 11 21H2 on Ryzen 7 5800H:
0071.12: DEFINE_LIST_SORT unsorted     0.71(0.00+0.03)     0.54(0.00+0.06)
0071.14: DEFINE_LIST_SORT sorted       0.42(0.00+0.04)     0.21(0.03+0.03)
0071.16: DEFINE_LIST_SORT reversed     0.42(0.06+0.01)     0.21(0.01+0.04)

Debian bullseye on WSL2 on the same system:
0071.12: DEFINE_LIST_SORT unsorted     0.41(0.39+0.02)     0.29(0.27+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.11(0.08+0.02)     0.07(0.06+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.11(0.08+0.02)     0.07(0.04+0.03)

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-08-28 12:34:05 +02:00
+									strbuf_read(&sb, 0, 0);
 									/*
 									 * Split by newline, but don't create an item
 									 * for the empty string after the last separator.
 									 */
 									if (sb.len && sb.buf[sb.len - 1] == '\n')
 										strbuf_setlen(&sb, sb.len - 1);
-												test-mergesort: use mem_pool for sort input

The previous patch almost halved the number of heap allocations for the
sort subcommand.  Reduce it further by using a mem_pool for the line
objects.

Note that t/perf/run can't be used directly to compare two versions of
test-mergesort because it always runs the helpers from the checked-out
version.  So I hand-merged the results of separate runs before and with
this patch:

macOS 12.5.1 on M1:
0071.12: DEFINE_LIST_SORT unsorted     0.22(0.20+0.01)     0.21(0.19+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.10(0.08+0.01)     0.10(0.08+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.10(0.08+0.01)     0.10(0.08+0.01)

Git SDK 64-bit on Windows 11 21H2 on Ryzen 7 5800H:
0071.12: DEFINE_LIST_SORT unsorted     0.54(0.00+0.06)     0.44(0.01+0.06)
0071.14: DEFINE_LIST_SORT sorted       0.21(0.03+0.03)     0.19(0.04+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.21(0.01+0.04)     0.19(0.04+0.04)

Debian bullseye on WSL2 on the same system:
0071.12: DEFINE_LIST_SORT unsorted     0.29(0.27+0.01)     0.22(0.19+0.02)
0071.14: DEFINE_LIST_SORT sorted       0.07(0.06+0.01)     0.06(0.04+0.02)
0071.16: DEFINE_LIST_SORT reversed     0.07(0.04+0.03)     0.06(0.04+0.02)

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-08-28 12:34:47 +02:00
+									mem_pool_init(&lines_pool, 0);
-												test-mergesort: read sort input all at once

The sort subcommand of test-mergesort is used to test the performance of
sorting linked lists.  It reads lines from stdin, sorts them and prints
the result to stdout.  Two heap allocations are done per line: One for
the linked list item and one for the actual line string.  That imposes a
significant amount of allocation overhead.

Reduce it by doing the same as the sort subcommand of test-string-list,
namely to read the whole input file into a single buffer and then split
it in-place.

Note that t/perf/run can't be used directly to compare two versions of
test-mergesort because it always runs the helpers from the checked-out
version.  So I hand-merged the results of separate runs before and with
this patch:

macOS 12.5.1 on M1:
0071.12: DEFINE_LIST_SORT unsorted     0.23(0.20+0.01)     0.22(0.20+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.12(0.10+0.01)     0.10(0.08+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.12(0.10+0.01)     0.10(0.08+0.01)

Git SDK 64-bit on Windows 11 21H2 on Ryzen 7 5800H:
0071.12: DEFINE_LIST_SORT unsorted     0.71(0.00+0.03)     0.54(0.00+0.06)
0071.14: DEFINE_LIST_SORT sorted       0.42(0.00+0.04)     0.21(0.03+0.03)
0071.16: DEFINE_LIST_SORT reversed     0.42(0.06+0.01)     0.21(0.01+0.04)

Debian bullseye on WSL2 on the same system:
0071.12: DEFINE_LIST_SORT unsorted     0.41(0.39+0.02)     0.29(0.27+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.11(0.08+0.02)     0.07(0.06+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.11(0.08+0.02)     0.07(0.04+0.03)

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-08-28 12:34:05 +02:00
+									p = sb.buf;
 									for (;;) {
 										char *eol = strchr(p, '\n');
-												test-mergesort: use mem_pool for sort input

The previous patch almost halved the number of heap allocations for the
sort subcommand.  Reduce it further by using a mem_pool for the line
objects.

Note that t/perf/run can't be used directly to compare two versions of
test-mergesort because it always runs the helpers from the checked-out
version.  So I hand-merged the results of separate runs before and with
this patch:

macOS 12.5.1 on M1:
0071.12: DEFINE_LIST_SORT unsorted     0.22(0.20+0.01)     0.21(0.19+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.10(0.08+0.01)     0.10(0.08+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.10(0.08+0.01)     0.10(0.08+0.01)

Git SDK 64-bit on Windows 11 21H2 on Ryzen 7 5800H:
0071.12: DEFINE_LIST_SORT unsorted     0.54(0.00+0.06)     0.44(0.01+0.06)
0071.14: DEFINE_LIST_SORT sorted       0.21(0.03+0.03)     0.19(0.04+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.21(0.01+0.04)     0.19(0.04+0.04)

Debian bullseye on WSL2 on the same system:
0071.12: DEFINE_LIST_SORT unsorted     0.29(0.27+0.01)     0.22(0.19+0.02)
0071.14: DEFINE_LIST_SORT sorted       0.07(0.06+0.01)     0.06(0.04+0.02)
0071.16: DEFINE_LIST_SORT reversed     0.07(0.04+0.03)     0.06(0.04+0.02)

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-08-28 12:34:47 +02:00
+										struct line *line = mem_pool_alloc(&lines_pool, sizeof(*line));
-												test-mergesort: read sort input all at once

The sort subcommand of test-mergesort is used to test the performance of
sorting linked lists.  It reads lines from stdin, sorts them and prints
the result to stdout.  Two heap allocations are done per line: One for
the linked list item and one for the actual line string.  That imposes a
significant amount of allocation overhead.

Reduce it by doing the same as the sort subcommand of test-string-list,
namely to read the whole input file into a single buffer and then split
it in-place.

Note that t/perf/run can't be used directly to compare two versions of
test-mergesort because it always runs the helpers from the checked-out
version.  So I hand-merged the results of separate runs before and with
this patch:

macOS 12.5.1 on M1:
0071.12: DEFINE_LIST_SORT unsorted     0.23(0.20+0.01)     0.22(0.20+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.12(0.10+0.01)     0.10(0.08+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.12(0.10+0.01)     0.10(0.08+0.01)

Git SDK 64-bit on Windows 11 21H2 on Ryzen 7 5800H:
0071.12: DEFINE_LIST_SORT unsorted     0.71(0.00+0.03)     0.54(0.00+0.06)
0071.14: DEFINE_LIST_SORT sorted       0.42(0.00+0.04)     0.21(0.03+0.03)
0071.16: DEFINE_LIST_SORT reversed     0.42(0.06+0.01)     0.21(0.01+0.04)

Debian bullseye on WSL2 on the same system:
0071.12: DEFINE_LIST_SORT unsorted     0.41(0.39+0.02)     0.29(0.27+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.11(0.08+0.02)     0.07(0.06+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.11(0.08+0.02)     0.07(0.04+0.03)

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-08-28 12:34:05 +02:00
+										line->text = p;
 										*tail = line;
 										tail = &line->next;
 										if (!eol)
 											break;
 										*eol = '\0';
 										p = eol + 1;
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
+									}
-												test-mergesort: read sort input all at once

The sort subcommand of test-mergesort is used to test the performance of
sorting linked lists.  It reads lines from stdin, sorts them and prints
the result to stdout.  Two heap allocations are done per line: One for
the linked list item and one for the actual line string.  That imposes a
significant amount of allocation overhead.

Reduce it by doing the same as the sort subcommand of test-string-list,
namely to read the whole input file into a single buffer and then split
it in-place.

Note that t/perf/run can't be used directly to compare two versions of
test-mergesort because it always runs the helpers from the checked-out
version.  So I hand-merged the results of separate runs before and with
this patch:

macOS 12.5.1 on M1:
0071.12: DEFINE_LIST_SORT unsorted     0.23(0.20+0.01)     0.22(0.20+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.12(0.10+0.01)     0.10(0.08+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.12(0.10+0.01)     0.10(0.08+0.01)

Git SDK 64-bit on Windows 11 21H2 on Ryzen 7 5800H:
0071.12: DEFINE_LIST_SORT unsorted     0.71(0.00+0.03)     0.54(0.00+0.06)
0071.14: DEFINE_LIST_SORT sorted       0.42(0.00+0.04)     0.21(0.03+0.03)
0071.16: DEFINE_LIST_SORT reversed     0.42(0.06+0.01)     0.21(0.01+0.04)

Debian bullseye on WSL2 on the same system:
0071.12: DEFINE_LIST_SORT unsorted     0.41(0.39+0.02)     0.29(0.27+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.11(0.08+0.02)     0.07(0.06+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.11(0.08+0.02)     0.07(0.04+0.03)

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-08-28 12:34:05 +02:00
+									*tail = NULL;
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
-												test-mergesort: use DEFINE_LIST_SORT

Build a typed sort function for the mergesort performance test tool
using DEFINE_LIST_SORT instead of calling llist_mergesort().  This gets
rid of the next pointer accessor functions and improves the performance
at the cost of a slightly higher object text size.

Before:
0071.12: llist_mergesort() unsorted    0.24(0.22+0.01)
0071.14: llist_mergesort() sorted      0.12(0.10+0.01)
0071.16: llist_mergesort() reversed    0.12(0.10+0.01)

__TEXT	__DATA	__OBJC	others	dec	hex
6407	276	0	24701	31384	7a98	t/helper/test-mergesort.o

With this patch:
0071.12: DEFINE_LIST_SORT unsorted     0.22(0.21+0.01)
0071.14: DEFINE_LIST_SORT sorted       0.11(0.10+0.01)
0071.16: DEFINE_LIST_SORT reversed     0.11(0.10+0.01)

__TEXT	__DATA	__OBJC	others	dec	hex
6615	276	0	25832	32723	7fd3	t/helper/test-mergesort.o

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-07-16 18:57:18 +02:00
+									sort_lines(&lines, compare_strings);
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
 									while (lines) {
-												test-mergesort: use strbuf_getline()

Strip line ending characters to make sure empty lines are sorted like
sort(1) does.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:10:09 +02:00
+										puts(lines->text);
-												add mergesort() for linked lists

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2012-04-01 00:10:11 +02:00
+										lines = lines->next;
 									}
 									return 0;
 								}
-												test-mergesort: add sort subcommand

Give the code for sorting a text file its own sub-command.  This allows
extending the helper, which we'll do in the following patches.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:11:19 +02:00
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+								static void dist_sawtooth(int *arr, int n, int m)
 								{
 									int i;
 									for (i = 0; i < n; i++)
 										arr[i] = i % m;
 								}
 								static void dist_rand(int *arr, int n, int m)
 								{
 									int i;
-												test-mergesort: use repeatable random numbers

Use MINSTD to generate pseudo-random numbers consistently instead of
using rand(3), whose output can vary from system to system, and reset
its seed before filling in the test values.  This gives repeatable
results across versions and systems, which simplifies sharing and
comparing of results between developers.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-08 06:04:42 +02:00
+									uint32_t seed = 1;
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+									for (i = 0; i < n; i++)
-												test-mergesort: use repeatable random numbers

Use MINSTD to generate pseudo-random numbers consistently instead of
using rand(3), whose output can vary from system to system, and reset
its seed before filling in the test values.  This gives repeatable
results across versions and systems, which simplifies sharing and
comparing of results between developers.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-08 06:04:42 +02:00
+										arr[i] = minstd_rand(&seed) % m;
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+								}
 								/*
 								 * Fill arr[0..n) with the "stagger" pattern: arr[i] = (i * m + i) % n.
 								 *
 								 * The product is evaluated in 64 bits: i * m leaves the range of int
 								 * once n and m get large, and signed overflow is undefined behavior.
 								 * For inputs where the int computation did not overflow the result is
 								 * unchanged.
 								 */
 								static void dist_stagger(int *arr, int n, int m)
 								{
 									int i;
 									for (i = 0; i < n; i++)
 										arr[i] = (int)(((int64_t)i * m + i) % n);
 								}
 								/*
 								 * Fill arr[0..n) with values that rise 0, 1, 2, ... while the index is
 								 * below m and stay at m from there on.
 								 */
 								static void dist_plateau(int *arr, int n, int m)
 								{
 									int idx;

 									for (idx = 0; idx < n; idx++) {
 										if (idx < m)
 											arr[idx] = idx;
 										else
 											arr[idx] = m;
 									}
 								}
 								static void dist_shuffle(int *arr, int n, int m)
 								{
 									int i, j, k;
-												test-mergesort: use repeatable random numbers

Use MINSTD to generate pseudo-random numbers consistently instead of
using rand(3), whose output can vary from system to system, and reset
its seed before filling in the test values.  This gives repeatable
results across versions and systems, which simplifies sharing and
comparing of results between developers.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-08 06:04:42 +02:00
+									uint32_t seed = 1;
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+									for (i = j = 0, k = 1; i < n; i++)
-												test-mergesort: use repeatable random numbers

Use MINSTD to generate pseudo-random numbers consistently instead of
using rand(3), whose output can vary from system to system, and reset
its seed before filling in the test values.  This gives repeatable
results across versions and systems, which simplifies sharing and
comparing of results between developers.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-08 06:04:42 +02:00
+										arr[i] = minstd_rand(&seed) % m ? (j += 2) : (k += 2);
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+								}
 								/* Build the table entry that pairs the string "X" with the function dist_X(). */
 								#define DIST(id) { .name = #id, .fn = dist_##id }

 								/* Available value distributions; looked up by name. */
 								static struct dist {
 									/* name as given on the command line */
 									const char *name;
 									/* fills arr[0..n) according to the distribution parameter m */
 									void (*fn)(int *arr, int n, int m);
 								} dist[] = {
 									DIST(sawtooth),
 									DIST(rand),
 									DIST(stagger),
 									DIST(plateau),
 									DIST(shuffle),
 								};
-												test-mergesort: add generate subcommand

Add a subcommand for printing test data.  It can be used to generate
special test cases and feed them into the sort subcommand or sort(1) for
performance measurements.  It may also be useful to illustrate the
effect of distributions, modes and their parameters.

It generates n integers with the specified distribution and its
distribution-specific parameter m.  E.g. m is the maximum value for
the plateau distribution and the length and height of individual teeth
of the sawtooth distribution.

The generated values are printed as zero-padded eight-digit hexadecimal
numbers to make sure alphabetic and numeric order are the same.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:14:32 +02:00
+								static const struct dist *get_dist_by_name(const char *name)
 								{
 									int i;
 									for (i = 0; i < ARRAY_SIZE(dist); i++) {
 									       if (!strcmp(dist[i].name, name))
 										       return &dist[i];
 									}
 									return NULL;
 								}
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+								static void mode_copy(int *arr, int n)
 								{
 									/* nothing */
 								}
 								/* Reverse the order of the n elements of arr in place. */
 								static void mode_reverse(int *arr, int n)
 								{
 									int lo = 0;
 									int hi = n - 1;

 									/* Swap the outermost pair and move inwards until the ends meet. */
 									while (lo < hi) {
 										SWAP(arr[lo], arr[hi]);
 										lo++;
 										hi--;
 									}
 								}
 								/* Reverse only the first n / 2 elements of arr; the rest keeps its order. */
 								static void mode_reverse_1st_half(int *arr, int n)
 								{
 									mode_reverse(arr, n / 2);
 								}
 								/*
 								 * Reverse the elements from index n / 2 up to the end of arr; the first
 								 * n / 2 elements keep their order.
 								 */
 								static void mode_reverse_2nd_half(int *arr, int n)
 								{
 									int skip = n / 2;
 									int *tail = arr + skip;

 									mode_reverse(tail, n - skip);
 								}
 								/*
 								 * Three-way comparison of the two ints that av and bv point to.
 								 * Returns -1, 0 or 1 if the first is smaller than, equal to or greater
 								 * than the second.
 								 */
 								static int compare_ints(const void *av, const void *bv)
 								{
 									int lhs = *(const int *)av;
 									int rhs = *(const int *)bv;

 									if (lhs < rhs)
 										return -1;
 									if (lhs > rhs)
 										return 1;
 									return 0;
 								}
 								/* Sort the n elements of arr with QSORT(), ordered by compare_ints(). */
 								static void mode_sort(int *arr, int n)
 								{
 									QSORT(arr, n, compare_ints);
 								}
 								/*
 								 * Add a small repeating offset to every element: element i is increased
 								 * by i % 5, i.e. by 0, 1, 2, 3, 4, 0, 1, ...
 								 */
 								static void mode_dither(int *arr, int n)
 								{
 									int idx;
 									int offset = 0;

 									for (idx = 0; idx < n; idx++) {
 										arr[idx] += offset;
 										/* offset always equals idx % 5 */
 										if (++offset == 5)
 											offset = 0;
 									}
 								}
-												test-mergesort: add unriffle mode

Add a mode that turns sorted items into adversarial input for mergesort.
Do that by running mergesort in reverse and rearranging the items in
such a way that each merge needs the maximum number of operations to
undo it.

To riffle is a card shuffling technique and involves splitting a deck
into two and then to interleave them.  A perfect riffle takes one card
from each half in turn.  That's similar to the most expensive merge,
which has to take one item from each sublist in turn, which requires the
maximum number of comparisons (n-1).

So unriffle does that in reverse, i.e. it generates the first sublist
out of the items at even indexes and the second sublist out of the items
at odd indexes, without changing their order in any other way.  Done
recursively until we reach the trivial sublist length of one, this
twists the list into an order that requires the maximum effort for
mergesort to untangle.

As a baseline, here are the rand distributions with the highest number
of comparisons from "test-tool mergesort test":

   $ t/helper/test-tool mergesort test | awk '
      NR > 1 && $1 != "rand" {next}
      $7 > max[$3] {max[$3] = $7; line[$3] = $0}
      END {for (n in line) print line[n]}
   '

distribut mode                    n        m get_next set_next  compare verdict
rand      copy                  100       32     1184      700      569 OK
rand      reverse_1st_half     1023      256    16373    10230     8976 OK
rand      reverse_1st_half     1024      512    16384    10240     8993 OK
rand      dither               1025       64    18454    11275     9970 OK

And here are the most expensive ones overall:

   $ t/helper/test-tool mergesort test | awk '
      $7 > max[$3] {max[$3] = $7; line[$3] = $0}
      END {for (n in line) print line[n]}
   '

distribut mode                    n        m get_next set_next  compare verdict
stagger   reverse               100       64     1184      700      580 OK
sawtooth  unriffle             1023     1024    16373    10230     9179 OK
sawtooth  unriffle             1024     1024    16384    10240     9217 OK
stagger   unriffle             1025     2048    18454    11275    10241 OK

The sawtooth distribution with m>=n generates a sorted list.  The
unriffle mode is designed to turn that into adversarial input for
mergesort, and that checks out for n=1023 and n=1024, where it produces
the list that requires the most comparisons.

Item counts that are not powers of two have other winners, and that's
because unriffle recursively splits lists into equal-sized halves, while
llist_mergesort() splits them into the biggest power of two smaller than
n and the rest, e.g. for n=1025 it sorts the first 1024 separately and
finally merges them to the last item.

So unriffle mode works as designed for the intended use case, but to
consistently generate adversarial input for unbalanced merges we need
something else.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:16:49 +02:00
+								static void unriffle(int *arr, int n, int *tmp)
 								{
 									int i, j;
 									COPY_ARRAY(tmp, arr, n);
 									for (i = j = 0; i < n; i += 2)
 										arr[j++] = tmp[i];
 									for (i = 1; i < n; i += 2)
 										arr[j++] = tmp[i];
 								}
 								/*
 								 * Unriffle arr[0..n), then do the same recursively for its first n / 2
 								 * elements and for the remaining n - n / 2 elements.  Lists of fewer
 								 * than two elements are left alone.
 								 *
 								 * tmp is scratch space handed on to unriffle().
 								 */
 								static void unriffle_recursively(int *arr, int n, int *tmp)
 								{
 									int left;

 									if (n <= 1)
 										return;

 									left = n / 2;
 									unriffle(arr, n, tmp);
 									unriffle_recursively(arr, left, tmp);
 									unriffle_recursively(arr + left, n - left, tmp);
 								}
 								/*
 								 * Mode entry point: run unriffle_recursively() over all n elements of
 								 * arr, using a scratch array of n ints that is released before returning.
 								 */
 								static void mode_unriffle(int *arr, int n)
 								{
 									int *tmp;
 									ALLOC_ARRAY(tmp, n);
 									unriffle_recursively(arr, n, tmp);
 									free(tmp);
 								}
-												test-mergesort: add unriffle_skewed mode

Add a mode that turns a sorted list into adversarial input for a
bottom-up mergesort implementation that doubles the length of sorted
sublists at each level -- like our llist_mergesort().

While unriffle mode splits the list in half at each recursion step,
unriffle_skewed splits it into 2^l items and the rest, with 2^l being
the highest power of two smaller than the number of items and thus
2^l >= rest.  The rest is unriffled with the tail of the first half to
require a merge to compare the maximum number of elements.

It complements the unriffle mode, which targets balanced merges.  If
the number of elements is a power of two then both actually produce the
same result, as 2^l == rest == n/2 at each recursion step in that case.

Here are the results:

   $ t/helper/test-tool mergesort test | awk '
      $7 > max[$3] {max[$3] = $7; line[$3] = $0}
      END {for (n in line) print line[n]}
   '

distribut mode                    n        m get_next set_next  compare verdict
sawtooth  unriffle_skewed       100      128     1184      700      589 OK
sawtooth  unriffle_skewed      1023     1024    16373    10230     9207 OK
sawtooth  unriffle             1024     1024    16384    10240     9217 OK
sawtooth  unriffle_skewed      1025     2048    18454    11275    10241 OK

The sawtooth distribution with m>=n produces a sorted list and
unriffle_skewed mode turns it into adversarial input for unbalanced
merges, which it wins in all cases except for n=1024 -- the resulting
list is the same, but unriffle is tested before unriffle_skewed, so its
result is selected by the AWK script.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:17:57 +02:00
+								static unsigned int prev_pow2(unsigned int n)
 								{
 									unsigned int pow2 = 1;
 									while (pow2 * 2 < n)
 										pow2 *= 2;
 									return pow2;
 								}
 								/*
 								 * Skewed variant of the recursive unriffle: arr[0..n) is divided into a
 								 * head of prev_pow2(n) elements and a tail holding the rest.  The tail
 								 * is unriffled together with the equally long end of the head, then
 								 * head and tail are processed recursively.  Lists of fewer than two
 								 * elements are left alone.
 								 *
 								 * tmp is scratch space handed on to unriffle().
 								 */
 								static void unriffle_recursively_skewed(int *arr, int n, int *tmp)
 								{
 									int head, tail;

 									if (n <= 1)
 										return;

 									head = prev_pow2(n);
 									tail = n - head;
 									unriffle(arr + head - tail, tail * 2, tmp);
 									unriffle_recursively_skewed(arr, head, tmp);
 									unriffle_recursively_skewed(arr + head, tail, tmp);
 								}
 								/*
 								 * Mode entry point: run unriffle_recursively_skewed() over all n
 								 * elements of arr, using a scratch array of n ints that is released
 								 * before returning.
 								 */
 								static void mode_unriffle_skewed(int *arr, int n)
 								{
 									int *tmp;
 									ALLOC_ARRAY(tmp, n);
 									unriffle_recursively_skewed(arr, n, tmp);
 									free(tmp);
 								}
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+								#define MODE(name) { #name, mode_##name }
 								static struct mode {
 									const char *name;
 									void (*fn)(int *arr, int n);
 								} mode[] = {
 									MODE(copy),
 									MODE(reverse),
 									MODE(reverse_1st_half),
 									MODE(reverse_2nd_half),
 									MODE(sort),
 									MODE(dither),
-												test-mergesort: add unriffle mode

Add a mode that turns sorted items into adversarial input for mergesort.
Do that by running mergesort in reverse and rearranging the items in
such a way that each merge needs the maximum number of operations to
undo it.

To riffle is a card shuffling technique and involves splitting a deck
into two and then to interleave them.  A perfect riffle takes one card
from each half in turn.  That's similar to the most expensive merge,
which has to take one item from each sublist in turn, which requires the
maximum number of comparisons (n-1).

So unriffle does that in reverse, i.e. it generates the first sublist
out of the items at even indexes and the second sublist out of the items
at odd indexes, without changing their order in any other way.  Done
recursively until we reach the trivial sublist length of one, this
twists the list into an order that requires the maximum effort for
mergesort to untangle.

As a baseline, here are the rand distributions with the highest number
of comparisons from "test-tool mergesort test":

   $ t/helper/test-tool mergesort test | awk '
      NR > 1 && $1 != "rand" {next}
      $7 > max[$3] {max[$3] = $7; line[$3] = $0}
      END {for (n in line) print line[n]}
   '

distribut mode                    n        m get_next set_next  compare verdict
rand      copy                  100       32     1184      700      569 OK
rand      reverse_1st_half     1023      256    16373    10230     8976 OK
rand      reverse_1st_half     1024      512    16384    10240     8993 OK
rand      dither               1025       64    18454    11275     9970 OK

And here are the most expensive ones overall:

   $ t/helper/test-tool mergesort test | awk '
      $7 > max[$3] {max[$3] = $7; line[$3] = $0}
      END {for (n in line) print line[n]}
   '

distribut mode                    n        m get_next set_next  compare verdict
stagger   reverse               100       64     1184      700      580 OK
sawtooth  unriffle             1023     1024    16373    10230     9179 OK
sawtooth  unriffle             1024     1024    16384    10240     9217 OK
stagger   unriffle             1025     2048    18454    11275    10241 OK

The sawtooth distribution with m>=n generates a sorted list.  The
unriffle mode is designed to turn that into adversarial input for
mergesort, and that checks out for n=1023 and n=1024, where it produces
the list that requires the most comparisons.

Item counts that are not powers of two have other winners, and that's
because unriffle recursively splits lists into equal-sized halves, while
llist_mergesort() splits them into the biggest power of two smaller than
n and the rest, e.g. for n=1025 it sorts the first 1024 separately and
finally merges them to the last item.

So unriffle mode works as designed for the intended use case, but to
consistently generate adversarial input for unbalanced merges we need
something else.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:16:49 +02:00
+									MODE(unriffle),
-												test-mergesort: add unriffle_skewed mode

Add a mode that turns a sorted list into adversarial input for a
bottom-up mergesort implementation that doubles the length of sorted
sublists at each level -- like our llist_mergesort().

While unriffle mode splits the list in half at each recursion step,
unriffle_skewed splits it into 2^l items and the rest, with 2^l being
the highest power of two smaller than the number of items and thus
2^l >= rest.  The rest is unriffled with the tail of the first half to
require a merge to compare the maximum number of elements.

It complements the unriffle mode, which targets balanced merges.  If
the number of elements is a power of two then both actually produce the
same result, as 2^l == rest == n/2 at each recursion step in that case.

Here are the results:

   $ t/helper/test-tool mergesort test | awk '
      $7 > max[$3] {max[$3] = $7; line[$3] = $0}
      END {for (n in line) print line[n]}
   '

distribut mode                    n        m get_next set_next  compare verdict
sawtooth  unriffle_skewed       100      128     1184      700      589 OK
sawtooth  unriffle_skewed      1023     1024    16373    10230     9207 OK
sawtooth  unriffle             1024     1024    16384    10240     9217 OK
sawtooth  unriffle_skewed      1025     2048    18454    11275    10241 OK

The sawtooth distribution with m>=n produces a sorted list and
unriffle_skewed mode turns it into adversarial input for unbalanced
merges, which it wins in all cases except for n=1024 -- the resulting
list is the same, but unriffle is tested before unriffle_skewed, so its
result is selected by the AWK script.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:17:57 +02:00
+									MODE(unriffle_skewed),
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+								};
-												test-mergesort: add generate subcommand

Add a subcommand for printing test data.  It can be used to generate
special test cases and feed them into the sort subcommand or sort(1) for
performance measurements.  It may also be useful to illustrate the
effect of distributions, modes and their parameters.

It generates n integers with the specified distribution and its
distribution-specific parameter m.  E.g. m is the maximum value for
the plateau distribution and the length and height of individual teeth
of the sawtooth distribution.

The generated values are printed as zero-padded eight-digit hexadecimal
numbers to make sure alphabetic and numeric order are the same.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:14:32 +02:00
+								static const struct mode *get_mode_by_name(const char *name)
 								{
 									int i;
 									for (i = 0; i < ARRAY_SIZE(mode); i++) {
 									       if (!strcmp(mode[i].name, name))
 										       return &mode[i];
 									}
 									return NULL;
 								}
 								/*
 								 * "generate" subcommand: fill an array of n ints using a distribution,
 								 * rearrange it with a mode and print each value on its own line as a
 								 * zero-padded eight-digit hexadecimal number.
 								 *
 								 * argv holds exactly four strings: <distribution> <mode> <n> <m>.
 								 *
 								 * Returns 0 on success and 1 if the argument count is wrong, the
 								 * distribution or mode name is unknown, n or m is not a complete
 								 * decimal integer that fits into an int, or n is negative.
 								 *
 								 * NOTE(review): m itself is passed through unchecked; distributions
 								 * that compute "% m" (e.g. sawtooth) divide by zero for m == 0.
 								 */
 								static int generate(int argc, const char **argv)
 								{
 									const struct dist *dist = NULL;
 									const struct mode *mode = NULL;
 									char *end;
 									long n_arg, m_arg;
 									int i, n, m, *arr;

 									if (argc != 4)
 										return 1;
 									dist = get_dist_by_name(argv[0]);
 									mode = get_mode_by_name(argv[1]);
 									if (!dist || !mode)
 										return 1;

 									/* Reject empty numbers, trailing garbage and values outside int. */
 									n_arg = strtol(argv[2], &end, 10);
 									if (end == argv[2] || *end || n_arg < 0 || n_arg != (int)n_arg)
 										return 1;
 									m_arg = strtol(argv[3], &end, 10);
 									if (end == argv[3] || *end || m_arg != (int)m_arg)
 										return 1;
 									n = (int)n_arg;
 									m = (int)m_arg;

 									ALLOC_ARRAY(arr, n);
 									dist->fn(arr, n, m);
 									mode->fn(arr, n);
 									for (i = 0; i < n; i++)
 										printf("%08x\n", arr[i]);
 									free(arr);
 									return 0;
 								}
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+								static struct stats {
 									int get_next, set_next, compare;
 								} stats;
 								/* Node of the singly linked list that gets sorted in the self-test. */
 								struct number {
 									/* sort key */
 									int value;
 									/* position of the node in the list before sorting */
 									int rank;
 									/* following node, NULL at the end of the list */
 									struct number *next;
 								};
-												test-mergesort: use DEFINE_LIST_SORT_DEBUG

Define a typed sort function using DEFINE_LIST_SORT_DEBUG for the
mergesort sanity check instead of using llist_mergesort().  This gets
rid of the next pointer accessor functions and improves the performance
at the cost of slightly bigger object text.

Before:
Benchmark 1: t/helper/test-tool mergesort test
  Time (mean ± σ):     108.4 ms ±   0.2 ms    [User: 106.7 ms, System: 1.2 ms]
  Range (min … max):   108.0 ms … 108.8 ms    27 runs

__TEXT	__DATA	__OBJC	others	dec	hex
6251	276	0	23172	29699	7403	t/helper/test-mergesort.o

With this patch:
Benchmark 1: t/helper/test-tool mergesort test
  Time (mean ± σ):      94.0 ms ±   0.2 ms    [User: 92.4 ms, System: 1.1 ms]
  Range (min … max):    93.7 ms …  94.5 ms    31 runs

__TEXT	__DATA	__OBJC	others	dec	hex
6407	276	0	24701	31384	7a98	t/helper/test-mergesort.o

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-07-16 18:56:32 +02:00
 								/*
 								 * Define the static, typed sort function sort_numbers() for lists of
 								 * struct number linked through their next member.  The two trailing
 								 * expressions bump the stats counters; presumably they are evaluated
 								 * on every read and every write of a next pointer -- confirm against
 								 * the macro definition.
 								 */
+								DEFINE_LIST_SORT_DEBUG(static, sort_numbers, struct number, next,
 										       stats.get_next++, stats.set_next++);
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
-												test-mergesort: use DEFINE_LIST_SORT_DEBUG

Define a typed sort function using DEFINE_LIST_SORT_DEBUG for the
mergesort sanity check instead of using llist_mergesort().  This gets
rid of the next pointer accessor functions and improves the performance
at the cost of slightly bigger object text.

Before:
Benchmark 1: t/helper/test-tool mergesort test
  Time (mean ± σ):     108.4 ms ±   0.2 ms    [User: 106.7 ms, System: 1.2 ms]
  Range (min … max):   108.0 ms … 108.8 ms    27 runs

__TEXT	__DATA	__OBJC	others	dec	hex
6251	276	0	23172	29699	7403	t/helper/test-mergesort.o

With this patch:
Benchmark 1: t/helper/test-tool mergesort test
  Time (mean ± σ):      94.0 ms ±   0.2 ms    [User: 92.4 ms, System: 1.1 ms]
  Range (min … max):    93.7 ms …  94.5 ms    31 runs

__TEXT	__DATA	__OBJC	others	dec	hex
6407	276	0	24701	31384	7a98	t/helper/test-mergesort.o

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-07-16 18:56:32 +02:00
+								static int compare_numbers(const struct number *an, const struct number *bn)
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+								{
 									int a = an->value, b = bn->value;
 									stats.compare++;
 									return (a > b) - (a < b);
 								}
 								/* Free every node of the list that starts at list; NULL is an empty list. */
 								static void clear_numbers(struct number *list)
 								{
 									struct number *node, *following;

 									for (node = list; node; node = following) {
 										/* fetch the successor before the node is released */
 										following = node->next;
 										free(node);
 									}
 								}
 								static int test(const struct dist *dist, const struct mode *mode, int n, int m)
 								{
 									int *arr;
 									size_t i;
 									struct number *curr, *list, **tail;
 									int is_sorted = 1;
 									int is_stable = 1;
 									const char *verdict;
 									int result = -1;
 									ALLOC_ARRAY(arr, n);
 									dist->fn(arr, n, m);
 									mode->fn(arr, n);
 									for (i = 0, tail = &list; i < n; i++) {
 										curr = xmalloc(sizeof(*curr));
 										curr->value = arr[i];
 										curr->rank = i;
 										*tail = curr;
 										tail = &curr->next;
 									}
 									*tail = NULL;
 									stats.get_next = stats.set_next = stats.compare = 0;
-												test-mergesort: use DEFINE_LIST_SORT_DEBUG

Define a typed sort function using DEFINE_LIST_SORT_DEBUG for the
mergesort sanity check instead of using llist_mergesort().  This gets
rid of the next pointer accessor functions and improves the performance
at the cost of slightly bigger object text.

Before:
Benchmark 1: t/helper/test-tool mergesort test
  Time (mean ± σ):     108.4 ms ±   0.2 ms    [User: 106.7 ms, System: 1.2 ms]
  Range (min … max):   108.0 ms … 108.8 ms    27 runs

__TEXT	__DATA	__OBJC	others	dec	hex
6251	276	0	23172	29699	7403	t/helper/test-mergesort.o

With this patch:
Benchmark 1: t/helper/test-tool mergesort test
  Time (mean ± σ):      94.0 ms ±   0.2 ms    [User: 92.4 ms, System: 1.1 ms]
  Range (min … max):    93.7 ms …  94.5 ms    31 runs

__TEXT	__DATA	__OBJC	others	dec	hex
6407	276	0	24701	31384	7a98	t/helper/test-mergesort.o

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2022-07-16 18:56:32 +02:00
+									sort_numbers(&list, compare_numbers);
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
 									QSORT(arr, n, compare_ints);
 									for (i = 0, curr = list; i < n && curr; i++, curr = curr->next) {
 										if (arr[i] != curr->value)
 											is_sorted = 0;
 										if (curr->next && curr->value == curr->next->value &&
 										    curr->rank >= curr->next->rank)
 											is_stable = 0;
 									}
 									if (i < n) {
 										verdict = "too short";
 									} else if (curr) {
 										verdict = "too long";
 									} else if (!is_sorted) {
 										verdict = "not sorted";
 									} else if (!is_stable) {
 										verdict = "unstable";
 									} else {
 										verdict = "OK";
 										result = 0;
 									}
 									printf("%-9s %-16s %8d %8d %8d %8d %8d %s\n",
 									       dist->name, mode->name, n, m, stats.get_next, stats.set_next,
 									       stats.compare, verdict);
 									clear_numbers(list);
 									free(arr);
 									return result;
 								}
 								/*
 								 * A version of the qsort certification program from "Engineering a Sort
 								 * Function" by Bentley and McIlroy, Software—Practice and Experience,
 								 * Volume 23, Issue 11, 1249–1265 (November 1993).
 								 *
 								 * Each argument is parsed as a list length n.  For every n, each
 								 * distribution is paired with each mode and with m = 1, 2, 4, ... for
 								 * as long as m < 2 * n.  Returns 1 as soon as a test() call returns
 								 * non-zero, and 0 if all of them return zero.  When called without
 								 * arguments, a built-in set of lengths is used instead.
 								 */
 								static int run_tests(int argc, const char **argv)
 								{
 									const char *argv_default[] = { "100", "1023", "1024", "1025" };

 									if (argc == 0)
 										return run_tests(ARRAY_SIZE(argv_default), argv_default);

 									/* Column headers; the widths mirror the per-test line in test(). */
 									printf("%-9s %-16s %8s %8s %8s %8s %8s %s\n",
 									       "distribut", "mode", "n", "m", "get_next", "set_next",
 									       "compare", "verdict");

 									for (; argc; argc--, argv++) {
 										int n = strtol(*argv, NULL, 10);
 										int d, k, m;

 										for (d = 0; d < ARRAY_SIZE(dist); d++) {
 											for (k = 0; k < ARRAY_SIZE(mode); k++) {
 												for (m = 1; m < 2 * n; m *= 2)
 													if (test(&dist[d], &mode[k], n, m))
 														return 1;
 											}
 										}
 									}
 									return 0;
 								}
-												test-mergesort: add sort subcommand

Give the code for sorting a text file its own sub-command.  This allows
extending the helper, which we'll do in the following patches.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:11:19 +02:00
+								int cmd__mergesort(int argc, const char **argv)
 								{
-												test-mergesort: add generate subcommand

Add a subcommand for printing test data.  It can be used to generate
special test cases and feed them into the sort subcommand or sort(1) for
performance measurements.  It may also be useful to illustrate the
effect of distributions, modes and their parameters.

It generates n integers with the specified distribution and its
distribution-specific parameter m.  E.g. m is the maximum value for
the plateau distribution and the length and height of individual teeth
of the sawtooth distribution.

The generated values are printed as zero-padded eight-digit hexadecimal
numbers to make sure alphabetic and numeric order are the same.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:14:32 +02:00
+									int i;
 									const char *sep;
 									if (argc == 6 && !strcmp(argv[1], "generate"))
 										return generate(argc - 2, argv + 2);
-												test-mergesort: add sort subcommand

Give the code for sorting a text file its own sub-command.  This allows
extending the helper, which we'll do in the following patches.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:11:19 +02:00
+									if (argc == 2 && !strcmp(argv[1], "sort"))
 										return sort_stdin();
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(3))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+									if (argc > 1 && !strcmp(argv[1], "test"))
 										return run_tests(argc - 2, argv + 2);
-												test-mergesort: add generate subcommand

Add a subcommand for printing test data.  It can be used to generate
special test cases and feed them into the sort subcommand or sort(1) for
performance measurements.  It may also be useful to illustrate the
effect of distributions, modes and their parameters.

It generates n integers with the specified distribution and its
distribution-specific parameter m.  E.g. m is the maximum value for
the plateau distribution and the length and height of individual teeth
of the sawtooth distribution.

The generated values are printed as zero-padded eight-digit hexadecimal
numbers to make sure alphabetic and numeric order are the same.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:14:32 +02:00
+									fprintf(stderr, "usage: test-tool mergesort generate <distribution> <mode> <n> <m>\n");
 									fprintf(stderr, "   or: test-tool mergesort sort\n");
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(1))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+									fprintf(stderr, "   or: test-tool mergesort test [<n>...]\n");
-												test-mergesort: add generate subcommand

Add a subcommand for printing test data.  It can be used to generate
special test cases and feed them into the sort subcommand or sort(1) for
performance measurements.  It may also be useful to illustrate the
effect of distributions, modes and their parameters.

It generates n integers with the specified distribution and its
distribution-specific parameter m.  E.g. m is the maximum value for
the plateau distribution and the length and height of individual teeth
of the sawtooth distribution.

The generated values are printed as zero-padded eight-digit hexadecimal
numbers to make sure alphabetic and numeric order are the same.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:14:32 +02:00
+									fprintf(stderr, "\n");
 									for (i = 0, sep = "distributions: "; i < ARRAY_SIZE(dist); i++, sep = ", ")
 										fprintf(stderr, "%s%s", sep, dist[i].name);
 									fprintf(stderr, "\n");
 									for (i = 0, sep = "modes: "; i < ARRAY_SIZE(mode); i++, sep = ", ")
 										fprintf(stderr, "%s%s", sep, mode[i].name);
 									fprintf(stderr, "\n");
-												test-mergesort: add test subcommand

Adapt the qsort certification program from "Engineering a Sort Function"
by Bentley and McIlroy for testing our linked list sort function.  It
generates several lists with various distribution patterns and counts
the number of operations llist_mergesort() needs to order them.  It
compares the result to the output of a trusted sort function (qsort(1))
and also checks if the sort is stable.

Also add a test script that makes use of the new subcommand.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:12:27 +02:00
+									return 129;
-												test-mergesort: add sort subcommand

Give the code for sorting a text file its own sub-command.  This allows
extending the helper, which we'll do in the following patches.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

											
										
										
											2021-10-01 11:11:19 +02:00
+								}