diff --git a/.gitignore b/.gitignore index bb5c91e712..0ca7df8e02 100644 --- a/.gitignore +++ b/.gitignore @@ -193,6 +193,7 @@ /test-run-command /test-sha1 /test-sigchain +/test-string-list /test-subprocess /test-svn-fe /common-cmds.h diff --git a/Documentation/technical/api-string-list.txt b/Documentation/technical/api-string-list.txt index 113f8410ea..1dcad47f7c 100644 --- a/Documentation/technical/api-string-list.txt +++ b/Documentation/technical/api-string-list.txt @@ -21,7 +21,8 @@ member (you need this if you add things later) and you should set the `nr` and `alloc` members in that case, too. . Adds new items to the list, using `string_list_append`, - `string_list_append_nodup`, or `string_list_insert`. + `string_list_append_nodup`, `string_list_insert`, + `string_list_split`, and/or `string_list_split_in_place`. . Can check if a string is in the list using `string_list_has_string` or `unsorted_string_list_has_string` and get it from the list using @@ -135,6 +136,25 @@ counterpart for sorted lists, which performs a binary search. is set. The third parameter controls if the `util` pointer of the items should be freed or not. +`string_list_split`:: +`string_list_split_in_place`:: + + Split a string into substrings on a delimiter character and + append the substrings to a `string_list`. If `maxsplit` is + non-negative, then split at most `maxsplit` times. Return the + number of substrings appended to the list. ++ +`string_list_split` requires a `string_list` that has `strdup_strings` +set to true; it leaves the input string untouched and makes copies of +the substrings in newly-allocated memory. +`string_list_split_in_place` requires a `string_list` that has +`strdup_strings` set to false; it splits the input string in place, +overwriting the delimiter characters with NULs and creating new +string_list_items that point into the original string (the original +string must therefore not be modified or freed while the `string_list` +is in use). + + Data structures --------------- diff --git a/Makefile b/Makefile index 66e82167eb..ebbb381c19 100644 --- a/Makefile +++ b/Makefile @@ -501,6 +501,7 @@ TEST_PROGRAMS_NEED_X += test-run-command TEST_PROGRAMS_NEED_X += test-scrap-cache-tree TEST_PROGRAMS_NEED_X += test-sha1 TEST_PROGRAMS_NEED_X += test-sigchain +TEST_PROGRAMS_NEED_X += test-string-list TEST_PROGRAMS_NEED_X += test-subprocess TEST_PROGRAMS_NEED_X += test-svn-fe diff --git a/string-list.c b/string-list.c index ad2aa5af91..acb1f5b6dc 100644 --- a/string-list.c +++ b/string-list.c @@ -204,3 +204,56 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_ list->items[i] = list->items[list->nr-1]; list->nr--; } + +int string_list_split(struct string_list *list, const char *string, + int delim, int maxsplit) +{ + int count = 0; + const char *p = string, *end; + + if (!list->strdup_strings) + die("internal error in string_list_split(): " + "list->strdup_strings must be set"); + for (;;) { + count++; + if (maxsplit >= 0 && count > maxsplit) { + string_list_append(list, p); + return count; + } + end = strchr(p, delim); + if (end) { + string_list_append_nodup(list, xmemdupz(p, end - p)); + p = end + 1; + } else { + string_list_append(list, p); + return count; + } + } +} + +int string_list_split_in_place(struct string_list *list, char *string, + int delim, int maxsplit) +{ + int count = 0; + char *p = string, *end; + + if (list->strdup_strings) + die("internal error in string_list_split_in_place(): " + "list->strdup_strings must not be set"); + for (;;) { + count++; + if (maxsplit >= 0 && count > maxsplit) { + string_list_append(list, p); + return count; + } + end = strchr(p, delim); + if (end) { + *end = '\0'; + string_list_append(list, p); + p = end + 1; + } else { + string_list_append(list, p); + return count; + } + } +} diff --git a/string-list.h b/string-list.h index 1b3915b741..dc5fbc80ac 100644 --- a/string-list.h +++ b/string-list.h @@ -63,4 +63,33 @@ struct string_list_item *unsorted_string_list_lookup(struct string_list *list, const char *string); void unsorted_string_list_delete_item(struct string_list *list, int i, int free_util); + +/* + * Split string into substrings on character delim and append the + * substrings to list. The input string is not modified. + * list->strdup_strings must be set, as new memory needs to be + * allocated to hold the substrings. If maxsplit is non-negative, + * then split at most maxsplit times. Return the number of substrings + * appended to list. + * + * Examples: + * string_list_split(l, "foo:bar:baz", ':', -1) -> ["foo", "bar", "baz"] + * string_list_split(l, "foo:bar:baz", ':', 0) -> ["foo:bar:baz"] + * string_list_split(l, "foo:bar:baz", ':', 1) -> ["foo", "bar:baz"] + * string_list_split(l, "foo:bar:", ':', -1) -> ["foo", "bar", ""] + * string_list_split(l, "", ':', -1) -> [""] + * string_list_split(l, ":", ':', -1) -> ["", ""] + */ +int string_list_split(struct string_list *list, const char *string, + int delim, int maxsplit); + +/* + * Like string_list_split(), except that string is split in-place: the + * delimiter characters in string are overwritten with NULs, and the + * new string_list_items point into string (which therefore must not + * be modified or freed while the string_list is in use). + * list->strdup_strings must *not* be set. + */ +int string_list_split_in_place(struct string_list *list, char *string, + int delim, int maxsplit); #endif /* STRING_LIST_H */ diff --git a/t/t0063-string-list.sh b/t/t0063-string-list.sh new file mode 100755 index 0000000000..fb85430751 --- /dev/null +++ b/t/t0063-string-list.sh @@ -0,0 +1,63 @@ +#!/bin/sh +# +# Copyright (c) 2012 Michael Haggerty +# + +test_description='Test string list functionality' + +. ./test-lib.sh + +test_split () { + cat >expected && + test_expect_success "split $1 at $2, max $3" " + test-string-list split '$1' '$2' '$3' >actual && + test_cmp expected actual && + test-string-list split_in_place '$1' '$2' '$3' >actual && + test_cmp expected actual + " +} + +test_split "foo:bar:baz" ":" "-1" <nr; i++) + printf("[%d]: \"%s\"\n", i, list->items[i].string); +} + +int main(int argc, char **argv) +{ + if (argc == 5 && !strcmp(argv[1], "split")) { + struct string_list list = STRING_LIST_INIT_DUP; + int i; + const char *s = argv[2]; + int delim = *argv[3]; + int maxsplit = atoi(argv[4]); + + i = string_list_split(&list, s, delim, maxsplit); + printf("%d\n", i); + write_list(&list); + string_list_clear(&list, 0); + return 0; + } + + if (argc == 5 && !strcmp(argv[1], "split_in_place")) { + struct string_list list = STRING_LIST_INIT_NODUP; + int i; + char *s = xstrdup(argv[2]); + int delim = *argv[3]; + int maxsplit = atoi(argv[4]); + + i = string_list_split_in_place(&list, s, delim, maxsplit); + printf("%d\n", i); + write_list(&list); + string_list_clear(&list, 0); + free(s); + return 0; + } + + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], + argv[1] ? argv[1] : "(there was none)"); + return 1; +}