From 0acabf6fd515f6d84abcb287bc702c0b3dacd5fa Mon Sep 17 00:00:00 2001 From: Maxim Date: Sun, 12 Apr 2020 17:13:21 +0100 Subject: [PATCH] str_uniq() function added. --- README.md | 5 +++++ str.c | 30 ++++++++++++++++++++++++++++++ str.h | 3 +++ str_test.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+) diff --git a/README.md b/README.md index 17e2ffd..5d84d84 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,11 @@ provided (see `str.h` file). Binary search for the given key. The input array must be sorted using `str_order_asc`. Returns a pointer to the string matching the key, or NULL. +`size_t str_uniq(str* const array, const size_t count)`
+Retain only the unique strings in the given array. Returns the number of strings. +After the call, the strings in the array are sorted, so the array is suitable for +binary search using `str_search()` function. + #### Memory allocation By default the library uses `malloc(3)` for memory allocations, and calls `abort(3)` if the allocation fails. This behaviour can be changed by hash-defining `STR_EXT_ALLOC` diff --git a/str.c b/str.c index 7904eb8..05954e8 100644 --- a/str.c +++ b/str.c @@ -328,6 +328,36 @@ void str_sort(const str_cmp_func cmp, str* const array, const size_t count) qsort(array, count, sizeof(array[0]), cmp); } +// retain unique strings only: sorts the array with str_order_asc, compacts one copy of each distinct string to the front (duplicates are passed to str_clear()), and returns the number of strings kept; returns 0 for a NULL array or zero count +size_t str_uniq(str* const array, const size_t count) +{ + if(!array || count == 0) + return 0; + + if(count == 1) + return 1; + + // sorting lets the loop below detect duplicates by comparing each string with the last one kept; O(n * log n), but without memory allocation + str_sort(str_order_asc, array, count); + + str* p = array; + const str* const end = array + count; + + for(str* s = array + 1; ; ++s) + { + while(s < end && str_eq(*p, *s)) + str_clear(s++); + + if(s == end) + break; + + if(++p < s) + *p = str_move(s); + } + + return p + 1 - array; +} + // searching const str* str_search(const str key, const str* const array, const size_t count) { diff --git a/str.h b/str.h index bbff328..8fd8c81 100644 --- a/str.h +++ b/str.h @@ -179,6 +179,9 @@ int str_order_desc_ci(const void* const s1, const void* const s2); // sort array of strings void str_sort(const str_cmp_func cmp, str* const array, const size_t count); +// retain unique strings only (sorts the array first); returns the number of strings retained +size_t str_uniq(str* const array, const size_t count); + // searching const str* str_search(const str key, const str* const array, const size_t count); diff --git a/str_test.c b/str_test.c index 31371ac..5a96d06 100644 --- a/str_test.c +++ b/str_test.c @@ -283,6 +283,8 @@ void test_sort(void) assert(str_eq(src[1], str_lit("z"))); assert(str_eq(src[2], str_lit("bbb"))); assert(str_eq(src[3], str_lit("aaa"))); + + passed; } static @@ -303,6 +305,8 @@ void 
test_sort_ci(void) assert(str_eq_ci(src[1], str_lit("zzz"))); assert(str_eq_ci(src[2], str_lit("aaa"))); assert(str_eq_ci(src[3], str_lit("aaa"))); + + passed; } static @@ -318,6 +322,40 @@ void test_search(void) assert(str_search(src[2], src, count) == &src[2]); assert(str_search(src[3], src, count) == &src[3]); assert(str_search(str_lit("xxx"), src, count) == NULL); + + passed; +} + +static +void test_uniq(void) +{ + str src[10] = { 0 }; + + str* p = src; + + str_dup(p++, str_lit("zzz")); + str_dup(p++, str_lit("zzz")); + str_dup(p++, str_lit("aaa")); + str_dup(p++, str_lit("bbb")); + str_dup(p++, str_lit("aaa")); + str_dup(p++, str_lit("bbb")); + str_dup(p++, str_lit("bbb")); + str_dup(p++, str_lit("bbb")); + str_dup(p++, str_lit("aaa")); + str_dup(p++, str_lit("zzz")); + + const size_t count = str_uniq(src, p - src); + + assert(count == 3); + assert(str_eq(src[0], str_lit("aaa"))); + assert(str_eq(src[1], str_lit("bbb"))); + assert(str_eq(src[2], str_lit("zzz"))); + + // clean-up: free the strings that str_uniq() retained at the front of the array + for(size_t i = 0; i < count; ++i) + str_free(src[i]); + + passed; } int main(void) @@ -338,6 +376,7 @@ int main(void) test_sort(); test_sort_ci(); test_search(); + test_uniq(); return puts("OK.") < 0; }