-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSubstring with Concatenation of All Words
125 lines (118 loc) · 4.18 KB
/
Substring with Concatenation of All Words
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// for sort the |words|
int strcmp1(const void* p1, const void* p2) {
return strcmp(*(char**)p1, *(char**)p2);
}
// to handle duplicate words in |words|
struct word_t {
char* w;
int count;
int* pos;
int cur;
};
int word_t_cmp(const void* p1, const void* p2) {
return strcmp(((const struct word_t*)p1)->w, ((const struct word_t*)p2)->w);
}
/**
* Return an array of size *returnSize.
* Note: The returned array must be malloced, assume caller calls free().
*
* It is a variant of the longest-substring-without-repetition problem
*/
int* findSubstring(char* s, char** words, int wordsSize, int* returnSize) {
if (!s || wordsSize == 0 || strlen(s) < wordsSize*strlen(words[0])) {
*returnSize = 0;
return NULL;
}
if (strlen(s) == 0 && strlen(words[0]) == 0) {
*returnSize = 1;
int* ret = (int*) malloc(sizeof(int));
ret[0] = 0;
return ret;
}
int n = strlen(s);
int k = strlen(words[0]);
// sort |words| first, prepare for binary search
qsort(words, wordsSize, sizeof(char*), strcmp1);
// construct array of word_t
// @note please note that after construction, |wts| is already sorted
struct word_t* wts = (struct word_t*) malloc(wordsSize * sizeof(struct word_t));
int wtsSize = 0;
for (int i = 0; i < wordsSize;) {
char* w = words[i];
int count = 1;
while (++i < wordsSize && !strcmp(w, words[i]))
count++;
struct word_t* wt_ptr = wts + wtsSize++;
wt_ptr->w = w;
wt_ptr->count = count;
wt_ptr->pos = (int*) malloc(count * sizeof(int));
wt_ptr->cur = 0;
}
// store one word
struct word_t wt;
wt.w = (char*) malloc((k+1) * sizeof(char));
// return value
int cap = 10;
int* ret = (int*) malloc(cap * sizeof(int));
int size = 0;
for (int offset = 0; offset < k; offset++) {
// init word_t array
for (int i = 0; i < wtsSize; i++) {
struct word_t* wt_ptr = wts + i;
for (int j = 0; j < wt_ptr->count; j++)
wt_ptr->pos[j] = -1;
wt_ptr->cur = 0;
}
int start = offset; // start pos of current substring
int len = 0; // number of words encountered
for (int i = offset; i <= n - k; i += k) {
strncpy(wt.w, s+i, k);
wt.w[k] = '\0';
struct word_t* p = (struct word_t*) bsearch(&wt, wts, wtsSize, sizeof(struct word_t), word_t_cmp);
if (!p) {
start = i + k;
len = 0;
continue;
}
// @note the following five lines covers extra occurrence of
// (possible duplicate) word, you can draw some special case
// on a paper if it's hard to understand why it could cover
// all boundary conditions
// |p->cur| and |p->pos| implement a simple rounded queue,
// and |p->cur| always point to the smallest index position
int pos = p->pos[p->cur];
p->pos[p->cur++] = i;
if (p->cur >= p->count)
p->cur -= p->count;
if (pos < start) { // valid occurrence of this word in current substring started at |start|
len++;
if (len == wordsSize) { // all words encounterd, add to result set
if (size == cap) { // extend the array
cap = 2*cap;
int* tmp = (int*) malloc(cap * sizeof(int));
memcpy(tmp, ret, size*sizeof(int));
free(ret);
ret = tmp;
}
ret[size++] = start;
// move substring forward by one word length
start += k;
len--;
}
} else { // extra occurence of (possible duplicat) word encountered, update |start| and |len|
start = pos + k;
len = (i - start)/k + 1;
}
}
}
// cleanup
for (int i = 0; i < wtsSize; i++)
free(wts[i].pos);
free(wts);
*returnSize = size;
if (size == 0) {
free(ret);
ret = NULL;
}
return ret;
}