-
Notifications
You must be signed in to change notification settings - Fork 0
/
PYTHON_STUDY BFS+SUFFIXARRAY+TWOPOINTERS
306 lines (285 loc) · 11.8 KB
/
PYTHON_STUDY BFS+SUFFIXARRAY+TWOPOINTERS
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
1
suffix array:
the prefix of suffix array is the substring of the whole string
"banana" suffix array is ["banana","anana","nana","ana","na","a"]
2
rabin karp algo for substring match:
len_source, len_target = len(source), len(target)
modulo = random.randint(1000000, 2000000)
hash_source, hash_target, base = 0, 0, 26
for idx in range(len_target):
hash_target = ((ord(target[idx])-ord('a')) + base * hash_target) % modulo
if hash_target < 0:
hash_target += modulo
max_base = 1
for idx in range(len_target-1):
max_base = (max_base * base) % modulo
for idx in range(len_source):
if idx >= len_target:
hash_source = (hash_source - (ord(source[idx-len_target])-ord('a')) * max_base) % modulo
hash_source = (ord(source[idx])-ord('a') + hash_source * base) % modulo
if hash_source < 0:
hash_source += modulo
if idx >= len_target-1 and hash_source == hash_target:
return idx-len_target+1
3 DFS for subsets II to deal with the duplicated cases:
def dfs_search(self, nums, st_idx, subsets, results):
results.append(subsets[:])
for idx in range(st_idx, len(nums)):
if idx-1 >= 0 and nums[idx] == nums[idx-1] and idx > st_idx:#think about the situation [1,2',2'']
#when we have [2'] and [2''], how to avoid this ?
continue
self.dfs_search(nums, idx + 1, subsets+[nums[idx]], results) #make sure subsets+[nums[idx]] can 回溯 !
return
4 rotated sorted array minimum sample code(consider duplicated case):
while(st < end) {
mid = st + (end - st) / 2;
if(nums[mid] < nums[end])
end = mid;
else if(nums[mid] > nums[end])
st = mid + 1;
else {
if(nums[st] != nums[end])
end = mid;
else {
int result = nums[mid], left = mid - 1, right = mid + 1;
while(left >= st || right <= end) {
if(left >= st) {
result = min(result, nums[left]);
--left;
}
if(right <= end) {
result = min(result, nums[right]);
++right;
}
}
return result;
}
}
}
search target in rotated sorted array (consider duplicated case):
int st = 0, end = A.size() - 1, mid;
while(st < end) {
mid = st + (end - st) / 2;
if(A[st] == target || A[mid] == target || A[end] == target) return true;
if(A[mid] < A[end]) {
if(target > A[end]) end = mid;
else if(target > A[mid]) st = mid + 1;
else end = mid;
}
else if(A[mid] > A[end]) {
if(target > A[mid]) st = mid + 1;
else if(target > A[end]) end = mid;
else st = mid + 1;
}
else {
if(A[mid] > A[st]) { //middle > st is equivalent to middle > end
if(target > A[mid]) st = mid + 1;
else if(target > A[st]) end = mid;
else st = mid + 1;
}
else if(A[mid] < A[st]) { //middle < st is equivalent to middle < end
if(target > A[st]) end = mid;
else if(target > A[mid]) st = mid + 1;
else end = mid;
}
else {
int left = mid - 1, right = mid + 1;
while(left > st) {
if(A[left] == target) return true;
--left;
}
while(right < end) {
if(A[right] == target) return true;
++right;
}
return false;
}
}
}
return false;
5 Two pointers algorithm:
1) how to remove duplicated elements in the solution set ?
ex: our result is a list of list and do not allow duplicated list element exists in our solution nested list
we should remove duplicated list as we progress the program instead of using set or check duplicated once we finished !!
problem 4sum:
numbers.sort()
result = []
for fir in range(len(numbers)-3):
if fir != 0 and numbers[fir] == numbers[fir-1]: #skip duplicated
continue
for sec in range(fir+1, len(numbers)-2):
if sec != fir+1 and numbers[sec] == numbers[sec-1]: #skip duplicated
continue
third, fourth = sec+1, len(numbers)-1
while third < fourth:
if numbers[fir]+numbers[sec]+numbers[third]+numbers[fourth] < target:
third += 1
elif numbers[fir]+numbers[sec]+numbers[third]+numbers[fourth] > target:
fourth -= 1
else:
result.append((numbers[fir],numbers[sec],numbers[third],numbers[fourth]))
while third+1 < fourth and numbers[third] == numbers[third+1]: #skip duplicated
third += 1
third += 1
while fourth-1 > third and numbers[fourth] == numbers[fourth-1]: #skip duplicated
fourth -= 1
fourth -= 1
return result
6 BFS: used to solve minimum path related problem !!!
standard sample code 1: BFS do not need level traversal(general graph)
queue, visited = [], set()
queue.append(root)
visited.add(root)
while queue is not empty:
head = queue.pop(0)
for adj in head.connected:
if adj not in visited:
visited.add(adj)
queue.append(adj)
standard sample code 2: BFS requires level traversal(tree/certain type of uncircled graph)
queue, visited = [], set()
queue.append(root)
visited.add(root)
while queue is not empty:
size = queue.size()
for i in range(size): #here we expand current level in the queue
head = queue.pop(0)
for adj in head.connected:
if adj not in visited:
visited.add(adj)
queue.append(adj)
Bidirectional BFS:
adapt to undirected graph/starting and ending nodes are given
If using one BFS, we have N levels with x nodes per level, overall computation is x^N;
If using Bidirectional BFS, we have overall computation be 2*x^(N/2).
standard sample code:
startqueue, endqueue, startvisit, endvisit = [start], [end], {start}, {end}
step = 0
while startqueue is not empty or endqueue is not empty:
if startqueue is not empty:
step += 1
size = startqueue.size()
for i in range(size):
head = startqueue.pop(0)
for adj in head.connected:
if adj in startvisit:
continue
elif adj in endvisit: #forward direction is intersect with backward direction
return step
elif adj not in startvisit:
startvisit.add(adj)
startqueue.append(adj)
if endqueue is not empty:
step += 1
size = endqueue.size()
for i in range(size):
head = endqueue.pop(0)
for adj in head.connected:
if adj in endvisit:
continue
elif adj in startvisit: #backward direction is intersect with forward direction
return step
elif adj not in endvisit:
endvisit.add(adj)
endqueue.append(adj)
return -1 #if not connected between start and end
problem: serialize & deserialize binary tree sample code:
def serialize(self, root):
# write your code here
result, bfs_queue = "", [root]
while len(bfs_queue) > 0:
size = len(bfs_queue)
if bfs_queue == [None] * size:
break
for i in range(size):
head = bfs_queue.pop(0)
if head is not None:
result += str(head.val) + ','
bfs_queue.append(head.left) #make sure level order traversal !!
bfs_queue.append(head.right) ##make sure level order traversal !!
else:
result += "#,"
idx = len(result) - 1
while idx >= 0 and (result[idx] == "," or result[idx] == "#"):
idx -= 1
return result[:idx+1]
"""
@param data: A string serialized by your serialize method.
This method will be invoked second, the argument data is what exactly
you serialized at method "serialize", that means the data is not given by
system, it's given by your own serialize method. So the format of data is
designed by yourself, and deserialize it here as you serialize it in
"serialize" method.
"""
def deserialize(self, data):
# write your code here
if data == "":
return None
idx, bfs_queue, data_list = 1, [], data.split(',')
root = TreeNode(int(data_list[idx-1]))
bfs_queue.append(root)
while idx < len(data_list):
head = bfs_queue.pop(0)
if data_list[idx] != "#":
head.left = TreeNode(int(data_list[idx]))
bfs_queue.append(head.left)
if idx+1 < len(data_list) and data_list[idx+1] != "#":
head.right = TreeNode(int(data_list[idx+1]))
bfs_queue.append(head.right)
idx += 2
return root
using BFS to check if a graph is a valid tree:
#method 1 : BFS
if len(edges) != n-1: #check if there exists a cycle !!!
return False
adj_list, bfs_queue, set_visit, count = dict(), [], set(), 0
for pair in edges:
if pair[0] not in adj_list.keys():
adj_list[pair[0]] = []
if pair[1] not in adj_list.keys():
adj_list[pair[1]] = []
adj_list[pair[0]].append(pair[1])
adj_list[pair[1]].append(pair[0])
bfs_queue.append(0)
set_visit.add(0)
while len(bfs_queue) > 0:
current = bfs_queue.pop(0)
count += 1
if current in adj_list.keys():
for node in adj_list[current]:
if node not in set_visit:
set_visit.add(node)
bfs_queue.append(node)
return count == n
problem : remove invalid parenthese to get the minimum modification such that parentheses is valid
Remove the minimum number of invalid parentheses in order to make the input string valid. Return all possible results.
def check_valid(self, s):
left, right = 0, 0
for char in s:
if char == "(":
left += 1
elif char == ")":
right += 1
if right > left:
return False
return left == right
def bfs(self, s):
results, queue, visited, isfound = [], [s], set(), False
visited.add(s)
while len(queue) > 0:
size = len(queue)
for i in range(size):
current_seq = queue.pop(0)
if self.check_valid(current_seq) == True:
results.append(current_seq)
isfound = True
else:
if isfound == False:
for i in range(len(current_seq)):
if current_seq[i] == "(" or current_seq[i] == ")":
new_seq = current_seq[:i]+current_seq[i+1:]
if new_seq not in visited:
visited.add(new_seq)
queue.append(new_seq)
return results