PYTHON_STUDY BFS+SUFFIXARRAY+TWOPOINTERS

1 
suffix array:
the prefix of suffix array is the substring of the whole string
"banana" suffix array is ["banana","anana","nana","ana","na","a"]

2
rabin karp algo for substring match:
len_source, len_target = len(source), len(target)
        modulo = random.randint(1000000, 2000000)
        hash_source, hash_target, base = 0, 0, 26
        for idx in range(len_target):
            hash_target = ((ord(target[idx])-ord('a')) + base * hash_target) % modulo
            if hash_target < 0:
                hash_target += modulo
        max_base = 1
        for idx in range(len_target-1):
            max_base = (max_base * base) % modulo
        for idx in range(len_source):
            if idx >= len_target:
                hash_source = (hash_source - (ord(source[idx-len_target])-ord('a')) * max_base) % modulo
            hash_source = (ord(source[idx])-ord('a') + hash_source * base) % modulo
            if hash_source < 0:
                hash_source += modulo
            if idx >= len_target-1 and hash_source == hash_target:
                return idx-len_target+1
                
3 DFS for subsets II to deal with the duplicated cases:
def dfs_search(self, nums, st_idx, subsets, results):
        results.append(subsets[:])
        for idx in range(st_idx, len(nums)):
            if idx-1 >= 0 and nums[idx] == nums[idx-1] and idx > st_idx:#think about the situation [1,2',2''] 
            #when we have [2'] and [2''], how to avoid this ?
                continue
            self.dfs_search(nums, idx + 1, subsets+[nums[idx]], results) #make sure subsets+[nums[idx]] can 回溯 !
        return
        
4 rotated sorted array minimum sample code(consider duplicated case):
while(st < end) {
            mid = st + (end - st) / 2;
            if(nums[mid] < nums[end])
                end = mid;
            else if(nums[mid] > nums[end]) 
                st = mid + 1;
            else {
                if(nums[st] != nums[end]) 
                    end = mid;
                else {
                    int result = nums[mid], left = mid - 1, right = mid + 1;
                    while(left >= st || right <= end) {
                        if(left >= st) {
                            result = min(result, nums[left]);
                            --left;
                        }
                        if(right <= end) {
                            result = min(result, nums[right]);
                            ++right;
                        }
                    }
                    return result;
                }
            }
        }
            
  search target in rotated sorted array (consider duplicated case):
  int st = 0, end = A.size() - 1, mid;
        while(st < end) {
            mid = st + (end - st) / 2;
            if(A[st] == target || A[mid] == target || A[end] == target) return true;
            if(A[mid] < A[end]) {
                if(target > A[end]) end = mid;
                else if(target > A[mid]) st = mid + 1;
                else end = mid;
            }
            else if(A[mid] > A[end]) {
                if(target > A[mid]) st = mid + 1;
                else if(target > A[end]) end = mid;
                else st = mid + 1;
            }
            else {
                if(A[mid] > A[st]) { //middle > st is equivalent to middle > end
                    if(target > A[mid]) st = mid + 1;
                    else if(target > A[st]) end = mid;
                    else st = mid + 1;
                }
                else if(A[mid] < A[st]) { //middle < st is equivalent to middle < end
                    if(target > A[st]) end = mid;
                    else if(target > A[mid]) st = mid + 1;
                    else end = mid;
                }
                else {
                    int left = mid - 1, right = mid + 1;
                    while(left > st) {
                        if(A[left] == target) return true;
                        --left;
                    }
                    while(right < end) {
                        if(A[right] == target) return true;
                        ++right;
                    }
                    return false;
                }
            }
        }
        return false;

5 Two pointers algorithm:
1) how to remove duplicated elements in the solution set ?
ex: our result is a list of list and do not allow duplicated list element exists in our solution nested list
we should remove duplicated list as we progress the program instead of using set or check duplicated once we finished !!
problem 4sum:
numbers.sort()
        result = []
        for fir in range(len(numbers)-3):
            if fir != 0 and numbers[fir] == numbers[fir-1]: #skip duplicated
                continue
            for sec in range(fir+1, len(numbers)-2):
                if sec != fir+1 and numbers[sec] == numbers[sec-1]: #skip duplicated
                    continue
                third, fourth = sec+1, len(numbers)-1
                while third < fourth:
                    if numbers[fir]+numbers[sec]+numbers[third]+numbers[fourth] < target:
                        third += 1
                    elif numbers[fir]+numbers[sec]+numbers[third]+numbers[fourth] > target:
                        fourth -= 1
                    else:
                        result.append((numbers[fir],numbers[sec],numbers[third],numbers[fourth]))
                        while third+1 < fourth and numbers[third] == numbers[third+1]: #skip duplicated
                            third += 1
                        third += 1
                        while fourth-1 > third and numbers[fourth] == numbers[fourth-1]: #skip duplicated
                            fourth -= 1
                        fourth -= 1
        return result

6 BFS: used to solve minimum path related problem !!!
standard sample code 1: BFS do not need level traversal(general graph)
queue, visited = [], set()
queue.append(root)
visited.add(root)
while queue is not empty:
  head = queue.pop(0)
  for adj in head.connected:
    if adj not in visited:
      visited.add(adj)
      queue.append(adj)
      
standard sample code 2: BFS requires level traversal(tree/certain type of uncircled graph)
queue, visited = [], set()
queue.append(root)
visited.add(root)
while queue is not empty:
  size = queue.size()
  for i in range(size): #here we expand current level in the queue
    head = queue.pop(0)
    for adj in head.connected:
      if adj not in visited:
        visited.add(adj)
        queue.append(adj)
        
Bidirectional BFS:
adapt to undirected graph/starting and ending nodes are given
If using one BFS, we have N levels with x nodes per level, overall computation is x^N;
If using Bidirectional BFS, we have overall computation be 2*x^(N/2).
standard sample code:
startqueue, endqueue, startvisit, endvisit = [start], [end], {start}, {end}
step = 0
while startqueue is not empty or endqueue is not empty:
  if startqueue is not empty:
    step += 1
    size = startqueue.size()
    for i in range(size):
      head = startqueue.pop(0)
      for adj in head.connected:
        if adj in startvisit:
          continue
        elif adj in endvisit: #forward direction is intersect with backward direction
          return step
        elif adj not in startvisit:
          startvisit.add(adj)
          startqueue.append(adj)
  if endqueue is not empty:
    step += 1
    size = endqueue.size()
    for i in range(size):
      head = endqueue.pop(0)
      for adj in head.connected:
        if adj in endvisit:
          continue
        elif adj in startvisit: #backward direction is intersect with forward direction
          return step
        elif adj not in endvisit:
          endvisit.add(adj)
          endqueue.append(adj)
 return -1 #if not connected between start and end
 
 problem: serialize & deserialize binary tree sample code:
 def serialize(self, root):
        # write your code here
        result, bfs_queue = "", [root]
        while len(bfs_queue) > 0:
            size = len(bfs_queue)
            if bfs_queue == [None] * size:
                break
            for i in range(size):
                head = bfs_queue.pop(0)
                if head is not None:
                    result += str(head.val) + ','
                    bfs_queue.append(head.left) #make sure level order traversal !!
                    bfs_queue.append(head.right) ##make sure level order traversal !!
                else:
                    result += "#,"
                        
        idx = len(result) - 1
        while idx >= 0 and (result[idx] == "," or result[idx] == "#"):
            idx -= 1
                        
        return result[:idx+1]

    """
    @param data: A string serialized by your serialize method.
    This method will be invoked second, the argument data is what exactly
    you serialized at method "serialize", that means the data is not given by
    system, it's given by your own serialize method. So the format of data is
    designed by yourself, and deserialize it here as you serialize it in 
    "serialize" method.
    """
    def deserialize(self, data):
        # write your code here
        if data == "":
            return None
        idx, bfs_queue, data_list = 1, [], data.split(',')
        root = TreeNode(int(data_list[idx-1]))
        bfs_queue.append(root)
        while idx < len(data_list):
            head = bfs_queue.pop(0)
            if data_list[idx] != "#":
                head.left = TreeNode(int(data_list[idx]))
                bfs_queue.append(head.left)
            if idx+1 < len(data_list) and data_list[idx+1] != "#":
                head.right = TreeNode(int(data_list[idx+1]))
                bfs_queue.append(head.right)
            idx += 2
        return root

using BFS to check if a graph is a valid tree:
#method 1 : BFS
        if len(edges) != n-1: #check if there exists a cycle !!!
            return False
        
        adj_list, bfs_queue, set_visit, count = dict(), [], set(), 0
        
        for pair in edges:
            if pair[0] not in adj_list.keys():
                adj_list[pair[0]] = []
            if pair[1] not in adj_list.keys():
                adj_list[pair[1]] = []
            adj_list[pair[0]].append(pair[1])
            adj_list[pair[1]].append(pair[0])
        
        bfs_queue.append(0)
        set_visit.add(0)
        
        while len(bfs_queue) > 0:
            current = bfs_queue.pop(0)
            count += 1
            if current in adj_list.keys():
                for node in adj_list[current]:
                    if node not in set_visit:
                        set_visit.add(node)
                        bfs_queue.append(node)
                    
        return count == n

problem : remove invalid parenthese to get the minimum modification such that parentheses is valid
Remove the minimum number of invalid parentheses in order to make the input string valid. Return all possible results.
def check_valid(self, s):
        left, right = 0, 0
        for char in s:
            if char == "(":
                left += 1
            elif char == ")":
                right += 1
            if right > left:
                return False
        return left == right
        
    def bfs(self, s):
        results, queue, visited, isfound = [], [s], set(), False
        visited.add(s)
        while len(queue) > 0:
            size = len(queue)
            for i in range(size):
                current_seq = queue.pop(0)
                if self.check_valid(current_seq) == True:
                    results.append(current_seq)
                    isfound = True
                else:
                    if isfound == False:
                        for i in range(len(current_seq)):
                            if current_seq[i] == "(" or current_seq[i] == ")":
                                new_seq = current_seq[:i]+current_seq[i+1:]
                                if new_seq not in visited:
                                    visited.add(new_seq)
                                    queue.append(new_seq)

        return results