# NOTE: The original text of this span was a git diff with its line breaks
# collapsed. The two scripts it adds are reproduced below, back-to-back, as
# properly formatted Python; the diff headers are kept as comments.

# --- git diff header (kept as a comment for provenance) ---
# diff --git a/utils/mcp_maxar_deletes/output_first_n_lines_of_each_file.py b/utils/mcp_maxar_deletes/output_first_n_lines_of_each_file.py
# new file mode 100644
# index 0000000..eb38665
# --- /dev/null
# +++ b/utils/mcp_maxar_deletes/output_first_n_lines_of_each_file.py
# @@ -0,0 +1,54 @@

# output_first_n_lines_of_each_file.py

# python output_first_n_lines_of_each_file.py


# The point of this file is to output the first few lines of each file, just so we can see the sample

# NOTE(review): the fifth entry spells the collection 'WV04_Pan_L1B___1' while
# step_03__getting_granule_id_lists.py uses 'WV04_PAN_L1B___1'. On a
# case-sensitive filesystem only one spelling can match -- confirm which one is
# the real file name.
SETTING__files_collection = [
    'manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_CBA_DR_COMPLETE_MANIFEST_FILE_LIST__2024-10-21.txt',
    'manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt',
    'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV04_1B__BOTH.txt',
    'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__GE01_1B__BOTH.txt',
    'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV04_Pan_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv',
    'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv',
]


def print_first_n_lines_from_file(file_path="", n=5):
    """Print up to the first ``n`` lines of ``file_path`` to stdout.

    Each line is printed with surrounding whitespace stripped and prefixed with
    its zero-based index. If the file ends before ``n`` lines were read, a
    notice is printed instead. A missing file or any other error is reported on
    stdout and swallowed, so one bad path does not stop the caller from
    sampling the remaining files.

    Args:
        file_path: Path of the text file to sample.
        n: Maximum number of lines to print.
    """
    print('')
    print(f'About to read {n} lines from File: {file_path}')
    try:
        with open(file_path, 'r') as file:
            for i in range(n):
                line = file.readline()
                if not line:
                    # readline() returns '' only at end of file.
                    print(f'We seemed to have reached the end of this file before {n} lines!')
                    break
                print(f' Line {i}: {line.strip()}')
    except FileNotFoundError:
        print(f'File not found: {file_path}')
    except Exception as e:
        print(f'An error occured: {e}')
    print('')


def main():
    """Print a 10-line sample of every path in ``SETTING__files_collection``."""
    print('output_first_5_lines_of_each_file: STARTED')

    print('')
    print(f'Current File Paths (SETTING__files_collection): {SETTING__files_collection}')
    print('')

    for file_path in SETTING__files_collection:
        print_first_n_lines_from_file(file_path=file_path, n=10)

    print('output_first_5_lines_of_each_file: Reached the End')


# Only run when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()


# --- git diff header (kept as a comment for provenance) ---
# diff --git a/utils/mcp_maxar_deletes/step_02__filter_lists.py b/utils/mcp_maxar_deletes/step_02__filter_lists.py
# new file mode 100644
# index 0000000..f6fd6c7
# --- /dev/null
# +++ b/utils/mcp_maxar_deletes/step_02__filter_lists.py
# @@ -0,0 +1,209 @@

# step_02__filter_lists.py

# python step_02__filter_lists.py

import datetime
import os


# # Quick Script Metrics
# Processing a 32 GB csv file took about 40 seconds.
# A way to make this more efficient would be to only iterate the large list once (and do all the filtering horizontally (all at once))
# Not needed - Code is easier to follow when it is more linear like this.


# Here are the lists I need to KEEP (Into Separate output files)
#
# String Starts with: "csdap-maxar-delivery","css/nga/GE01/1B/
# String Starts with: "csdap-maxar-delivery","css/nga/WV01/1B/
# String Starts with: "csdap-maxar-delivery","css/nga/WV02/1B/
# String Starts with: "csdap-maxar-delivery","css/nga/WV03/1B/
# String Starts with: "csdap-maxar-delivery","css/nga/WV04/1B/


SETTINGS__Input_File = 'manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt'
SETTINGS__Output_Objects = [
    {'filter_string': '"csdap-maxar-delivery","css/nga/GE01/1B/', 'out_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__GE01_1B__BOTH.txt'},
    {'filter_string': '"csdap-maxar-delivery","css/nga/WV01/1B/', 'out_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV01_1B__BOTH.txt'},
    {'filter_string': '"csdap-maxar-delivery","css/nga/WV02/1B/', 'out_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV02_1B__BOTH.txt'},
    {'filter_string': '"csdap-maxar-delivery","css/nga/WV03/1B/', 'out_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV03_1B__BOTH.txt'},
    {'filter_string': '"csdap-maxar-delivery","css/nga/WV04/1B/', 'out_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV04_1B__BOTH.txt'},
]


# Filter the Input
def filter_lines_in_file(file_path="", filter_string='DEFAULT_FILTER_STRING'):
    """Return every line of ``file_path`` that contains ``filter_string``.

    Matching lines are returned with surrounding whitespace (including the
    newline) stripped. Progress and totals are printed to stdout.

    NOTE(review): the settings comments say "String Starts with", but the test
    here is a substring match anywhere in the line -- confirm that is intended.

    Args:
        file_path: Path of the text file to scan, read line by line.
        filter_string: Substring a line must contain to be kept.

    Returns:
        List of matching lines. If the file is missing or reading fails, the
        error is printed and whatever matched up to that point (possibly
        nothing) is returned.
    """
    print('')
    print(f'About to filter lines from File: {file_path}')
    print(f' Using Filter String: {filter_string}')

    matching_lines = []
    counter__total_lines = 0

    try:
        with open(file_path, 'r') as file:
            for line in file:
                if filter_string in line:
                    # Keep the line without its newline characters.
                    matching_lines.append(line.strip())
                counter__total_lines += 1
    except FileNotFoundError:
        print(f'File not found: {file_path}')
    except Exception as e:
        print(f'An error occured: {e}')

    print('')
    print(f'Finished processing {file_path}')
    print(f' Filtered {counter__total_lines} total lines')
    print(f' Keeping {len(matching_lines)} lines that passed the filter')
    print('')

    return matching_lines


# Write the output
def write_filtered_output(file_path="", lines_to_write=None):
    """Write ``lines_to_write`` to ``file_path``, one entry per line.

    The file is opened in write mode, so an existing file is replaced. Errors
    are printed to stdout and swallowed.

    Args:
        file_path: Destination path.
        lines_to_write: Strings to write; a newline is appended to each.
            ``None`` (the default) is treated as an empty list.
    """
    # A fresh list per call avoids sharing one mutable default across calls.
    if lines_to_write is None:
        lines_to_write = []

    print('')
    print(f'About to write the filtered lines to the output file: {file_path}')
    print(f' Num of line to write: {len(lines_to_write)}')
    print('')
    counter__lines_written = 0
    try:
        # Write mode replaces the file if it already exists.
        with open(file_path, 'w') as file:
            for line in lines_to_write:
                file.write(line + '\n')
                counter__lines_written += 1
        print(f'Successfully wrote to file: {file_path}')
        print(f' Number of lines written: {counter__lines_written}')
    except Exception as e:
        print(f'An error occured: {e}')


# Main Entry Point
def main():
    """Filter the manifest once per entry in ``SETTINGS__Output_Objects``.

    For every settings entry the whole input file is scanned for the entry's
    ``filter_string`` and the matching lines are written to its ``out_file``.
    """
    print('main: STARTED')
    # Timezone-aware "now"; datetime.utcnow() is deprecated.
    datetime__START = datetime.datetime.now(datetime.timezone.utc)
    print('')
    print('Filtering lists...')
    print('')
    print('Settings Items:')
    print(f' SETTINGS__Input_File: {SETTINGS__Input_File}')
    print(f' SETTINGS__Output_Objects: {SETTINGS__Output_Objects}')
    print('')

    # filter_lines_in_file() reports a missing input by returning an empty
    # list, which would then be written out and wipe every previously
    # generated output file. Stop before touching any output in that case.
    if not os.path.isfile(SETTINGS__Input_File):
        print(f'File not found: {SETTINGS__Input_File}')
        print('main: Aborting -- no output files were modified')
        return

    for output_obj in SETTINGS__Output_Objects:
        print('---------------------------------------------------------------')
        current_filtered_lines_list = filter_lines_in_file(file_path=SETTINGS__Input_File, filter_string=output_obj['filter_string'])
        write_filtered_output(file_path=output_obj['out_file'], lines_to_write=current_filtered_lines_list)
        print('---------------------------------------------------------------')

    datetime__END = datetime.datetime.now(datetime.timezone.utc)
    total_time__str = str(datetime__END - datetime__START)
    print(f'main: Reached the End -- Total Execution Time: {total_time__str}')


# Only run when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()



# # Output from running this
#
#
# ➜ mcp_MAXAR_deletes__q4_2024 python step_02__filter_lists.py
# main: STARTED

# Filtering lists...
+ +# Settings Items: +# SETTINGS__Input_File: manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt +# SETTINGS__Output_Objects: [{'filter_string': '"csdap-maxar-delivery","css/nga/GE01/1B/', 'out_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__GE01_1B__BOTH.txt'}, {'filter_string': '"csdap-maxar-delivery","css/nga/WV01/1B/', 'out_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV01_1B__BOTH.txt'}, {'filter_string': '"csdap-maxar-delivery","css/nga/WV02/1B/', 'out_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV02_1B__BOTH.txt'}, {'filter_string': '"csdap-maxar-delivery","css/nga/WV03/1B/', 'out_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV03_1B__BOTH.txt'}, {'filter_string': '"csdap-maxar-delivery","css/nga/WV04/1B/', 'out_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV04_1B__BOTH.txt'}] + +# --------------------------------------------------------------- + +# About to filter lines from File: manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt +# Using Filter String: "csdap-maxar-delivery","css/nga/GE01/1B/ + +# Finished processing manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt +# Filtered 183106060 total lines +# Keeping 13383320 lines that passed the filter + + +# About to write the filtered lines to the output file: step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__GE01_1B__BOTH.txt +# Num of line to write: 13383320 + +# Successfully wrote to file: step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__GE01_1B__BOTH.txt +# Number of lines written: 13383320 +# --------------------------------------------------------------- +# 
--------------------------------------------------------------- + +# About to filter lines from File: manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt +# Using Filter String: "csdap-maxar-delivery","css/nga/WV01/1B/ + +# Finished processing manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt +# Filtered 183106060 total lines +# Keeping 40729489 lines that passed the filter + + +# About to write the filtered lines to the output file: step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV01_1B__BOTH.txt +# Num of line to write: 40729489 + +# Successfully wrote to file: step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV01_1B__BOTH.txt +# Number of lines written: 40729489 +# --------------------------------------------------------------- +# --------------------------------------------------------------- + +# About to filter lines from File: manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt +# Using Filter String: "csdap-maxar-delivery","css/nga/WV02/1B/ + +# Finished processing manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt +# Filtered 183106060 total lines +# Keeping 79638458 lines that passed the filter + + +# About to write the filtered lines to the output file: step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV02_1B__BOTH.txt +# Num of line to write: 79638458 + +# Successfully wrote to file: step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV02_1B__BOTH.txt +# Number of lines written: 79638458 +# --------------------------------------------------------------- +# --------------------------------------------------------------- + +# About to filter lines from File: 
manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt +# Using Filter String: "csdap-maxar-delivery","css/nga/WV03/1B/ + +# Finished processing manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt +# Filtered 183106060 total lines +# Keeping 36696704 lines that passed the filter + + +# About to write the filtered lines to the output file: step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV03_1B__BOTH.txt +# Num of line to write: 36696704 + +# Successfully wrote to file: step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV03_1B__BOTH.txt +# Number of lines written: 36696704 +# --------------------------------------------------------------- +# --------------------------------------------------------------- + +# About to filter lines from File: manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt +# Using Filter String: "csdap-maxar-delivery","css/nga/WV04/1B/ + +# Finished processing manifest_work_area/cached_full_list_of_bucket_key_paths/complete__MCP_COMPLETE_MANIFEST_FILE_LIST__2024-10-19.txt +# Filtered 183106060 total lines +# Keeping 93754 lines that passed the filter + + +# About to write the filtered lines to the output file: step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV04_1B__BOTH.txt +# Num of line to write: 93754 + +# Successfully wrote to file: step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV04_1B__BOTH.txt +# Number of lines written: 93754 +# --------------------------------------------------------------- +# main: Reached the End -- Total Execution Time: 0:07:43.008125 +# ➜ mcp_MAXAR_deletes__q4_2024 + diff --git a/utils/mcp_maxar_deletes/step_03__getting_granule_id_lists.py b/utils/mcp_maxar_deletes/step_03__getting_granule_id_lists.py new file mode 100644 
# --- git diff header, continued (kept as a comment for provenance) ---
# index 0000000..c6812eb
# --- /dev/null
# +++ b/utils/mcp_maxar_deletes/step_03__getting_granule_id_lists.py
# @@ -0,0 +1,413 @@

# step_03__getting_granule_id_lists.py

# python step_03__getting_granule_id_lists.py


import datetime



# Settings for OLD_NGAP_MAXAR
# Each entry pairs a raw "safe to delete" CSV ('in_file') with the text file
# that receives the values extracted from it ('out_file').
SETTINGS__is_run__OLD_NGAP_MAXAR = False #True
SETTINGS__MODE__OLD_NGAP_MAXAR_Paths = [
    {
        'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv',
        'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
    },
    {
        'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV01_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv',
        'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV01_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
    },
    {
        'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv',
        'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
    },
    {
        'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV03_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv',
        'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
    },
    {
        'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV03_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv',
        'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
    },
    {
        'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV04_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv',
        'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
    },
    {
        'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV04_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv',
        'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
    },
]



# Settings for MCP_MAXAR
# Each entry pairs a filtered manifest produced by step_02 ('in_file') with the
# text file that receives the granule paths extracted from it ('out_file').
SETTINGS__is_run__MCP_MAXAR = True
SETTINGS__MODE__MCP_MAXAR_Paths = [
    {
        'in_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__GE01_1B__BOTH.txt',
        'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__GE01_1B__BOTH.txt',
    },
    {
        'in_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV01_1B__BOTH.txt',
        'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV01_1B__BOTH.txt',
    },
    {
        'in_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV02_1B__BOTH.txt',
        'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt',
    },
    {
        'in_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV03_1B__BOTH.txt',
        'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV03_1B__BOTH.txt',
    },
    {
        'in_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV04_1B__BOTH.txt',
        'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV04_1B__BOTH.txt',
    },
]



###
######################################################
### ### SECTION ### MCP Convert File List to Just the Path to a Granule (Unique List)
### ######################################################


# For The MCP List, I just need to convert the large list of each individual file down to just a path to the granule.
# Only the '-thumb.jpg' entry of each granule is used, and its file-name suffix is removed to get the granule path.
# The last step is to remove any duplicates before writing the outfile.
# The file size (and memory size) of this list should be reduced by about 90% when doing it this way.
def process_MODE__MCP_MAXAR_Path(mcp_maxar_path_object=None):
    """Reduce a filtered MCP manifest to a unique list of granule paths.

    Reads ``mcp_maxar_path_object['in_file']`` line by line, keeps only lines
    containing ``'-thumb.jpg'``, converts each to a granule path with
    ``MCP_MAXAR__get_granule_path_from_input_str`` and writes the unique paths
    to ``mcp_maxar_path_object['out_file']`` (replacing it), one per line, in
    the order they were first seen. Counts are printed to stdout.

    Any error (missing key, unreadable file, ...) is printed and swallowed.

    Args:
        mcp_maxar_path_object: Dict with the keys ``'in_file'`` and
            ``'out_file'``. ``None`` (the default) is treated as an empty
            dict, which is reported as an error.

    Returns:
        None.
    """
    # A fresh dict per call avoids sharing one mutable default across calls.
    if mcp_maxar_path_object is None:
        mcp_maxar_path_object = {}

    counter__total_lines = 0
    counter__granule_lines = 0
    # A dict used as an insertion-ordered set: de-duplicates while keeping the
    # output order reproducible from run to run.
    unique_granule_paths = {}
    try:
        file_path__in_file = mcp_maxar_path_object['in_file']
        file_path__out_file = mcp_maxar_path_object['out_file']

        with open(file_path__in_file, 'r') as in_file:
            for line in in_file:
                counter__total_lines += 1
                # Ignore the items that are not the granule's -thumb.jpg entry.
                if '-thumb.jpg' not in line:
                    continue
                # Drop the line ending first, so a final line without a newline
                # compares equal to the same entry seen earlier in the file.
                granule_path = MCP_MAXAR__get_granule_path_from_input_str(input_str=line.rstrip('\r\n'))
                unique_granule_paths[granule_path] = None
                counter__granule_lines += 1

        print(f' Num of Granule Paths BEFORE and AFTER {counter__granule_lines} and {len(unique_granule_paths)}')

        # Save the new list to 'out_file' (overwriting if it already exists).
        # The newline is added here instead of relying on the input's.
        with open(file_path__out_file, 'w') as out_file:
            out_file.writelines(granule_path + '\n' for granule_path in unique_granule_paths)
        print(f' process_MODE__MCP_MAXAR_Path: Saved {len(unique_granule_paths)} Granule IDs to a File saved at: {file_path__out_file}')

    except Exception as e:
        print(f'process_MODE__MCP_MAXAR_Path: ERROR: An error occured: {e}')

    num_of_granule_paths = len(unique_granule_paths)
    print(f' process_MODE__MCP_MAXAR_Path: Function Finished with {num_of_granule_paths} Granule Paths and {counter__total_lines} Total Lines Processed ')


# Convert this: "csdap-maxar-delivery","css/nga/WV04/1B/2018/297/WV04_9d1b1558-19ce-4d6e-873e-1a726f579bb3-inv_X1BS_059420300030_01/WV04_20181024105141_9d1b1558-19ce-4d6e-873e-1a726f579bb3-inv_18OCT24105141-P1BS-059420300030_01_P009-thumb.jpg"
# To this: css/nga/WV04/1B/2018/297/WV04_9d1b1558-19ce-4d6e-873e-1a726f579bb3-inv_X1BS_059420300030_01/WV04_20181024105141_9d1b1558-19ce-4d6e-873e-1a726f579bb3-inv_18OCT24105141-P1BS-059420300030_01_P009
def MCP_MAXAR__get_granule_path_from_input_str(input_str=""):
    """Strip the bucket prefix and the thumbnail suffix from a manifest line.

    Every occurrence of ``'"csdap-maxar-delivery","'`` and of
    ``'-thumb.jpg"'`` is removed; all other characters (including any line
    ending) are returned unchanged.

    Args:
        input_str: A manifest line already filtered down to a thumbnail entry.

    Returns:
        The remaining text, or ``''`` if ``input_str`` is not a string.
    """
    # Expected input (An Already filtered down to just the thumbfile link)
    # # '"csdap-maxar-delivery","css/nga/WV04/1B/2018/297/WV04_9d1b1558-19ce-4d6e-873e-1a726f579bb3-inv_X1BS_059420300030_01/WV04_20181024105141_9d1b1558-19ce-4d6e-873e-1a726f579bb3-inv_18OCT24105141-P1BS-059420300030_01_P009-thumb.jpg"'
    try:
        # First, remove '"csdap-maxar-delivery","'
        without_bucket = input_str.replace('"csdap-maxar-delivery","', '')
        # Next, remove the thumbnail suffix and the closing quote
        return without_bucket.replace('-thumb.jpg"', '')
    except (AttributeError, TypeError):
        # Non-string input (None, bytes, ...): return blank.
        return ''









### ######################################################
### ### SECTION ### OLD_NGAP Convert To just a Granule ID
### ######################################################


# For the OLD_NGAP Lists, I need to filter down to ONLY the Granule ID.
# Again, there may be duplicates depending on which source is used (I believe I picked the public bucket so it would only be a list of thumbs, which means no duplicates)
# The file size of this list will be reduced by a very large amount (greater than 90% reduction, maybe 98% since paths are all very long), since the rest of the path is not part of it.
def process_MODE__OLD_NGAP_MAXAR_Paths(old_ngap_maxar_path_object=None):
    """Reduce an OLD_NGAP "safe to delete" list to a unique list of granule IDs.

    Reads ``old_ngap_maxar_path_object['in_file']`` line by line, converts each
    non-blank line to a granule ID with
    ``OLD_NGAP_MAXAR__get_granule_id_from_input_str`` and writes the unique IDs
    to ``old_ngap_maxar_path_object['out_file']`` (replacing it), one per line,
    in the order they were first seen. Counts are printed to stdout.

    Any error (missing key, unreadable file, ...) is printed and swallowed.

    Args:
        old_ngap_maxar_path_object: Dict with the keys ``'in_file'`` and
            ``'out_file'``. ``None`` (the default) is treated as an empty
            dict, which is reported as an error.

    Returns:
        None.
    """
    # A fresh dict per call avoids sharing one mutable default across calls.
    if old_ngap_maxar_path_object is None:
        old_ngap_maxar_path_object = {}

    counter__total_lines = 0
    counter__granule_lines = 0
    # A dict used as an insertion-ordered set: de-duplicates while keeping the
    # output order reproducible from run to run.
    unique_granule_ids = {}
    try:
        file_path__in_file = old_ngap_maxar_path_object['in_file']
        file_path__out_file = old_ngap_maxar_path_object['out_file']

        with open(file_path__in_file, 'r') as in_file:
            for line in in_file:
                counter__total_lines += 1
                # Drop the line ending first, so a final line without a newline
                # compares equal to the same entry seen earlier in the file.
                granule_id = OLD_NGAP_MAXAR__get_granule_id_from_input_str(input_str=line.rstrip('\r\n'))
                if not granule_id:
                    # Blank lines (or paths ending in '/') carry no granule ID.
                    continue
                unique_granule_ids[granule_id] = None
                counter__granule_lines += 1

        print(f' Num of Granules BEFORE and AFTER {counter__granule_lines} and {len(unique_granule_ids)}')

        # Save the new list to 'out_file' (overwriting if it already exists).
        # The newline is added here instead of relying on the input's.
        with open(file_path__out_file, 'w') as out_file:
            out_file.writelines(granule_id + '\n' for granule_id in unique_granule_ids)
        print(f' process_MODE__OLD_NGAP_MAXAR_Paths: Saved {len(unique_granule_ids)} Granule IDs to a File saved at: {file_path__out_file}')

    except Exception as e:
        print(f'process_MODE__OLD_NGAP_MAXAR_Paths: ERROR: An error occured: {e}')

    num_of_granule_ids = len(unique_granule_ids)
    print(f' process_MODE__OLD_NGAP_MAXAR_Paths: Function Finished with {num_of_granule_ids} Granules and {counter__total_lines} Total Lines Processed ')


# Convert this: WV04_Pan_L1B___1/2018/297/WV04_20181024105205_bc4e6462-5a0b-4958-ad90-86f9874314bb-inv_18OCT24105205-P1BS-059420300020_01_P009/WV04_20181024105205_bc4e6462-5a0b-4958-ad90-86f9874314bb-inv_18OCT24105205-P1BS-059420300020_01_P009-thumb.jpg
# To this: WV04_20181024105205_bc4e6462-5a0b-4958-ad90-86f9874314bb-inv_18OCT24105205-P1BS-059420300020_01_P009
def OLD_NGAP_MAXAR__get_granule_id_from_input_str(input_str=""):
    """Return the granule ID embedded in a thumbnail path.

    Takes the text after the last ``'/'`` and removes every occurrence of
    ``'-thumb.jpg'`` from it; all other characters (including any line ending)
    are returned unchanged.

    Args:
        input_str: Path of a granule's thumbnail file.

    Returns:
        The granule ID, or ``''`` if ``input_str`` is not a string.
    """
    try:
        # Remove directory paths
        file_name = input_str.split('/')[-1]
        # Remove '-thumb.jpg'
        return file_name.replace('-thumb.jpg', '')
    except (AttributeError, TypeError):
        # Non-string input (None, bytes, ...): return blank.
        return ''


def main():
    """Run whichever conversions are switched on by the ``SETTINGS__is_run__*`` flags.

    Each enabled mode prints its settings and then processes every in/out path
    pair of that mode. The total elapsed time is printed at the end.
    """
    print('main: STARTED')
    # Timezone-aware "now"; datetime.utcnow() is deprecated.
    datetime__START = datetime.datetime.now(datetime.timezone.utc)

    if SETTINGS__is_run__OLD_NGAP_MAXAR:
        print('')
        print('Converting OLD_NGAP MAXAR lists into just Lists of Granule IDs...')
        print('')
        print('Settings Items:')
        print(f' SETTINGS__is_run__OLD_NGAP_MAXAR: {SETTINGS__is_run__OLD_NGAP_MAXAR}')
        print(f' SETTINGS__MODE__OLD_NGAP_MAXAR_Paths: {SETTINGS__MODE__OLD_NGAP_MAXAR_Paths}')
        print('')
        for path_obj in SETTINGS__MODE__OLD_NGAP_MAXAR_Paths:
            print('---------------------------------------------------------------')
            process_MODE__OLD_NGAP_MAXAR_Paths(old_ngap_maxar_path_object=path_obj)
            print('---------------------------------------------------------------')

    if SETTINGS__is_run__MCP_MAXAR:
        print('')
        print('Converting MCP MAXAR Lists into just List of Unique Granule Paths...')
        print('')
        print('Settings Items:')
        print(f' SETTINGS__is_run__MCP_MAXAR: {SETTINGS__is_run__MCP_MAXAR}')
        print(f' SETTINGS__MODE__MCP_MAXAR_Paths: {SETTINGS__MODE__MCP_MAXAR_Paths}')
        print('')
        for path_obj in SETTINGS__MODE__MCP_MAXAR_Paths:
            print('---------------------------------------------------------------')
            process_MODE__MCP_MAXAR_Path(mcp_maxar_path_object=path_obj)
            print('---------------------------------------------------------------')

    datetime__END = datetime.datetime.now(datetime.timezone.utc)
    total_time__str = str(datetime__END - datetime__START)
    print(f'main: Reached the End -- Total Execution Time: {total_time__str}')


# Only run when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()



# ###################################
# # Output --- MCP_MAXAR
# ###################################

# ➜ mcp_MAXAR_deletes__q4_2024 python step_03__getting_granule_id_lists.py
# main: STARTED

# Converting MCP MAXAR Lists into just List of Unique Granule Paths...
+ +# Settings Items: +# SETTINGS__is_run__MCP_MAXAR: True +# SETTINGS__MODE__MCP_MAXAR_Paths: [{'in_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__GE01_1B__BOTH.txt', 'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__GE01_1B__BOTH.txt'}, {'in_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV01_1B__BOTH.txt', 'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV01_1B__BOTH.txt'}, {'in_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV02_1B__BOTH.txt', 'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt'}, {'in_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV03_1B__BOTH.txt', 'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV03_1B__BOTH.txt'}, {'in_file': 'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV04_1B__BOTH.txt', 'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV04_1B__BOTH.txt'}] + +# --------------------------------------------------------------- +# Num of Granule Paths BEFORE and AFTER 1845101 and 1845101 +# process_MODE__MCP_MAXAR_Path: Saved 1845101 Granule IDs to a File saved at: step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__GE01_1B__BOTH.txt +# process_MODE__MCP_MAXAR_Path: Function Finished with 1845101 Granule Paths and 13383320 Total Lines Processed +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# Num of Granule Paths BEFORE and AFTER 5078347 and 5078347 +# process_MODE__MCP_MAXAR_Path: Saved 5078347 Granule IDs to a File 
saved at: step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV01_1B__BOTH.txt +# process_MODE__MCP_MAXAR_Path: Function Finished with 5078347 Granule Paths and 40729489 Total Lines Processed +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# Num of Granule Paths BEFORE and AFTER 10339661 and 10339661 +# process_MODE__MCP_MAXAR_Path: Saved 10339661 Granule IDs to a File saved at: step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt +# process_MODE__MCP_MAXAR_Path: Function Finished with 10339661 Granule Paths and 79638458 Total Lines Processed +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# Num of Granule Paths BEFORE and AFTER 4627513 and 4627513 +# process_MODE__MCP_MAXAR_Path: Saved 4627513 Granule IDs to a File saved at: step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV03_1B__BOTH.txt +# process_MODE__MCP_MAXAR_Path: Function Finished with 4627513 Granule Paths and 36696704 Total Lines Processed +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# Num of Granule Paths BEFORE and AFTER 12705 and 12705 +# process_MODE__MCP_MAXAR_Path: Saved 12705 Granule IDs to a File saved at: step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV04_1B__BOTH.txt +# process_MODE__MCP_MAXAR_Path: Function Finished with 12705 Granule Paths and 93754 Total Lines Processed +# --------------------------------------------------------------- +# main: Reached the End -- Total Execution Time: 0:00:57.396265 +# ➜ mcp_MAXAR_deletes__q4_2024 +# ➜ mcp_MAXAR_deletes__q4_2024 +# ➜ mcp_MAXAR_deletes__q4_2024 +# ➜ mcp_MAXAR_deletes__q4_2024 +# ➜ 
mcp_MAXAR_deletes__q4_2024 ls -lah step_02__filtering_large_manifests_down/filtered_lists +# total 58898008 +# drwxr-xr-x 7 kstanto1 staff 224B Nov 3 15:50 . +# drwxr-xr-x 4 kstanto1 staff 128B Oct 29 22:51 .. +# -rw-r--r--@ 1 kstanto1 staff 2.2G Oct 29 23:32 MCP__Delivery_Bucket__GE01_1B__BOTH.txt +# -rw-r--r--@ 1 kstanto1 staff 6.7G Oct 29 23:33 MCP__Delivery_Bucket__WV01_1B__BOTH.txt +# -rw-r--r--@ 1 kstanto1 staff 13G Oct 29 23:37 MCP__Delivery_Bucket__WV02_1B__BOTH.txt +# -rw-r--r--@ 1 kstanto1 staff 6.0G Oct 29 23:39 MCP__Delivery_Bucket__WV03_1B__BOTH.txt +# -rw-r--r--@ 1 kstanto1 staff 20M Oct 29 23:39 MCP__Delivery_Bucket__WV04_1B__BOTH.txt +# ➜ mcp_MAXAR_deletes__q4_2024 +# ➜ mcp_MAXAR_deletes__q4_2024 +# ➜ mcp_MAXAR_deletes__q4_2024 ls -lah step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists +# total 6251584 +# drwxr-xr-x 7 kstanto1 staff 224B Nov 3 20:36 . +# drwxr-xr-x 6 kstanto1 staff 192B Nov 3 18:56 .. +# -rw-r--r-- 1 kstanto1 staff 255M Nov 3 20:36 MCP__Delivery_Bucket__Path_To_Granule__GE01_1B__BOTH.txt +# -rw-r--r-- 1 kstanto1 staff 702M Nov 3 20:36 MCP__Delivery_Bucket__Path_To_Granule__WV01_1B__BOTH.txt +# -rw-r--r-- 1 kstanto1 staff 1.4G Nov 3 20:36 MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt +# -rw-r--r-- 1 kstanto1 staff 640M Nov 3 20:36 MCP__Delivery_Bucket__Path_To_Granule__WV03_1B__BOTH.txt +# -rw-r--r-- 1 kstanto1 staff 2.3M Nov 3 20:36 MCP__Delivery_Bucket__Path_To_Granule__WV04_1B__BOTH.txt +# ➜ mcp_MAXAR_deletes__q4_2024 + + + + + +# ################################### +# # Output --- OLD_NGAP_MAXAR +# ################################### + + +# ➜ mcp_MAXAR_deletes__q4_2024 python step_03__getting_granule_id_lists.py +# main: STARTED + +# Converting OLD_NGAP MAXAR lists into just Lists of Granule IDs... 
+ +# Settings Items: +# SETTINGS__is_run__OLD_NGAP_MAXAR: True +# SETTINGS__MODE__OLD_NGAP_MAXAR_Paths: [{'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}, {'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV01_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV01_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}, {'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}, {'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV03_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}, {'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV03_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}, {'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV04_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 
'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}, {'in_file': 'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV04_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}] + +# --------------------------------------------------------------- +# Num of Granules BEFORE and AFTER 722623 and 722623 +# process_MODE__OLD_NGAP_MAXAR_Paths: Saved 722623 Granule IDs to a File saved at: step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# process_MODE__OLD_NGAP_MAXAR_Paths: Function Finished with 722623 Granules and 722623 Total Lines Processed +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# Num of Granules BEFORE and AFTER 5023086 and 5023086 +# process_MODE__OLD_NGAP_MAXAR_Paths: Saved 5023086 Granule IDs to a File saved at: step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV01_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# process_MODE__OLD_NGAP_MAXAR_Paths: Function Finished with 5023086 Granules and 5023086 Total Lines Processed +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# Num of Granules BEFORE and AFTER 3111290 and 3111290 +# process_MODE__OLD_NGAP_MAXAR_Paths: Saved 3111290 Granule IDs to a File saved at: step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# process_MODE__OLD_NGAP_MAXAR_Paths: Function Finished with 3111290 Granules and 3111290 Total Lines Processed +# --------------------------------------------------------------- +# 
--------------------------------------------------------------- +# Num of Granules BEFORE and AFTER 1882712 and 1882712 +# process_MODE__OLD_NGAP_MAXAR_Paths: Saved 1882712 Granule IDs to a File saved at: step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# process_MODE__OLD_NGAP_MAXAR_Paths: Function Finished with 1882712 Granules and 1882712 Total Lines Processed +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# Num of Granules BEFORE and AFTER 2515801 and 2515801 +# process_MODE__OLD_NGAP_MAXAR_Paths: Saved 2515801 Granule IDs to a File saved at: step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# process_MODE__OLD_NGAP_MAXAR_Paths: Function Finished with 2515801 Granules and 2515801 Total Lines Processed +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# Num of Granules BEFORE and AFTER 6753 and 6753 +# process_MODE__OLD_NGAP_MAXAR_Paths: Saved 6753 Granule IDs to a File saved at: step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# process_MODE__OLD_NGAP_MAXAR_Paths: Function Finished with 6753 Granules and 6753 Total Lines Processed +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# Num of Granules BEFORE and AFTER 6753 and 6753 +# process_MODE__OLD_NGAP_MAXAR_Paths: Saved 6753 Granule IDs to a File saved at: step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# process_MODE__OLD_NGAP_MAXAR_Paths: Function Finished with 6753 Granules and 6753 Total Lines Processed +# 
--------------------------------------------------------------- + +# Converting MCP MAXAR Lists into just List of Unique Granule Paths... + +# Settings Items: +# SETTINGS__is_run__MCP_MAXAR: True +# SETTINGS__MODE__MCP_MAXAR_Paths: [] + +# main: Reached the End -- Total Execution Time: 0:00:12.138899 +# ➜ mcp_MAXAR_deletes__q4_2024 +# ➜ mcp_MAXAR_deletes__q4_2024 +# ➜ mcp_MAXAR_deletes__q4_2024 ls -lah step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW +# total 4925360 +# drwxr-xr-x 10 kstanto1 staff 320B Oct 29 22:09 . +# drwxr-xr-x 4 kstanto1 staff 128B Nov 3 18:56 .. +# -rw-r--r--@ 1 kstanto1 staff 6.0K Oct 29 10:49 .DS_Store +# -rw-r--r--@ 1 kstanto1 staff 131M Oct 29 11:29 GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv +# -rw-r--r--@ 1 kstanto1 staff 910M Oct 29 11:10 WV01_Pan_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv +# -rw-r--r--@ 1 kstanto1 staff 564M Oct 29 11:04 WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv +# -rw-r--r--@ 1 kstanto1 staff 341M Oct 29 10:54 WV03_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv +# -rw-r--r--@ 1 kstanto1 staff 456M Oct 29 10:52 WV03_Pan_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv +# -rw-r--r--@ 1 kstanto1 staff 1.5M Oct 29 10:49 WV04_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv +# -rw-r--r--@ 1 kstanto1 staff 1.5M Oct 29 10:49 WV04_Pan_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv +# ➜ mcp_MAXAR_deletes__q4_2024 ls -lah step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists +# total 1998128 +# drwxr-xr-x 9 kstanto1 staff 288B Nov 3 19:22 . +# drwxr-xr-x 6 kstanto1 staff 192B Nov 3 18:56 .. 
+# -rw-r--r-- 1 kstanto1 staff 53M Nov 3 19:22 GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# -rw-r--r-- 1 kstanto1 staff 369M Nov 3 19:22 WV01_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# -rw-r--r-- 1 kstanto1 staff 228M Nov 3 19:22 WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# -rw-r--r-- 1 kstanto1 staff 138M Nov 3 19:22 WV03_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# -rw-r--r-- 1 kstanto1 staff 185M Nov 3 19:22 WV03_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# -rw-r--r-- 1 kstanto1 staff 666K Nov 3 19:22 WV04_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# -rw-r--r-- 1 kstanto1 staff 666K Nov 3 19:22 WV04_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt +# ➜ mcp_MAXAR_deletes__q4_2024 + + + +# DRAFT +# # IMPORTANT +# This script has multiple modes of operation. +# For now the two modes are MCP_MAXAR and OLD_NGAP_MAXAR +# This is toggled by a setting below. +# +# Setting for which Mode we are operating in. diff --git a/utils/mcp_maxar_deletes/step_04__getting_final_MCP_Safe_To_Delete_paths.py b/utils/mcp_maxar_deletes/step_04__getting_final_MCP_Safe_To_Delete_paths.py new file mode 100644 index 0000000..f10a713 --- /dev/null +++ b/utils/mcp_maxar_deletes/step_04__getting_final_MCP_Safe_To_Delete_paths.py @@ -0,0 +1,687 @@ +# step_04__getting_final_MCP_Safe_To_Delete_paths.py + +# python step_04__getting_final_MCP_Safe_To_Delete_paths.py + +import datetime + + +# The array to hold each config object. +# # 3 file paths, 2 inputs, and 1 output. +# # 2 file inputs are used, +# # The first file input is the main list that will be filtered, +# # The second file input is the list used to filter against the first +# # The file output is where the filtered list will end up. 
#
# One entry per OLD_NGAP collection that should be filtered.
#   in_file__MCP  : MCP list of single-granule paths (the list being filtered)
#   in_file__NGAP : OLD_NGAP list of granule IDs (the list to filter against)
#   out_file__MCP : destination for the MCP paths that passed the filter
#
# Template:  {'in_file__MCP': '', 'in_file__NGAP': '', 'out_file__MCP': ''}
#
# The WV03 and WV04 MCP inputs each appear twice on purpose: the MCP lists are
# combined per sensor, while the NGAP lists are separate (MSI and PAN).
SETTINGS__Processing_Objects = [
    {
        'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__GE01_1B__BOTH.txt',
        'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
        'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/GE01_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt',
    },
    {
        'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV01_1B__BOTH.txt',
        'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV01_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
        'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV01_PAN_L1B__Safe_To_Delete_MCP_Granule_Paths.txt',
    },
    {
        'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt',
        'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
        'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV02_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt',
    },
    {
        'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV03_1B__BOTH.txt',
        'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
        'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV03_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt',
    },
    {
        'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV03_1B__BOTH.txt',
        'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
        'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV03_PAN_L1B__Safe_To_Delete_MCP_Granule_Paths.txt',
    },
    {
        'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV04_1B__BOTH.txt',
        'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
        'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV04_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt',
    },
    {
        'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV04_1B__BOTH.txt',
        'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt',
        'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV04_PAN_L1B__Safe_To_Delete_MCP_Granule_Paths.txt',
    },
]

# # Settings with only a single item (useful for debugging and figuring out the timing)
# SETTINGS__Processing_Objects = [
#     # {'in_file__MCP': '', 'in_file__NGAP': '', 'out_file__MCP': ''}
#     # Note, there are not duplicates objects here -- For WV03 and WV04, the MCP lists are combined, but the NGAP lists are separate.
#     {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV02_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'}
# ]

# When True, every output line is passed through add_formatting_to_string()
# so the file is in the exact format the downstream Batch Job expects.
SETTINGS__Add_Formatting_To_Final_Output = True


def add_formatting_to_string(in_str=''):
    """Return *in_str* prefixed with the delivery bucket name and a comma.

    Called once per output line when
    SETTINGS__Add_Formatting_To_Final_Output is True.
    """
    return f'csdap-maxar-delivery,{in_str}'


def open_file_and_load_lines_into_array(in_file_path=''):
    """Read a text file and return its lines as a list.

    Line endings are kept exactly as they appear in the file; callers that
    need bare values must strip them.
    """
    with open(in_file_path, 'r') as in_file:
        return list(in_file)


def _strip_line_endings(raw_lines):
    """Return *raw_lines* without trailing CR/LF, dropping lines left empty."""
    stripped_lines = (raw_line.rstrip('\r\n') for raw_line in raw_lines)
    return [line for line in stripped_lines if line]


def _select_safe_to_delete_paths(mcp_granule_paths, ngap_granule_ids):
    """Pick the MCP paths whose granule ID also appears in the NGAP list.

    The granule ID of an MCP path is its last '/'-separated component.
    Example MCP path:
        css/nga/WV04/1B/2018/052/WV04_90fce728-..._01/WV04_20180221235737_90fce728-...-M1BS-059102542080_01_P002
    Example NGAP granule ID:
        WV04_20181218053031_b913f7ec-...-P1BS-059420283030_01_P008

    Returns a tuple:
        (safe_paths, num_mcp_granule_ids, num_duplicate_mcp_ids,
         num_ngap_found, num_ngap_not_found)
    safe_paths keeps the order in which each granule ID first appeared in the
    MCP list. If several MCP paths share a granule ID only the last path is
    kept (same as before); num_duplicate_mcp_ids says how often that happened.
    """
    path_by_granule_id = {}
    num_duplicate_mcp_ids = 0
    for granule_path in mcp_granule_paths:
        granule_id = granule_path.rsplit('/', 1)[-1]
        if granule_id in path_by_granule_id:
            num_duplicate_mcp_ids += 1
        path_by_granule_id[granule_id] = granule_path

    safe_granule_ids = set()
    num_ngap_found = 0
    num_ngap_not_found = 0
    for ngap_granule_id in ngap_granule_ids:
        # Plain membership test instead of try/except: a miss is an expected
        # outcome here, not an error.
        if ngap_granule_id in path_by_granule_id:
            safe_granule_ids.add(ngap_granule_id)
            num_ngap_found += 1
        else:
            num_ngap_not_found += 1

    safe_paths = [
        granule_path
        for granule_id, granule_path in path_by_granule_id.items()
        if granule_id in safe_granule_ids
    ]
    return (
        safe_paths,
        len(path_by_granule_id),
        num_duplicate_mcp_ids,
        num_ngap_found,
        num_ngap_not_found,
    )


# The Heavy Lifting
#
def process_lists(process_obj=None):
    """Filter one MCP granule-path list against one NGAP granule-ID list.

    process_obj is a dict with the keys 'in_file__MCP', 'in_file__NGAP' and
    'out_file__MCP' (see SETTINGS__Processing_Objects). Both inputs are read
    fully into memory, every MCP path whose granule ID is present in the NGAP
    list is selected, and the selection is written to 'out_file__MCP', one
    path per line (overwriting the file if it exists).

    The matching uses a dictionary keyed by granule ID. The author's recorded
    benchmarks: the earlier nested-loop / substring approaches needed roughly
    2-3 minutes to find about 100 granules out of millions, while the
    dictionary approach ran the largest dataset end to end in about 18 seconds.

    Errors are printed rather than raised so that main() can carry on with
    the next entry. If an input cannot be loaded, or filtering fails, the
    output file is NOT written, so an existing result is never replaced by an
    empty or partial list.

    Returns None.
    """
    mcp_granule_paths = []
    ngap_granule_ids = []
    safe_to_delete_paths = []

    try:
        config = {} if process_obj is None else process_obj
        in_file__MCP = config['in_file__MCP']
        in_file__NGAP = config['in_file__NGAP']
        out_file__MCP = config['out_file__MCP']

        # Load the list that will be filtered.
        try:
            mcp_granule_paths = _strip_line_endings(
                open_file_and_load_lines_into_array(in_file_path=in_file__MCP)
            )
        except (OSError, UnicodeError) as e:
            print(f'process_lists: ERROR: Error Loading MCP Granule Paths List.  System Message: {e}')
            return

        # Load the list to filter against.
        try:
            ngap_granule_ids = _strip_line_endings(
                open_file_and_load_lines_into_array(in_file_path=in_file__NGAP)
            )
        except (OSError, UnicodeError) as e:
            print(f'process_lists: ERROR: Error Loading NGAP Granule IDs List.  System Message: {e}')
            return

        (
            safe_to_delete_paths,
            num_of_MCP_GranuleID_keys,
            num_duplicate_mcp_ids,
            counter__Found_Safe_To_Delete_Item,
            counter__NOT_FOUND,
        ) = _select_safe_to_delete_paths(mcp_granule_paths, ngap_granule_ids)

        print(f'(num_of_MCP_GranuleID_keys): {num_of_MCP_GranuleID_keys} ')
        if num_duplicate_mcp_ids:
            print(f'(WARNING): {num_duplicate_mcp_ids} MCP Granule Paths share a Granule ID with a later path; only the last path per Granule ID is kept')
        print()
        print(f'(NGAP_Loop_Done): (counter__ngap_loops): {len(ngap_granule_ids)}')
        print(f'(NGAP_Loop_Done): (counter__Found_Safe_To_Delete_Item): {counter__Found_Safe_To_Delete_Item}')
        print(f'(NGAP_Loop_Done): (counter__NOT_FOUND): {counter__NOT_FOUND}')
        print()
        print()
        print(f'(Filtering_Done): (len(out_file__MCP__Filtered_Granule_Paths_List)): {len(safe_to_delete_paths)}')
        print()

        # Write the output file. Line endings were stripped on load, so the
        # newline is added back explicitly for every line.
        if SETTINGS__Add_Formatting_To_Final_Output:
            lines_to_write = (add_formatting_to_string(in_str=path) for path in safe_to_delete_paths)
        else:
            lines_to_write = iter(safe_to_delete_paths)
        try:
            with open(out_file__MCP, 'w') as out_file:
                out_file.writelines(f'{line_to_write}\n' for line_to_write in lines_to_write)
        except OSError as e:
            print(f'process_lists: ERROR: Error Writing Final Output MCP Safe To Delete Granule Paths List.  System Message: {e}')

    except Exception as e:
        # Top-level boundary for one processing entry: report and let the
        # caller continue with the next entry.
        print(f'process_lists: ERROR (top level): An error occured: {e}.  Input Object: (process_obj): {process_obj}')

    finally:
        # Output the Counts (always, even after an early return or an error).
        print()
        print('Item Counts (After Processing)')
        print(f'  in_file__MCP__Granule_Paths_List:                 {len(mcp_granule_paths)}')
        print(f'  in_file__NGAP__Granule_List:                      {len(ngap_granule_ids)}')
        print(f'  out_file__MCP__Filtered_Granule_Paths_List:       {len(safe_to_delete_paths)}')


# Entry Point
def main():
    """Run process_lists() for every entry in SETTINGS__Processing_Objects and print the elapsed time."""
    print('main: STARTED')
    # Timezone-aware "now"; datetime.utcnow() is deprecated since Python 3.12.
    datetime__START = datetime.datetime.now(datetime.timezone.utc)

    print()
    print('Using OLD_NGAP MAXAR lists to Filter MCP Single Granule Paths List down to the MCP Safe_To_Delete Lists of single Granule Paths...')
    print()
    print('Settings Items:')
    print(f'  SETTINGS__Processing_Objects:        {SETTINGS__Processing_Objects}')
    #
    for process_obj in SETTINGS__Processing_Objects:
        print('---------------------------------------------------------------')
        print(f'About to Process Item: (process_obj): {process_obj}')
        process_lists(process_obj=process_obj)
        print('---------------------------------------------------------------')

    datetime__END = datetime.datetime.now(datetime.timezone.utc)
    total_time__str = str(datetime__END - datetime__START)
    print(f'main: Reached the End -- Total Execution Time: {total_time__str}')


# Only run when executed as a script (python step_04__getting_final_MCP_Safe_To_Delete_paths.py).
if __name__ == '__main__':
    main()




# Figuring out how long this script takes to run
#
# # Loading all of the settings and loading all of the data into memory, but NOT YET Actually Processing the loop and the nested loop.
# main: Reached the End -- Total Execution Time: 0:00:05.889644
#
# # Running a single dataset up to 1 million inner loop iterations.
# ➜  mcp_MAXAR_deletes__q4_2024 python step_04__getting_final_MCP_Safe_To_Delete_paths.py
# main: STARTED
# Using OLD_NGAP MAXAR lists to Filter MCP Single Granule Paths List down to the MCP Safe_To_Delete Lists of single Granule Paths...
# Settings Items:
#   SETTINGS__Processing_Objects:        [{'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV02_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'}]
# ---------------------------------------------------------------
# About to Process Item: (process_obj): {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV02_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'}
+# BREAKING OUT because I want to see how long 1 million inner iterations takes: +# OUTER ITEM: (mcp_granule_path): css/nga/WV02/1B/2021/293/WV02_10300100C879D000_P1BS_505826846030_01/WV02_20211020032708_10300100C879D000_21OCT20032708-P1BS-505826846030_01_P009 +# INNER ITEM: (ngap_granule_id): WV02_20200918100842_10300100AD4B6E00_20SEP18100842-M1BS-504711927020_01_P006 +# (counter__total_inner_loop_iterations): 1000001 +# (counter__outer_loop_iterations): 0 +# --------------------------------------------------------------- +# main: Reached the End -- Total Execution Time: 0:00:03.163323 +# +# +# # For 10,000,000 +# Progress: (0.00}) ( 10000000 out of 32169683872690 ) +# main: Reached the End -- Total Execution Time: 0:00:04.776866 +# +# +# # For 100,000,000 +# Progress: (0.00}) ( 100000000 out of 32169683872690 ) +# OUTER ITEM: (mcp_granule_path): css/nga/WV02/1B/2013/215/WV02_1030010025555200_P1BS_506721258040_01/WV02_20130803003409_1030010025555200_13AUG03003409-P1BS-506721258040_01_P010 +# INNER ITEM: (ngap_granule_id): WV02_20180531110134_103001007E769200_18MAY31110134-M1BS-502225839100_01_P001 +# (counter__total_inner_loop_iterations): 100000000 +# (counter__outer_loop_iterations): 32 +# Current Num of Granules Found so far: 10 +# main: Reached the End -- Total Execution Time: 0:00:20.427198 +# +# +# # Method 1 -- For 1,000,000,000 (Method 1 - Got to 321 Granules) +# Progress: (0.00}) ( 1000000000 out of 32169683872690 ) +# OUTER ITEM: (mcp_granule_path): css/nga/WV02/1B/2020/304/WV02_10300100B0738400_M1BS_504825608050_01/WV02_20201030110450_10300100B0738400_20OCT30110450-M1BS-504825608050_01_P002 +# INNER ITEM: (ngap_granule_id): WV02_20200521051728_10300100A73CFC00_20MAY21051728-M1BS-504321594010_01_P001 +# (counter__total_inner_loop_iterations): 1000000000 +# (counter__outer_loop_iterations): 321 +# Current Num of Granules Found so far: 101 +# main: Reached the End -- Total Execution Time: 0:02:56.797226 + +# # Method 2 - Up to 323 granules (to match a 
previous test) +# ➜ mcp_MAXAR_deletes__q4_2024 python step_04__getting_final_MCP_Safe_To_Delete_paths.py +# main: STARTED +# Using OLD_NGAP MAXAR lists to Filter MCP Single Granule Paths List down to the MCP Safe_To_Delete Lists of single Granule Paths... +# Settings Items: +# SETTINGS__Processing_Objects: [{'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV02_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'}] +# --------------------------------------------------------------- +# About to Process Item: (process_obj): {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV02_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'} +# (DEBUG): Found 101 granule paths... +# (DEBUG): ...Breaking out of method 2 at outer loop count 323, so we can calculate the time for a fraction of the data +# --------------------------------------------------------------- +# main: Reached the End -- Total Execution Time: 0:01:50.026468 + +# # Method 2 -- Again, but only up to 101 granules (to actually match Method 1's trial) + + + +# # Dictionary Method Output, All the way to a final output! (WV02_MSI_L1B) +# +#➜ mcp_MAXAR_deletes__q4_2024 python step_04__getting_final_MCP_Safe_To_Delete_paths.py +# main: STARTED +# +# Using OLD_NGAP MAXAR lists to Filter MCP Single Granule Paths List down to the MCP Safe_To_Delete Lists of single Granule Paths... 
+# +# Settings Items: +# SETTINGS__Processing_Objects: [{'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV02_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'}] +# --------------------------------------------------------------- +# About to Process Item: (process_obj): {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV02_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'} +# (num_of_MCP_GranuleID_keys): 10329679 +# +# (NGAP_Loop_Done): (counter__ngap_loops): 3111290 +# (NGAP_Loop_Done): (counter__Found_Safe_To_Delete_Item): 3111290 +# (NGAP_Loop_Done): (counter__NOT_FOUND): 0 +# +# +# (Filtering_Done): (len(out_file__MCP__Filtered_Granule_Paths_List)): 3111290 +# +# +# Item Counts (After Processing) +# in_file__MCP__Granule_Paths_List: 10339661 +# in_file__NGAP__Granule_List: 3111290 +# out_file__MCP__Filtered_Granule_Paths_List: 3111290 +# Total Inner Loop Iterations: 0 +# --------------------------------------------------------------- +# main: Reached the End -- Total Execution Time: 0:00:24.016042 +# ➜ mcp_MAXAR_deletes__q4_2024 + + + + +# # For Reference, Here are the config settings from Step 03 +# +# SETTINGS__is_run__OLD_NGAP_MAXAR = False #True +# SETTINGS__MODE__OLD_NGAP_MAXAR_Paths = [ +# 
{'in_file':'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}, +# {'in_file':'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV01_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV01_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}, +# {'in_file':'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}, +# {'in_file':'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV03_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}, +# {'in_file':'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV03_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}, +# {'in_file':'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV04_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'}, +# 
{'in_file':'step_03__GettingGranuleIDs/last_time__safe_to_delete_lists/safe_to_delete_lists__RAW/WV04_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__csdap-cumulus-prod-public.csv', 'out_file': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt'} +# ] +# +# +# # Settings for MCP_MAXAR +# SETTINGS__is_run__MCP_MAXAR = True +# SETTINGS__MODE__MCP_MAXAR_Paths = [ +# {'in_file':'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__GE01_1B__BOTH.txt', 'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__GE01_1B__BOTH.txt'}, +# {'in_file':'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV01_1B__BOTH.txt', 'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV01_1B__BOTH.txt'}, +# {'in_file':'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV02_1B__BOTH.txt', 'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt'}, +# {'in_file':'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV03_1B__BOTH.txt', 'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV03_1B__BOTH.txt'}, +# {'in_file':'step_02__filtering_large_manifests_down/filtered_lists/MCP__Delivery_Bucket__WV04_1B__BOTH.txt', 'out_file': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV04_1B__BOTH.txt'} +# ] + + + + + + + + + + + +# Final Output (Before adding the last bit of formatting code) +# +# ➜ mcp_MAXAR_deletes__q4_2024 python step_04__getting_final_MCP_Safe_To_Delete_paths.py +# main: STARTED + +# Using OLD_NGAP MAXAR lists to Filter MCP Single Granule Paths List down to the MCP Safe_To_Delete Lists of single Granule Paths... 
+ +# Settings Items: +# SETTINGS__Processing_Objects: [{'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__GE01_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/GE01_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'}, {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV01_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV01_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV01_PAN_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'}, {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV02_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'}, {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV03_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV03_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'}, {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV03_1B__BOTH.txt', 'in_file__NGAP': 
'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV03_PAN_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'}, {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV04_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV04_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'}, {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV04_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV04_PAN_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'}] +# --------------------------------------------------------------- +# About to Process Item: (process_obj): {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__GE01_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/GE01_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/GE01_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'} +# (num_of_MCP_GranuleID_keys): 1819068 + +# (NGAP_Loop_Done): (counter__ngap_loops): 722623 +# (NGAP_Loop_Done): (counter__Found_Safe_To_Delete_Item): 722623 +# (NGAP_Loop_Done): (counter__NOT_FOUND): 0 + + +# (Filtering_Done): (len(out_file__MCP__Filtered_Granule_Paths_List)): 722623 + + +# Item 
Counts (After Processing) +# in_file__MCP__Granule_Paths_List: 1845101 +# in_file__NGAP__Granule_List: 722623 +# out_file__MCP__Filtered_Granule_Paths_List: 722623 +# Total Inner Loop Iterations: 0 +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# About to Process Item: (process_obj): {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV01_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV01_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV01_PAN_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'} +# (num_of_MCP_GranuleID_keys): 5074494 + +# (NGAP_Loop_Done): (counter__ngap_loops): 5023086 +# (NGAP_Loop_Done): (counter__Found_Safe_To_Delete_Item): 5023086 +# (NGAP_Loop_Done): (counter__NOT_FOUND): 0 + + +# (Filtering_Done): (len(out_file__MCP__Filtered_Granule_Paths_List)): 5023086 + + +# Item Counts (After Processing) +# in_file__MCP__Granule_Paths_List: 5078347 +# in_file__NGAP__Granule_List: 5023086 +# out_file__MCP__Filtered_Granule_Paths_List: 5023086 +# Total Inner Loop Iterations: 0 +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# About to Process Item: (process_obj): {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV02_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV02_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV02_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'} +# (num_of_MCP_GranuleID_keys): 10329679 + +# (NGAP_Loop_Done): 
(counter__ngap_loops): 3111290 +# (NGAP_Loop_Done): (counter__Found_Safe_To_Delete_Item): 3111290 +# (NGAP_Loop_Done): (counter__NOT_FOUND): 0 + + +# (Filtering_Done): (len(out_file__MCP__Filtered_Granule_Paths_List)): 3111290 + + +# Item Counts (After Processing) +# in_file__MCP__Granule_Paths_List: 10339661 +# in_file__NGAP__Granule_List: 3111290 +# out_file__MCP__Filtered_Granule_Paths_List: 3111290 +# Total Inner Loop Iterations: 0 +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# About to Process Item: (process_obj): {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV03_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV03_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'} +# (num_of_MCP_GranuleID_keys): 4626037 + +# (NGAP_Loop_Done): (counter__ngap_loops): 1882712 +# (NGAP_Loop_Done): (counter__Found_Safe_To_Delete_Item): 1882712 +# (NGAP_Loop_Done): (counter__NOT_FOUND): 0 + + +# (Filtering_Done): (len(out_file__MCP__Filtered_Granule_Paths_List)): 1882712 + + +# Item Counts (After Processing) +# in_file__MCP__Granule_Paths_List: 4627513 +# in_file__NGAP__Granule_List: 1882712 +# out_file__MCP__Filtered_Granule_Paths_List: 1882712 +# Total Inner Loop Iterations: 0 +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# About to Process Item: (process_obj): {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV03_1B__BOTH.txt', 'in_file__NGAP': 
'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV03_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV03_PAN_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'} +# (num_of_MCP_GranuleID_keys): 4626037 + +# (NGAP_Loop_Done): (counter__ngap_loops): 2515801 +# (NGAP_Loop_Done): (counter__Found_Safe_To_Delete_Item): 2515801 +# (NGAP_Loop_Done): (counter__NOT_FOUND): 0 + + +# (Filtering_Done): (len(out_file__MCP__Filtered_Granule_Paths_List)): 2515801 + + +# Item Counts (After Processing) +# in_file__MCP__Granule_Paths_List: 4627513 +# in_file__NGAP__Granule_List: 2515801 +# out_file__MCP__Filtered_Granule_Paths_List: 2515801 +# Total Inner Loop Iterations: 0 +# --------------------------------------------------------------- +# --------------------------------------------------------------- +# About to Process Item: (process_obj): {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV04_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_MSI_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV04_MSI_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'} +# (num_of_MCP_GranuleID_keys): 12705 + +# (NGAP_Loop_Done): (counter__ngap_loops): 6753 +# (NGAP_Loop_Done): (counter__Found_Safe_To_Delete_Item): 5950 +# (NGAP_Loop_Done): (counter__NOT_FOUND): 803 + + +# (Filtering_Done): (len(out_file__MCP__Filtered_Granule_Paths_List)): 5950 + + +# Item Counts (After Processing) +# in_file__MCP__Granule_Paths_List: 12705 +# in_file__NGAP__Granule_List: 6753 +# out_file__MCP__Filtered_Granule_Paths_List: 5950 +# Total Inner Loop Iterations: 0 +# --------------------------------------------------------------- +# 
--------------------------------------------------------------- +# About to Process Item: (process_obj): {'in_file__MCP': 'step_03__GettingGranuleIDs/MCP_Single_Granule_Only_Lists/MCP__Delivery_Bucket__Path_To_Granule__WV04_1B__BOTH.txt', 'in_file__NGAP': 'step_03__GettingGranuleIDs/OLD_NGAP_Single_Granule_Only_Lists/WV04_PAN_L1B___1__SAFE_TO_DELETE__OLD_NGAP__GRANULE_ONLY_LIST.txt', 'out_file__MCP': 'step_04__Get_Final_MCP_SafeToDelete_Lists/MCP_Single_Granule_PAths_Safe_To_Delete/WV04_PAN_L1B__Safe_To_Delete_MCP_Granule_Paths.txt'} +# (num_of_MCP_GranuleID_keys): 12705 + +# (NGAP_Loop_Done): (counter__ngap_loops): 6753 +# (NGAP_Loop_Done): (counter__Found_Safe_To_Delete_Item): 6753 +# (NGAP_Loop_Done): (counter__NOT_FOUND): 0 + + +# (Filtering_Done): (len(out_file__MCP__Filtered_Granule_Paths_List)): 6753 + + +# Item Counts (After Processing) +# in_file__MCP__Granule_Paths_List: 12705 +# in_file__NGAP__Granule_List: 6753 +# out_file__MCP__Filtered_Granule_Paths_List: 6753 +# Total Inner Loop Iterations: 0 +# --------------------------------------------------------------- +# main: Reached the End -- Total Execution Time: 0:00:49.518011 +# ➜ mcp_MAXAR_deletes__q4_2024 + diff --git a/utils/mcp_maxar_deletes/step_05__mcp_lambda_to_delete_s3_files__final.py b/utils/mcp_maxar_deletes/step_05__mcp_lambda_to_delete_s3_files__final.py new file mode 100644 index 0000000..55fb90f --- /dev/null +++ b/utils/mcp_maxar_deletes/step_05__mcp_lambda_to_delete_s3_files__final.py @@ -0,0 +1,284 @@ +# step_05__mcp_lambda_to_delete_s3_files__final.py + +# step_05__mcp_lambda_to_delete_s3_files.py + +# Parse the Path from the input +# + + + +import time +import random +import json +import boto3 +import hashlib +import sys + +# Get the needed S3 Boto Client +s3 = boto3.client('s3') + +# Setting this to True will create significant output to Cloudwatch Logs +SETTING__IS_OUTPUT_DEBUG_MODE = False #True #False # True # False + +# List of all the possible extensions for MAXAR Granules 
#SETTINGS__all_extensions_MAXAR = ['-BROWSE.jpg','-cmr.json','-thumb.jpg','.rename','.tar','.tif','.xml']
SETTINGS__all_extensions_MAXAR = ['.ntf']


# When we need to print the output to the logs to see what is going on
def debug_print(str_to_print="", obj_out=None):
    """Print '<str_to_print>: <obj_out>' only when debug mode is switched on.

    Reads the module-level flag SETTING__IS_OUTPUT_DEBUG_MODE on every call.
    """
    if SETTING__IS_OUTPUT_DEBUG_MODE:
        print(f'{str_to_print}: {obj_out}')


# Without this, we hit the throttle limits when running the S3 batch operation.
def random_sleep():
    """Sleep for a random duration between 0.01 and 0.10 seconds."""
    sleep_time = random.uniform(0.01, 0.1)  # Select a random number between 0.01 and 0.10
    debug_print(str_to_print="Sleeping for: " + str(sleep_time) + " seconds.")
    time.sleep(sleep_time)


# I want to verify that we are ONLY deleting files from the correct bucket and ONLY paths that include the expected initial paths.
def validate__is_correct_bucket_and_root_path(input_bucket_name='', input_key_path=''):
    """Check a bucket/key pair against the hard-coded delete allow-list.

    Args:
        input_bucket_name: Name of the bucket the key lives in.
        input_key_path: Full S3 key that is about to be deleted.

    Returns:
        Tuple (is_safe_to_delete, validation_message). The message is '' when
        the pair is safe, otherwise it explains which check failed.
    """
    expected_bucket = 'csdap-maxar-delivery'
    expected_root_paths = ['css/nga/WV04/1B/', 'css/nga/WV03/1B/', 'css/nga/WV02/1B/', 'css/nga/WV01/1B/', 'css/nga/GE01/1B/']

    if input_bucket_name != expected_bucket:
        return False, '|| Wrong Bucket Name: (input_bucket_name): ' + str(input_bucket_name) + ' does not equal (expected_bucket): ' + str(expected_bucket) + ' '

    # The key must start with exactly one of the expected root paths.
    # startswith() is used so the check does not depend on every root path
    # having the same length as the [0:16] slice shown in the message.
    if not input_key_path.startswith(tuple(expected_root_paths)):
        # Looks like the root path is not one of the tightly controlled, expected paths. Do not attempt a delete!
        return False, '|| Input Key Path has a root path that is not one of the expected paths. (input_key_path[0:16]): ' + str(input_key_path[0:16]) + ' is not found in the list (expected_root_paths): ' + str(expected_root_paths) + ' '

    # At this point, we have passed both validation checks.
    return True, ''


# Actually delete a file from S3
def execute_s3_delete(bucket_name='', key_path=''):
    """Delete one object from S3 after it passes the allow-list validation.

    Args:
        bucket_name: Bucket that holds the object.
        key_path: Full key of the object to delete.

    Returns:
        Tuple (did_delete, error_message). error_message is '' on success;
        otherwise it is the validation message or a description of the
        exception raised by the delete call.
    """
    # First Validate that we can delete this file (based on hard coded validation values)
    is_pass_validation, validation_message = validate__is_correct_bucket_and_root_path(input_bucket_name=bucket_name, input_key_path=key_path)
    if not is_pass_validation:
        return False, validation_message

    try:
        s3.delete_object(Bucket=bucket_name, Key=key_path)
    except Exception:
        # Report the failure to the caller instead of aborting the whole task.
        # (Exception, not a bare except, so SystemExit/KeyboardInterrupt propagate.)
        err_info = str(sys.exc_info())
        return False, f'failed to delete {key_path}. Error Message: {err_info}'

    return True, ''


# This function will process the list of key paths and attempt to delete each one
def execute_process_granule_key_path_list(bucket_name='', list_of_s3_key_paths_to_delete=None, original_key_path='', original_key_path_to_granule='UNSET'):
    """Attempt to delete every key in the list and sort the outcomes.

    Args:
        bucket_name: Bucket that holds the objects.
        list_of_s3_key_paths_to_delete: Full keys to delete. None (the default)
            is treated as an empty list.
        original_key_path: Unused; kept so existing callers keep working.
        original_key_path_to_granule: Granule key prefix; it is stripped from
            each full key to recover the extension (for example '.ntf').

    Returns:
        Tuple (file_exts_removed, error_exts, error_messages).
    """
    file_exts_removed = []
    error_exts = []
    error_messages = []

    for full_key_path in (list_of_s3_key_paths_to_delete or []):
        # Convert a full key path back into its extension, e.g. '-BROWSE.jpg'.
        # If the key does not start with the granule prefix, fall back to the
        # whole key so the outcome is still reported instead of raising.
        if full_key_path.startswith(original_key_path_to_granule):
            current_extension = full_key_path[len(original_key_path_to_granule):]
        else:
            current_extension = full_key_path

        # Attempt to delete the file
        did_delete, error_message = execute_s3_delete(bucket_name=bucket_name, key_path=full_key_path)

        if did_delete:
            file_exts_removed.append(current_extension)
        else:
            error_exts.append(current_extension)
            error_messages.append(error_message)

    return file_exts_removed, error_exts, error_messages


# Example of all possible files deleted ['-BROWSE.jpg','-cmr.json','-thumb.jpg','.rename','.tar','.tif','.xml']
def get_key_paths_to_files(input_key_path_to_granule_id=''):
    """Return the granule key path with each configured extension appended.

    The extensions come from SETTINGS__all_extensions_MAXAR.
    """
    return [f'{input_key_path_to_granule_id}{ext_item}' for ext_item in SETTINGS__all_extensions_MAXAR]


def build_batch_operation_response(event, file_exts_removed, error_exts, error_messages):
    """Build the per-task response dict returned to S3 Batch Operations.

    The task is reported as 'Succeeded' only when no delete failed. If any key
    was rejected by validation or raised during the delete, the task is
    reported as 'PermanentFailure' so the failure shows up in the job report.

    Args:
        event: The S3 Batch Operations invocation event.
        file_exts_removed: Extensions that were deleted.
        error_exts: Extensions whose delete failed.
        error_messages: One message per failed delete.

    Returns:
        Response dict holding the invocation ids and one task result.
    """
    result_code = 'PermanentFailure' if error_exts else 'Succeeded'
    return {
        'statusCode': 200,
        'invocationSchemaVersion': event['invocationSchemaVersion'],
        'invocationId': event['invocationId'],
        'results': [
            {
                'taskId': event['tasks'][0]['taskId'],
                'resultCode': result_code,
                'resultString': f'Removed: {file_exts_removed} Errors: {error_exts}, Error Messages: {error_messages}'
            }
        ]
    }


def lambda_handler(event, context):
    """Handle one S3 Batch Operations task: delete the files of one granule.

    The task's s3Key is the key path to a granule without an extension. Every
    extension in SETTINGS__all_extensions_MAXAR is appended and the resulting
    objects are deleted.

    Args:
        event: S3 Batch Operations invocation event (see the example below).
        context: Lambda context object (unused).

    Returns:
        The task response dict, or {'statusCode': 500, 'err_info': ...} if an
        unexpected exception occurred while handling the event.
    """
    try:
        debug_print(str_to_print="Starting a new run")

        # Looking at the Event Object:
        debug_print(str_to_print="Event Object", obj_out=event)

        # Extract bucket name and key from the event
        s3BucketArn = event['tasks'][0]['s3BucketArn']
        s3Key = event['tasks'][0]['s3Key']
        debug_print(str_to_print="s3BucketArn", obj_out=s3BucketArn)  # Example: arn:aws:s3:::csdap-maxar-delivery
        debug_print(str_to_print="s3Key", obj_out=s3Key)  # Example: css/nga/WV04/1B/2017/203/<catalog_dir>/<granule_id>

        # The bucket name is the part of the ARN after ':::'; strip any extra whitespace
        src_bucket_name = s3BucketArn.split(':::')[1].strip()
        src_key_path = s3Key.strip()
        debug_print(str_to_print="src_bucket_name", obj_out=src_bucket_name)  # csdap-maxar-delivery
        debug_print(str_to_print="src_key_path", obj_out=src_key_path)  # css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002

        # List of files to delete (the granule key path plus each configured extension)
        list_of_s3_key_paths_to_delete = get_key_paths_to_files(input_key_path_to_granule_id=src_key_path)
        debug_print(str_to_print="list_of_s3_key_paths_to_delete", obj_out=list_of_s3_key_paths_to_delete)  # e.g. ['<src_key_path>.ntf']

        # Sleep for a very short amount of time to prevent throttle limit -- BEFORE ANY S3 Operations
        random_sleep()

        # Process the list now (iterate and delete every expected file)
        file_exts_removed, error_exts, error_messages = execute_process_granule_key_path_list(bucket_name=src_bucket_name, list_of_s3_key_paths_to_delete=list_of_s3_key_paths_to_delete, original_key_path_to_granule=src_key_path)

        # After Running, print the results
        debug_print(str_to_print="file_exts_removed", obj_out=file_exts_removed)  # e.g. ['.ntf']
        debug_print(str_to_print="error_exts", obj_out=error_exts)  # []
        debug_print(str_to_print="error_messages", obj_out=error_messages)  # []

        # Passing the invocation ID back in the result info.
        return build_batch_operation_response(event, file_exts_removed, error_exts, error_messages)

    except Exception:
        # Anything unexpected (for example a malformed event) is reported back
        # rather than raised out of the handler.
        err_info = str(sys.exc_info())
        return {
            'statusCode': 500,
            'err_info': f'{err_info}'
        }



# EXAMPLE of the Test Input
# {
#   "invocationId": "some_long_string",
#   "job": {
#     "id": "e7306709-ea94-4dc0-863b-5c0d1bd20ee3"
#   },
#   "tasks": [
#     {
#       "taskId": "AAAAAAAAAAExFKoqBbA5bbIDWZB9c7NGhU0gGZLhY6jh/Lp6RiPJDFpU9bJ3KtvjxmOl9BwUPDHR9+qXkcXkYS2PO0Rb9ja6QTGRqWG7NHM4/xuLk3iBSMOxUKYSe7H7aNoFXHxSU+MFTPTxQcYIAcUQjYlLQbxa3EJP+qUTiJRJGWW/YZCDHkCo9tVQJCDyDHFs7fi/84z4g5SgCTencnb9OjD7kUuPA8as/pqRAyKhor83bk0fVI/rvZWwQPPWQmf4Y1aqhSd0ao/kf2qhlY99oOHNYWsJ3OedeWy/2d52K3RyadUDRARTLHqhs6hYl0qcPDW9pEY+cn8v9h8mHOZY4dCslCDrUoowtGb4hvenUC+fsdzkqb+x5k4THjuf3iLFxNMBkGtPrx5EWH5AviYZn3vo95ZioT9O2zIkmBgOq/kxNglsUFfwZzw2aRx4jQtRSR3BAmnA6sWFsPfslJNInYe1fGm4142II9dNR41lTyKQlmw/1DUieXEyVREEy3YLkewSDNzW+EOYKJjKrXwpKc+1yISxJrVJTAwWC0+pG/MaZlLBR3oWjBP33zOZTb+b3FmAteDMWrgsDM8ztSZGYUdy/TiNXHRQeLAs4zSb59qnsb0morzA4lOx8OKgegH2RmyzG+QJrm7Udr9/6do4zhKHAdjdJjYt6dQ8NDHLIUtvUh9Dp8d8pai2Ugiu17wTuQXhdGU8DzMcddZc39kRVHt0rCqKRt8u73BgDZo4faT5UJjWryzzKygtpxhMVdTqS0xMvdwdACDzOTyQ94W2Lhs0/yXxfKziesoAPcquUdFwc8J759rCDohGpisotOG62BfykeGTuw69WF278sIKCxFLkU7axw7Iybp7s0IC0P9FG7p1KZXNdyrU4h3oYU/kww+kQC+0j690rQa9/Db3pAsNdgQFRTDsrDrmHX3P7A+4P2RY3fzNJ0LDHXgqsmU+MUjlfbEiAAHxVjdCvFD+69+rCnjB27lY1FxnlJtx48RA6amOcXpU2Dc2qL1/zBdQ",
#       "s3BucketArn": "arn:aws:s3:::csdap-maxar-delivery",
#       "s3Key": "css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002",
#       "s3VersionId": "None"
#     }
+# ], +# "invocationSchemaVersion": "1.0" +# } + + +# Example Success Output (with Debug Print Turned on, Before the REAL delete was turned on) +# Response: +# { +# "statusCode": 200, +# "invocationSchemaVersion": "1.0", +# "invocationId": "some_long_string", +# "results": [ +# { +# "taskId": "AAAAAAAAAAExFKoqBbA5bbIDWZB9c7NGhU0gGZLhY6jh/Lp6RiPJDFpU9bJ3KtvjxmOl9BwUPDHR9+qXkcXkYS2PO0Rb9ja6QTGRqWG7NHM4/xuLk3iBSMOxUKYSe7H7aNoFXHxSU+MFTPTxQcYIAcUQjYlLQbxa3EJP+qUTiJRJGWW/YZCDHkCo9tVQJCDyDHFs7fi/84z4g5SgCTencnb9OjD7kUuPA8as/pqRAyKhor83bk0fVI/rvZWwQPPWQmf4Y1aqhSd0ao/kf2qhlY99oOHNYWsJ3OedeWy/2d52K3RyadUDRARTLHqhs6hYl0qcPDW9pEY+cn8v9h8mHOZY4dCslCDrUoowtGb4hvenUC+fsdzkqb+x5k4THjuf3iLFxNMBkGtPrx5EWH5AviYZn3vo95ZioT9O2zIkmBgOq/kxNglsUFfwZzw2aRx4jQtRSR3BAmnA6sWFsPfslJNInYe1fGm4142II9dNR41lTyKQlmw/1DUieXEyVREEy3YLkewSDNzW+EOYKJjKrXwpKc+1yISxJrVJTAwWC0+pG/MaZlLBR3oWjBP33zOZTb+b3FmAteDMWrgsDM8ztSZGYUdy/TiNXHRQeLAs4zSb59qnsb0morzA4lOx8OKgegH2RmyzG+QJrm7Udr9/6do4zhKHAdjdJjYt6dQ8NDHLIUtvUh9Dp8d8pai2Ugiu17wTuQXhdGU8DzMcddZc39kRVHt0rCqKRt8u73BgDZo4faT5UJjWryzzKygtpxhMVdTqS0xMvdwdACDzOTyQ94W2Lhs0/yXxfKziesoAPcquUdFwc8J759rCDohGpisotOG62BfykeGTuw69WF278sIKCxFLkU7axw7Iybp7s0IC0P9FG7p1KZXNdyrU4h3oYU/kww+kQC+0j690rQa9/Db3pAsNdgQFRTDsrDrmHX3P7A+4P2RY3fzNJ0LDHXgqsmU+MUjlfbEiAAHxVjdCvFD+69+rCnjB27lY1FxnlJtx48RA6amOcXpU2Dc2qL1/zBdQ", +# "resultCode": "Succeeded", +# "resultString": "Removed: ['-BROWSE.jpg', '-cmr.json', '-thumb.jpg', '.rename', '.tar', '.tif', '.xml'] Errors: [], Error Messages: []" +# } +# ] +# } +# +# Function Logs: +# Tb+b3FmAteDMWrgsDM8ztSZGYUdy/TiNXHRQeLAs4zSb59qnsb0morzA4lOx8OKgegH2RmyzG+QJrm7Udr9/6do4zhKHAdjdJjYt6dQ8NDHLIUtvUh9Dp8d8pai2Ugiu17wTuQXhdGU8DzMcddZc39kRVHt0rCqKRt8u73BgDZo4faT5UJjWryzzKygtpxhMVdTqS0xMvdwdACDzOTyQ94W2Lhs0/yXxfKziesoAPcquUdFwc8J759rCDohGpisotOG62BfykeGTuw69WF278sIKCxFLkU7axw7Iybp7s0IC0P9FG7p1KZXNdyrU4h3oYU/kww+kQC+0j690rQa9/Db3pAsNdgQFRTDsrDrmHX3P7A+4P2RY3fzNJ0LDHXgqsmU+MUjlfbEiAAHxVjdCvFD+69+rCnjB27lY1FxnlJtx48RA6amOcXpU2Dc2qL1/zBdQ', 
's3BucketArn': 'arn:aws:s3:::csdap-maxar-delivery', 's3Key': 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002', 's3VersionId': 'None'}], 'invocationSchemaVersion': '1.0'} +# s3BucketArn: arn:aws:s3:::csdap-maxar-delivery +# s3Key: css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002 +# src_bucket_name: csdap-maxar-delivery +# src_key_path: css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002 +# list_of_s3_key_paths_to_delete: ['css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002-BROWSE.jpg', 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002-cmr.json', 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002-thumb.jpg', 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002.rename', 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002.tar', 
'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002.tif', 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002.xml'] +# execute_s3_delete: TODO - Uncomment the next line to ACTUALLY DELETE A FILE (bucket_name): csdap-maxar-delivery, (key_path): css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002-BROWSE.jpg +# execute_s3_delete: TODO - Uncomment the next line to ACTUALLY DELETE A FILE (bucket_name): csdap-maxar-delivery, (key_path): css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002-cmr.json +# execute_s3_delete: TODO - Uncomment the next line to ACTUALLY DELETE A FILE (bucket_name): csdap-maxar-delivery, (key_path): css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002-thumb.jpg +# execute_s3_delete: TODO - Uncomment the next line to ACTUALLY DELETE A FILE (bucket_name): csdap-maxar-delivery, (key_path): css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002.rename +# execute_s3_delete: TODO - Uncomment the next line to ACTUALLY DELETE A FILE (bucket_name): csdap-maxar-delivery, (key_path): 
css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002.tar +# execute_s3_delete: TODO - Uncomment the next line to ACTUALLY DELETE A FILE (bucket_name): csdap-maxar-delivery, (key_path): css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002.tif +# execute_s3_delete: TODO - Uncomment the next line to ACTUALLY DELETE A FILE (bucket_name): csdap-maxar-delivery, (key_path): css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002.xml +# file_exts_removed: ['-BROWSE.jpg', '-cmr.json', '-thumb.jpg', '.rename', '.tar', '.tif', '.xml'] +# error_exts: [] +# error_messages: [] +# END RequestId: 6d6d5067-693d-4ef6-8ec5-91f0da658a15 +# REPORT RequestId: 6d6d5067-693d-4ef6-8ec5-91f0da658a15 Duration: 13.74 ms Billed Duration: 14 ms Memory Size: 128 MB Max Memory Used: 85 MB Init Duration: 442.43 ms + +# Request ID: 6d6d5067-693d-4ef6-8ec5-91f0da658a15 + + + +# Full Example with Real Delete (Single Granule) +# +# Status: Succeeded +# Test Event Name: WV04_PAN_L1B_2017 +# +# Response: +# { +# "statusCode": 200, +# "invocationSchemaVersion": "1.0", +# "invocationId": "some_long_string", +# "results": [ +# { +# "taskId": 
"AAAAAAAAAAExFKoqBbA5bbIDWZB9c7NGhU0gGZLhY6jh/Lp6RiPJDFpU9bJ3KtvjxmOl9BwUPDHR9+qXkcXkYS2PO0Rb9ja6QTGRqWG7NHM4/xuLk3iBSMOxUKYSe7H7aNoFXHxSU+MFTPTxQcYIAcUQjYlLQbxa3EJP+qUTiJRJGWW/YZCDHkCo9tVQJCDyDHFs7fi/84z4g5SgCTencnb9OjD7kUuPA8as/pqRAyKhor83bk0fVI/rvZWwQPPWQmf4Y1aqhSd0ao/kf2qhlY99oOHNYWsJ3OedeWy/2d52K3RyadUDRARTLHqhs6hYl0qcPDW9pEY+cn8v9h8mHOZY4dCslCDrUoowtGb4hvenUC+fsdzkqb+x5k4THjuf3iLFxNMBkGtPrx5EWH5AviYZn3vo95ZioT9O2zIkmBgOq/kxNglsUFfwZzw2aRx4jQtRSR3BAmnA6sWFsPfslJNInYe1fGm4142II9dNR41lTyKQlmw/1DUieXEyVREEy3YLkewSDNzW+EOYKJjKrXwpKc+1yISxJrVJTAwWC0+pG/MaZlLBR3oWjBP33zOZTb+b3FmAteDMWrgsDM8ztSZGYUdy/TiNXHRQeLAs4zSb59qnsb0morzA4lOx8OKgegH2RmyzG+QJrm7Udr9/6do4zhKHAdjdJjYt6dQ8NDHLIUtvUh9Dp8d8pai2Ugiu17wTuQXhdGU8DzMcddZc39kRVHt0rCqKRt8u73BgDZo4faT5UJjWryzzKygtpxhMVdTqS0xMvdwdACDzOTyQ94W2Lhs0/yXxfKziesoAPcquUdFwc8J759rCDohGpisotOG62BfykeGTuw69WF278sIKCxFLkU7axw7Iybp7s0IC0P9FG7p1KZXNdyrU4h3oYU/kww+kQC+0j690rQa9/Db3pAsNdgQFRTDsrDrmHX3P7A+4P2RY3fzNJ0LDHXgqsmU+MUjlfbEiAAHxVjdCvFD+69+rCnjB27lY1FxnlJtx48RA6amOcXpU2Dc2qL1/zBdQ", +# "resultCode": "Succeeded", +# "resultString": "Removed: ['-BROWSE.jpg', '-cmr.json', '-thumb.jpg', '.rename', '.tar', '.tif', '.xml'] Errors: [], Error Messages: []" +# } +# ] +# } +# +# Function Logs: +# 
DWZB9c7NGhU0gGZLhY6jh/Lp6RiPJDFpU9bJ3KtvjxmOl9BwUPDHR9+qXkcXkYS2PO0Rb9ja6QTGRqWG7NHM4/xuLk3iBSMOxUKYSe7H7aNoFXHxSU+MFTPTxQcYIAcUQjYlLQbxa3EJP+qUTiJRJGWW/YZCDHkCo9tVQJCDyDHFs7fi/84z4g5SgCTencnb9OjD7kUuPA8as/pqRAyKhor83bk0fVI/rvZWwQPPWQmf4Y1aqhSd0ao/kf2qhlY99oOHNYWsJ3OedeWy/2d52K3RyadUDRARTLHqhs6hYl0qcPDW9pEY+cn8v9h8mHOZY4dCslCDrUoowtGb4hvenUC+fsdzkqb+x5k4THjuf3iLFxNMBkGtPrx5EWH5AviYZn3vo95ZioT9O2zIkmBgOq/kxNglsUFfwZzw2aRx4jQtRSR3BAmnA6sWFsPfslJNInYe1fGm4142II9dNR41lTyKQlmw/1DUieXEyVREEy3YLkewSDNzW+EOYKJjKrXwpKc+1yISxJrVJTAwWC0+pG/MaZlLBR3oWjBP33zOZTb+b3FmAteDMWrgsDM8ztSZGYUdy/TiNXHRQeLAs4zSb59qnsb0morzA4lOx8OKgegH2RmyzG+QJrm7Udr9/6do4zhKHAdjdJjYt6dQ8NDHLIUtvUh9Dp8d8pai2Ugiu17wTuQXhdGU8DzMcddZc39kRVHt0rCqKRt8u73BgDZo4faT5UJjWryzzKygtpxhMVdTqS0xMvdwdACDzOTyQ94W2Lhs0/yXxfKziesoAPcquUdFwc8J759rCDohGpisotOG62BfykeGTuw69WF278sIKCxFLkU7axw7Iybp7s0IC0P9FG7p1KZXNdyrU4h3oYU/kww+kQC+0j690rQa9/Db3pAsNdgQFRTDsrDrmHX3P7A+4P2RY3fzNJ0LDHXgqsmU+MUjlfbEiAAHxVjdCvFD+69+rCnjB27lY1FxnlJtx48RA6amOcXpU2Dc2qL1/zBdQ', 's3BucketArn': 'arn:aws:s3:::csdap-maxar-delivery', 's3Key': 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002', 's3VersionId': 'None'}], 'invocationSchemaVersion': '1.0'} +# s3BucketArn: arn:aws:s3:::csdap-maxar-delivery +# s3Key: css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002 +# src_bucket_name: csdap-maxar-delivery +# src_key_path: css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002 +# list_of_s3_key_paths_to_delete: 
['css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002-BROWSE.jpg', 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002-cmr.json', 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002-thumb.jpg', 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002.rename', 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002.tar', 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002.tif', 'css/nga/WV04/1B/2017/203/WV04_ab212ea2-85fb-4564-923c-c0286bc82900-inv_X1BS_059096996120_01/WV04_20170722002258_ab212ea2-85fb-4564-923c-c0286bc82900-inv_17JUL22002258-P1BS-059096996120_01_P002.xml'] +# file_exts_removed: ['-BROWSE.jpg', '-cmr.json', '-thumb.jpg', '.rename', '.tar', '.tif', '.xml'] +# error_exts: [] +# error_messages: [] +# END RequestId: ed03f3c5-2555-4a0b-9d7a-e2910e05c1aa +# REPORT RequestId: ed03f3c5-2555-4a0b-9d7a-e2910e05c1aa Duration: 498.93 ms Billed Duration: 499 ms Memory Size: 128 MB Max Memory Used: 86 MB Init Duration: 447.80 ms +# +# Request ID: ed03f3c5-2555-4a0b-9d7a-e2910e05c1aa + + +# One Last Test before starting Batch Operations -- Turning on the Sleep Timer to stagger the executions +# Sleeping for: 0.07825984200370091 seconds.: 
None +# REPORT RequestId: bf8e6c84-f498-475d-8d23-7847c74decff Duration: 563.97 ms Billed Duration: 564 ms Memory Size: 128 MB Max Memory Used: 86 MB Init Duration: 447.19 ms \ No newline at end of file