From 25644f567981e897690a4731b2d7c19bf155bac9 Mon Sep 17 00:00:00 2001 From: Chunyu Ma Date: Fri, 20 Oct 2023 11:30:20 -0400 Subject: [PATCH 1/2] change all query_name and match_name to string --- srcs/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/srcs/utils.py b/srcs/utils.py index 4b7d974..34808f4 100644 --- a/srcs/utils.py +++ b/srcs/utils.py @@ -113,6 +113,10 @@ def run_multisearch(num_threads: int, ani_thresh: float, ksize: int, scale: int, B_TO_A = A_TO_B[['match_name','query_name']].rename(columns={'match_name':'query_name','query_name':'match_name'}) multisearch_result = pd.concat([A_TO_B, B_TO_A]).drop_duplicates().reset_index(drop=True) + # change column type to string + multisearch_result['query_name'] = multisearch_result['query_name'].astype(str) + multisearch_result['match_name'] = multisearch_result['match_name'].astype(str) + return multisearch_result def remove_corr_organisms_from_ref(sig_info_dict: Dict[str, Tuple[str, float, int, int]], multisearch_result: pd.DataFrame) -> Tuple[Dict[str, List[str]], pd.DataFrame]: From 649fd61d2d3b87f85f549c0ff09e6e1f7bc2d302 Mon Sep 17 00:00:00 2001 From: Chunyu Ma Date: Fri, 20 Oct 2023 11:53:03 -0400 Subject: [PATCH 2/2] allow to overwrite the existing temporary directory for sample --- srcs/hypothesis_recovery_src.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/srcs/hypothesis_recovery_src.py b/srcs/hypothesis_recovery_src.py index a542b5e..483c24e 100644 --- a/srcs/hypothesis_recovery_src.py +++ b/srcs/hypothesis_recovery_src.py @@ -254,8 +254,11 @@ def hypothesis_recovery( sample_dir = os.path.dirname(sample_file) sample_name = os.path.basename(sample_file).replace('.sig.zip', '') path_to_sample_temp_dir = os.path.join(sample_dir, f'sample_{sample_name}_intermediate_files') - if not os.path.exists(path_to_sample_temp_dir): - os.makedirs(path_to_sample_temp_dir) + if os.path.exists(path_to_sample_temp_dir): + # if exists, remove it + logger.info(f"Removing existing temporary directory: {path_to_sample_temp_dir}") + os.system(f'rm -rf {path_to_sample_temp_dir}') + os.makedirs(path_to_sample_temp_dir) # Find the organisms that have non-zero overlap with the sample nontrivial_organism_names = get_organisms_with_nonzero_overlap(manifest, sample_file, scale, ksize, num_threads, path_to_genome_temp_dir, path_to_sample_temp_dir)