Skip to content

Commit

Permalink
Merge pull request #45 from KoslickiLab/update_removing_corr_genomes
Browse files: browse the repository at this point in the history
Update removing corr genomes
  • Loading branch information
dkoslicki authored Oct 23, 2023
2 parents 8e082e8 + 649fd61 commit fa4762d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
7 changes: 5 additions & 2 deletions srcs/hypothesis_recovery_src.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,11 @@ def hypothesis_recovery(
sample_dir = os.path.dirname(sample_file)
sample_name = os.path.basename(sample_file).replace('.sig.zip', '')
path_to_sample_temp_dir = os.path.join(sample_dir, f'sample_{sample_name}_intermediate_files')
if not os.path.exists(path_to_sample_temp_dir):
os.makedirs(path_to_sample_temp_dir)
if os.path.exists(path_to_sample_temp_dir):
# If the temporary directory already exists, remove it before it is recreated
logger.info(f"Removing existing temporary directory: {path_to_sample_temp_dir}")
os.system(f'rm -rf {path_to_sample_temp_dir}')
os.makedirs(path_to_sample_temp_dir)

# Find the organisms that have non-zero overlap with the sample
nontrivial_organism_names = get_organisms_with_nonzero_overlap(manifest, sample_file, scale, ksize, num_threads, path_to_genome_temp_dir, path_to_sample_temp_dir)
Expand Down
4 changes: 4 additions & 0 deletions srcs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ def run_multisearch(num_threads: int, ani_thresh: float, ksize: int, scale: int,
B_TO_A = A_TO_B[['match_name','query_name']].rename(columns={'match_name':'query_name','query_name':'match_name'})
multisearch_result = pd.concat([A_TO_B, B_TO_A]).drop_duplicates().reset_index(drop=True)

# Cast the query_name and match_name columns to str
multisearch_result['query_name'] = multisearch_result['query_name'].astype(str)
multisearch_result['match_name'] = multisearch_result['match_name'].astype(str)

return multisearch_result

def remove_corr_organisms_from_ref(sig_info_dict: Dict[str, Tuple[str, float, int, int]], multisearch_result: pd.DataFrame) -> Tuple[Dict[str, List[str]], pd.DataFrame]:
Expand Down

0 comments on commit fa4762d

Please sign in to comment.