Skip to content

Commit

Permalink
Reverted changes related to blastdb_aliastool and BLAST v4 databases.…
Browse files Browse the repository at this point in the history
… Using BLAST v5 databases.
  • Loading branch information
rfm-targa committed Jul 11, 2024
1 parent 885965e commit 8b5b4a2
Show file tree
Hide file tree
Showing 7 changed files with 568 additions and 498 deletions.
29 changes: 24 additions & 5 deletions CHEWBBACA/AlleleCall/allele_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -1749,7 +1749,8 @@ def create_missing_fasta(class_files, fasta_file, input_map, dna_hashtable,

def select_representatives(representative_candidates, locus, fasta_file,
iteration, output_directory, blastp_path,
blast_db, blast_score_ratio, threads):
blast_db, blast_score_ratio, threads,
blastdb_aliastool_path):
"""Select new representative alleles for a locus.
Parameters
Expand All @@ -1774,6 +1775,9 @@ def select_representatives(representative_candidates, locus, fasta_file,
BLAST Score Ratio value.
threads : int
Number of threads passed to BLAST.
blastdb_aliastool_path : str or None
Path to the `blastdb_aliastool` executable used to convert
the list of seqids to binary format.
Returns
-------
Expand All @@ -1788,6 +1792,12 @@ def select_representatives(representative_candidates, locus, fasta_file,
ids_file = fo.join_paths(output_directory,
['{0}_candidates_ids_{1}.fasta'.format(locus, iteration)])
fo.write_lines(list(representative_candidates.keys()), ids_file)
# Convert to binary format if BLAST>=2.10
binary_file = f'{ids_file}.bin'
blastp_std = bw.run_blastdb_aliastool(blastdb_aliastool_path,
ids_file,
binary_file)
ids_file = binary_file

# BLASTp to compare all candidates
blast_output = fo.join_paths(output_directory,
Expand Down Expand Up @@ -2270,9 +2280,10 @@ def allele_calling(fasta_files, schema_directory, temp_directory,
clustering_dir = fo.join_paths(temp_directory, ['4_clustering'])
fo.create_directory(clustering_dir)

# Define BLASTp and makeblastdb paths
# Define BLASTp, makeblastdb and blastdb_aliastool paths
blastp_path = fo.join_paths(config['BLAST path'], [ct.BLASTP_ALIAS])
makeblastdb_path = fo.join_paths(config['BLAST path'], [ct.MAKEBLASTDB_ALIAS])
blastdb_aliastool_path = fo.join_paths(config['BLAST path'], [ct.BLASTDB_ALIASTOOL_ALIAS])

# Concatenate representative FASTA files
concat_reps = fo.join_paths(reps_protein_dir, ['loci_to_call_translated_representatives.fasta'])
Expand All @@ -2293,7 +2304,8 @@ def allele_calling(fasta_files, schema_directory, temp_directory,
self_scores = cf.determine_self_scores(concat_full_reps, self_score_dir,
makeblastdb_path, blastp_path,
'prot',
config['CPU cores'])
config['CPU cores'],
blastdb_aliastool_path)
fo.pickle_dumper(self_scores, self_score_file)
else:
self_scores = fo.pickle_loader(self_score_file)
Expand Down Expand Up @@ -2347,6 +2359,7 @@ def allele_calling(fasta_files, schema_directory, temp_directory,
clustering_dir, blastp_path,
makeblastdb_path,
config['CPU cores'],
blastdb_aliastool_path
True)

blast_files = im.flatten_list(blast_results)
Expand Down Expand Up @@ -2477,6 +2490,11 @@ def allele_calling(fasta_files, schema_directory, temp_directory,
# Create text file with unclassified seqids
remaining_seqids_file = fo.join_paths(iteration_directory, ['unclassified_seqids_{0}.txt'.format(iteration)])
fo.write_lines(unclassified_seqids, remaining_seqids_file)
binary_file = f'{remaining_seqids_file}.bin'
blastp_std = bw.run_blastdb_aliastool(blastdb_aliastool_path,
remaining_seqids_file,
binary_file)
remaining_seqids_file = binary_file

# BLAST representatives against unclassified sequences
# Iterative process until no more sequences are classified
Expand Down Expand Up @@ -2633,7 +2651,7 @@ def allele_calling(fasta_files, schema_directory, temp_directory,
representative_inputs.append([current_candidates, k, fasta_file,
iteration, blast_selection_dir, blastp_path,
blast_db, config['BLAST Score Ratio'], 1,
select_representatives])
blastdb_aliastool_path, select_representatives])
# Single candidate
else:
representatives[k] = [(v[0][1], v[0][3])]
Expand Down Expand Up @@ -2687,7 +2705,8 @@ def allele_calling(fasta_files, schema_directory, temp_directory,
fo.create_directory(candidates_blast_dir)
new_self_scores = cf.determine_self_scores(concat_repy, candidates_blast_dir,
makeblastdb_path, blastp_path,
'prot', config['CPU cores'])
'prot', config['CPU cores'],
blastdb_aliastool_path)

# This includes self-score for candidates that are not added
# (e.g. classification changes due to multiple matches)
Expand Down
Loading

0 comments on commit 8b5b4a2

Please sign in to comment.