From cacc845f9926c819a1d83aa9e54c4142cf79c875 Mon Sep 17 00:00:00 2001 From: Bogdan Kirilenko Date: Sun, 17 Sep 2023 10:08:40 +0200 Subject: [PATCH] All temp files are regulated --- README.md | 2 +- constants.py | 3 ++- modules/project_paths.py | 7 ++++--- steps_implementations/chain_merge_step.py | 4 +++- steps_implementations/chain_run_bundle_substep.py | 2 +- steps_implementations/chain_run_step.py | 5 ++--- steps_implementations/clean_chain_step.py | 8 ++++---- steps_implementations/fill_chain_step.py | 4 ++-- version.py | 2 +- 9 files changed, 20 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 09fbdb2..3c3c0a1 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Make Lastz Chains [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) -![version](https://img.shields.io/badge/version-2.0.4-blue) +![version](https://img.shields.io/badge/version-2.0.5-blue) [![made-with-Nextflow](https://img.shields.io/badge/Made%20with-Nextflow-23aa62.svg)](https://www.nextflow.io/) Portable Hillerlab solution for generating pairwise genome alignment chains. diff --git a/constants.py b/constants.py index 76e9857..f70cf4b 100644 --- a/constants.py +++ b/constants.py @@ -58,6 +58,7 @@ class Constants: QUERY_CHROM_SIZES_FILENAME = f"{QUERY_LABEL}.chrom.sizes" # file and directory names + KENT_TEMP_DIRNAME = "temp_kent" PART_BULK_FILENAME_PREFIX = "BULK" LASTZ_OUT_BUCKET_PREFIX = "bucket_ref_" LASTZ_OUT_BULK_PREFIX = "bucket_ref_bulk" @@ -70,7 +71,7 @@ class Constants: SORTED_PSL_DIRNAME = "sorted_psl" SPLIT_PSL_DIRNAME = "split_psl" CHAIN_RUN_OUT_DIRNAME = "chain" - PSL_SORT_TEMP_DIRNAME = "psl_sort_temp_dir" + # PSL_SORT_TEMP_DIRNAME = "psl_sort_temp_dir" FILL_CHAIN_DIRNAME = "temp_fill_chain" FILLED_CHAINS_DIRNAME = "filled_chain_files" diff --git a/modules/project_paths.py b/modules/project_paths.py index 4a93c19..befcc45 100644 --- a/modules/project_paths.py +++ b/modules/project_paths.py @@ -25,6 +25,7 @@ def __init__(self, project_dir, root_dir, params): self.hl_kent_binaries = self._j_abs(root_dir, Constants.KENT_BINARIES_DIRNAME) self.chain_clean_micro_env = self._j_abs(root_dir, Constants.CHAIN_CLEAN_MICRO_ENV) + self.kent_temp_dir = self._j_abs(project_dir, Constants.KENT_TEMP_DIRNAME) # LASTZ step self.lastz_working_dir = self._j_abs(project_dir, Constants.TEMP_LASTZ_DIRNAME) @@ -39,7 +40,7 @@ def __init__(self, project_dir, root_dir, params): self.chain_run_dir = self._j_abs(project_dir, Constants.TEMP_AXT_CHAIN_DIRNAME) self.chain_joblist_path = self._j_abs(self.chain_run_dir, Constants.CHAIN_JOBLIST_FILENAME) self.chain_output_dir = self._j_abs(self.chain_run_dir, Constants.CHAIN_RUN_OUT_DIRNAME) - self.temp_dir_for_psl_sort = self._j_abs(self.chain_run_dir, Constants.PSL_SORT_TEMP_DIRNAME) + # self.temp_dir_for_psl_sort = self._j_abs(self.chain_run_dir, Constants.PSL_SORT_TEMP_DIRNAME) self.sorted_psl_dir = self._j_abs(self.chain_run_dir, Constants.SORTED_PSL_DIRNAME) self.split_psl_dir = self._j_abs(self.chain_run_dir, Constants.SPLIT_PSL_DIRNAME) @@ -88,10 +89,10 @@ def _create_dirs(self): directories_to_create = [ self.lastz_working_dir, self.lastz_output_dir, - # self.psl_output_dir, + self.kent_temp_dir, self.cat_out_dirname, self.chain_run_dir, - self.temp_dir_for_psl_sort, + # self.temp_dir_for_psl_sort, self.sorted_psl_dir, self.split_psl_dir, self.chain_output_dir, diff --git a/steps_implementations/chain_merge_step.py b/steps_implementations/chain_merge_step.py index b1413cc..2a4cf06 100644 --- a/steps_implementations/chain_merge_step.py +++ b/steps_implementations/chain_merge_step.py @@ -17,7 +17,9 @@ def do_chains_merge(params: PipelineParameters, find_cmd = ["find", project_paths.chain_output_dir, "-name", "*chain"] # Define the chain_merge_sort command - merge_sort_cmd = [executables.chain_merge_sort, "-inputList=stdin"] + merge_sort_cmd = [executables.chain_merge_sort, + "-inputList=stdin", + f"-tempDir={project_paths.kent_temp_dir}"] # Define the gzip command gzip_cmd = ["gzip", "-c"] diff --git a/steps_implementations/chain_run_bundle_substep.py b/steps_implementations/chain_run_bundle_substep.py index 9b16ff7..b45cd1c 100644 --- a/steps_implementations/chain_run_bundle_substep.py +++ b/steps_implementations/chain_run_bundle_substep.py @@ -98,7 +98,7 @@ def bundle_chrom_split_psl_files(input_dir: str, "verbose": verbose } os.makedirs(output_dir, exist_ok=True) - to_log(f"Bundling psl files with the followign arguments:") + to_log(f"Bundling psl files with the following arguments:") for k, v in args.items(): to_log(f"* {k}: {v}") to_log(f"Saving results to: {output_dir}") diff --git a/steps_implementations/chain_run_step.py b/steps_implementations/chain_run_step.py index a28a4df..65553b7 100644 --- a/steps_implementations/chain_run_step.py +++ b/steps_implementations/chain_run_step.py @@ -23,7 +23,7 @@ def psl_bundle(cat_out_dirname, project_paths, executables, params): sort_cmd = [executables.psl_sort_acc, "nohead", project_paths.sorted_psl_dir, - project_paths.temp_dir_for_psl_sort, + project_paths.kent_temp_dir, *concatenated_files] to_log(f"Sorting PSL files, saving the results to {project_paths.sorted_psl_dir}") to_log(" ".join(sort_cmd)) @@ -35,10 +35,9 @@ def psl_bundle(cat_out_dirname, project_paths, executables, params): f"Error message: {sort_process_result.stderr.decode('utf-8')}" ) - shutil.rmtree(project_paths.temp_dir_for_psl_sort) + # shutil.rmtree(project_paths.temp_dir_for_psl_sort) # 1.2 -> bundle chrom split files - bundle_chrom_split_psl_files(project_paths.sorted_psl_dir, params.seq_1_len, project_paths.split_psl_dir, diff --git a/steps_implementations/clean_chain_step.py b/steps_implementations/clean_chain_step.py index 830f0ab..5d9ad95 100644 --- a/steps_implementations/clean_chain_step.py +++ b/steps_implementations/clean_chain_step.py @@ -54,10 +54,10 @@ def do_chains_clean(params: PipelineParameters, with open(project_paths.chain_cleaner_log, 'w') as f: clean_process = subprocess.Popen(chain_cleaner_cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=_temp_env, - text=True) + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=_temp_env, + text=True) stdout, stderr = clean_process.communicate() # Write stdout to log file and also capture it in a variable diff --git a/steps_implementations/fill_chain_step.py b/steps_implementations/fill_chain_step.py index 09102b8..fbb08e7 100644 --- a/steps_implementations/fill_chain_step.py +++ b/steps_implementations/fill_chain_step.py @@ -27,7 +27,7 @@ def create_repeat_filler_joblist(params: PipelineParameters, f"--gapMaxSizeQ {params.fill_gap_max_size_q}", f"--scoreThreshold {params.fill_insert_chain_min_score}", f"--gapMinSizeT {params.fill_gap_min_size_t}", - f"--gapMinSizeQ {params.fill_gap_min_size_q}" + f"--gapMinSizeQ {params.fill_gap_min_size_q}", ] if params.fill_unmask: to_log("Adding --unmask flag") @@ -77,7 +77,7 @@ def merge_filled_chains(params: PipelineParameters, find_cmd = ["find", project_paths.fill_chain_filled_dir, "-type", "f", "-name", "*.chain", "-print"] # Create the 'chainMergeSort' command - merge_sort_cmd = [executables.chain_merge_sort, "-inputList=stdin"] + merge_sort_cmd = [executables.chain_merge_sort, "-inputList=stdin", f"-tempDir={project_paths.kent_temp_dir}"] # Create the 'gzip' command gzip_cmd = ["gzip", "-c"] diff --git a/version.py b/version.py index ec15bc4..3b26fc8 100755 --- a/version.py +++ b/version.py @@ -33,7 +33,7 @@ def to_string(self): return self.version_repr -__version__ = Version(2, 0, 4) +__version__ = Version(2, 0, 5) if __name__ == "__main__": print(f"Make Lastz Chains Version: {__version__}")