Skip to content

Commit

Permalink
Merge pull request #30 from hillerlab/temporary_files_placement
Browse files (browse the repository at this point in the history)
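All temp files are regulated: temporary files of the Kent tools (PSL sorting, chainMergeSort) are now written to a single project-level `temp_kent` directory.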
  • Loading branch information
kirilenkobm authored Sep 17, 2023
2 parents 5730aee + cacc845 commit 160f572
Show file tree
Hide file tree
Showing 9 changed files with 20 additions and 17 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Make Lastz Chains

[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
![version](https://img.shields.io/badge/version-2.0.4-blue)
![version](https://img.shields.io/badge/version-2.0.5-blue)
[![made-with-Nextflow](https://img.shields.io/badge/Made%20with-Nextflow-23aa62.svg)](https://www.nextflow.io/)

Portable Hillerlab solution for generating pairwise genome alignment chains.
Expand Down
3 changes: 2 additions & 1 deletion constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class Constants:
QUERY_CHROM_SIZES_FILENAME = f"{QUERY_LABEL}.chrom.sizes"

# file and directory names
KENT_TEMP_DIRNAME = "temp_kent"
PART_BULK_FILENAME_PREFIX = "BULK"
LASTZ_OUT_BUCKET_PREFIX = "bucket_ref_"
LASTZ_OUT_BULK_PREFIX = "bucket_ref_bulk"
Expand All @@ -70,7 +71,7 @@ class Constants:
SORTED_PSL_DIRNAME = "sorted_psl"
SPLIT_PSL_DIRNAME = "split_psl"
CHAIN_RUN_OUT_DIRNAME = "chain"
PSL_SORT_TEMP_DIRNAME = "psl_sort_temp_dir"
# PSL_SORT_TEMP_DIRNAME = "psl_sort_temp_dir"

FILL_CHAIN_DIRNAME = "temp_fill_chain"
FILLED_CHAINS_DIRNAME = "filled_chain_files"
Expand Down
7 changes: 4 additions & 3 deletions modules/project_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(self, project_dir, root_dir, params):

self.hl_kent_binaries = self._j_abs(root_dir, Constants.KENT_BINARIES_DIRNAME)
self.chain_clean_micro_env = self._j_abs(root_dir, Constants.CHAIN_CLEAN_MICRO_ENV)
self.kent_temp_dir = self._j_abs(project_dir, Constants.KENT_TEMP_DIRNAME)

# LASTZ step
self.lastz_working_dir = self._j_abs(project_dir, Constants.TEMP_LASTZ_DIRNAME)
Expand All @@ -39,7 +40,7 @@ def __init__(self, project_dir, root_dir, params):
self.chain_run_dir = self._j_abs(project_dir, Constants.TEMP_AXT_CHAIN_DIRNAME)
self.chain_joblist_path = self._j_abs(self.chain_run_dir, Constants.CHAIN_JOBLIST_FILENAME)
self.chain_output_dir = self._j_abs(self.chain_run_dir, Constants.CHAIN_RUN_OUT_DIRNAME)
self.temp_dir_for_psl_sort = self._j_abs(self.chain_run_dir, Constants.PSL_SORT_TEMP_DIRNAME)
# self.temp_dir_for_psl_sort = self._j_abs(self.chain_run_dir, Constants.PSL_SORT_TEMP_DIRNAME)
self.sorted_psl_dir = self._j_abs(self.chain_run_dir, Constants.SORTED_PSL_DIRNAME)
self.split_psl_dir = self._j_abs(self.chain_run_dir, Constants.SPLIT_PSL_DIRNAME)

Expand Down Expand Up @@ -88,10 +89,10 @@ def _create_dirs(self):
directories_to_create = [
self.lastz_working_dir,
self.lastz_output_dir,
# self.psl_output_dir,
self.kent_temp_dir,
self.cat_out_dirname,
self.chain_run_dir,
self.temp_dir_for_psl_sort,
# self.temp_dir_for_psl_sort,
self.sorted_psl_dir,
self.split_psl_dir,
self.chain_output_dir,
Expand Down
4 changes: 3 additions & 1 deletion steps_implementations/chain_merge_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ def do_chains_merge(params: PipelineParameters,
find_cmd = ["find", project_paths.chain_output_dir, "-name", "*chain"]

# Define the chain_merge_sort command
merge_sort_cmd = [executables.chain_merge_sort, "-inputList=stdin"]
merge_sort_cmd = [executables.chain_merge_sort,
"-inputList=stdin",
f"-tempDir={project_paths.kent_temp_dir}"]

# Define the gzip command
gzip_cmd = ["gzip", "-c"]
Expand Down
2 changes: 1 addition & 1 deletion steps_implementations/chain_run_bundle_substep.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def bundle_chrom_split_psl_files(input_dir: str,
"verbose": verbose
}
os.makedirs(output_dir, exist_ok=True)
to_log(f"Bundling psl files with the followign arguments:")
to_log(f"Bundling psl files with the following arguments:")
for k, v in args.items():
to_log(f"* {k}: {v}")
to_log(f"Saving results to: {output_dir}")
Expand Down
5 changes: 2 additions & 3 deletions steps_implementations/chain_run_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def psl_bundle(cat_out_dirname, project_paths, executables, params):
sort_cmd = [executables.psl_sort_acc,
"nohead",
project_paths.sorted_psl_dir,
project_paths.temp_dir_for_psl_sort,
project_paths.kent_temp_dir,
*concatenated_files]
to_log(f"Sorting PSL files, saving the results to {project_paths.sorted_psl_dir}")
to_log(" ".join(sort_cmd))
Expand All @@ -35,10 +35,9 @@ def psl_bundle(cat_out_dirname, project_paths, executables, params):
f"Error message: {sort_process_result.stderr.decode('utf-8')}"
)

shutil.rmtree(project_paths.temp_dir_for_psl_sort)
# shutil.rmtree(project_paths.temp_dir_for_psl_sort)

# 1.2 -> bundle chrom split files

bundle_chrom_split_psl_files(project_paths.sorted_psl_dir,
params.seq_1_len,
project_paths.split_psl_dir,
Expand Down
8 changes: 4 additions & 4 deletions steps_implementations/clean_chain_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ def do_chains_clean(params: PipelineParameters,

with open(project_paths.chain_cleaner_log, 'w') as f:
clean_process = subprocess.Popen(chain_cleaner_cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=_temp_env,
text=True)
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=_temp_env,
text=True)
stdout, stderr = clean_process.communicate()

# Write stdout to log file and also capture it in a variable
Expand Down
4 changes: 2 additions & 2 deletions steps_implementations/fill_chain_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def create_repeat_filler_joblist(params: PipelineParameters,
f"--gapMaxSizeQ {params.fill_gap_max_size_q}",
f"--scoreThreshold {params.fill_insert_chain_min_score}",
f"--gapMinSizeT {params.fill_gap_min_size_t}",
f"--gapMinSizeQ {params.fill_gap_min_size_q}"
f"--gapMinSizeQ {params.fill_gap_min_size_q}",
]
if params.fill_unmask:
to_log("Adding --unmask flag")
Expand Down Expand Up @@ -77,7 +77,7 @@ def merge_filled_chains(params: PipelineParameters,
find_cmd = ["find", project_paths.fill_chain_filled_dir, "-type", "f", "-name", "*.chain", "-print"]

# Create the 'chainMergeSort' command
merge_sort_cmd = [executables.chain_merge_sort, "-inputList=stdin"]
merge_sort_cmd = [executables.chain_merge_sort, "-inputList=stdin", f"-tempDir={project_paths.kent_temp_dir}"]

# Create the 'gzip' command
gzip_cmd = ["gzip", "-c"]
Expand Down
2 changes: 1 addition & 1 deletion version.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def to_string(self):
return self.version_repr


__version__ = Version(2, 0, 4)
__version__ = Version(2, 0, 5)

if __name__ == "__main__":
print(f"Make Lastz Chains Version: {__version__}")
Expand Down

0 comments on commit 160f572

Please sign in to comment.