Skip to content

Commit

Permalink
Little rearrangement -> not using separate dir for ChainNet and NetFi…
Browse files Browse the repository at this point in the history
…lterNonNested.perl
  • Loading branch information
kirilenkobm committed Sep 27, 2023
1 parent 9f16d8b commit 43927f1
Show file tree
Hide file tree
Showing 9 changed files with 24 additions and 41 deletions.
14 changes: 9 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,18 @@ modules/__pycache__/*
*/__pycache__/

# Kent binaries
HL_kent_binaries/pslSortAcc
HL_kent_binaries/axtChain
HL_kent_binaries/axtToPsl
HL_kent_binaries/chainAntiRepeat
HL_kent_binaries/chainMergeSort
HL_kent_binaries/chainCleaner
HL_kent_binaries/chainSort
HL_kent_binaries/chainFilter
HL_kent_binaries/chainMergeSort
HL_kent_binaries/chainNet
HL_kent_binaries/chainScore
chain_clean_micro_env/chainNet
/HL_kent_binaries/chainFilter
HL_kent_binaries/chainSort
HL_kent_binaries/faToTwoBit
HL_kent_binaries/pslSortAcc
HL_kent_binaries/twoBitToFa
# to be continued

# test data
Expand Down
File renamed without changes.
4 changes: 4 additions & 0 deletions HL_kent_binaries/readme.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
Directory to store the Kent binaries necessary to run the pipeline
that are not available in the $PATH for some reason
and were downloaded using the install_dependencies.py script.

Although NetFilterNonNested.perl is not actually a binary, it is kept here
because its only purpose is to serve as a dependency of chainCleaner.
The same applies to chainNet, which is not used directly by the pipeline.
6 changes: 3 additions & 3 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@

## Nice to do

- Refactor HL kent dependencies -> maybe it was not necessary to split into 2 dirs?
- ~~Refactor HL kent dependencies -> maybe it was not necessary to split into 2 dirs?~~ -> not split anymore
- QC module or something - detailed statistics per each step
- Explanation for each pipeline parameter in the parse_args
- Document masking, etc. - nuances that affect the pipeline performance.
- ~~Refactor chain gap filler: get rid of chainExtractID dependency -> not needed~~
- ~~read parameters from config file~~
- Document masking, etc. - nuances that affect the pipeline performance.
- https://github.com/hillerlab/make_lastz_chains/issues/20 - temp files location
- ~~https://github.com/hillerlab/make_lastz_chains/issues/20 - temp files location~~

## Minor things

Expand Down
7 changes: 0 additions & 7 deletions chain_clean_micro_env/readme.txt

This file was deleted.

7 changes: 1 addition & 6 deletions install_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

SCRIPT_LOCATION = os.path.abspath(os.path.dirname(__file__))
DESTINATION_DIR = os.path.join(SCRIPT_LOCATION, "HL_kent_binaries")
CHAIN_NET_DIR = os.path.join(SCRIPT_LOCATION, "chain_clean_micro_env")
HG_DOWNLOAD_LINK = "https://hgdownload.cse.ucsc.edu/admin/exe/"

# OS related
Expand Down Expand Up @@ -51,11 +50,7 @@ def process_tool(tool_name):
# not found, need to acquire
download_link = f"{HG_DOWNLOAD_LINK}/{HG_DOWNLOAD_DIRNAME}/{tool_name}"
# destination dir for all binaries necessary to run the pipeline is HL_kent_binaries
# chainNet is only necessary for chainCleaner, and is saved to chain_clean_micro_env
# a directory that serves as temporary extension of the $PATH
# only to run chainCleaner
destination_dir = DESTINATION_DIR if tool_name != "chainNet" else CHAIN_NET_DIR
destination = os.path.join(destination_dir, tool_name)
destination = os.path.join(DESTINATION_DIR, tool_name)

if os.path.isfile(destination):
# if already in destination directory: just skip it
Expand Down
1 change: 1 addition & 0 deletions make_chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ def save_final_chain(parameters: PipelineParameters, project_paths: ProjectPaths
shutil.move(last_chain_file, project_paths.final_chain)
to_log(f"Saved final chains file to {project_paths.final_chain}")


def _del_file_and_log(path):
    """Delete the file at `path` and write the removed path to the log.

    The file is removed first; the log entry is written only after the
    removal succeeded, so an exception raised by os.remove propagates
    without anything being logged.
    """
    os.remove(path)
    removed_msg = f"x {path}"
    to_log(removed_msg)
Expand Down
22 changes: 4 additions & 18 deletions modules/step_executables.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,9 @@ def __init__(self, root_dir):
self.chain_cleaner = self.__find_binary(Constants.ToolNames.CHAIN_CLEANER)
self.chain_sort = self.__find_binary(Constants.ToolNames.CHAIN_SORT)
self.chain_score = self.__find_binary(Constants.ToolNames.CHAIN_SCORE)
self.lastz = self.__find_binary(Constants.ToolNames.LASTZ)
self.chain_net = self.__find_binary(Constants.ToolNames.CHAIN_NET)
self.chain_filter = self.__find_binary(Constants.ToolNames.CHAIN_FILTER)

# ChainNet is special for now
self.chain_net = self.__locate_chain_net(Constants.ToolNames.CHAIN_NET)
self.lastz = self.__find_binary(Constants.ToolNames.LASTZ)

self.__check_completeness()

Expand All @@ -57,27 +55,15 @@ def __find_binary(self, binary_name):
to_log(f"* found {binary_name} at {binary_path}")
return binary_path

def __locate_chain_net(self, chain_net):
    """Check whether the `chain_net` executable can be located.

    The lookup order is:
      1. $PATH (via shutil.which);
      2. a regular file named `chain_net` inside self.chain_clean_env_dir.

    Returns True as soon as one of the lookups succeeds. If both fail,
    the name is appended to self.not_found and None is returned.
    """
    # First choice: the tool is already available in $PATH.
    if shutil.which(chain_net):
        to_log(f"found {chain_net} in $PATH")
        return True

    # Fallback: the dedicated directory for chainCleaner dependencies.
    env_dir = self.chain_clean_env_dir
    candidate = os.path.join(env_dir, chain_net)
    if os.path.isfile(candidate):
        to_log(f"found {chain_net} in {env_dir}")
        return True

    # Not found anywhere: record the name as missing.
    self.not_found.append(chain_net)
    return None

def __check_completeness(self):
if len(self.not_found) == 0:
to_log("All necessary executables found.")
return
not_found_bins = "\n".join([f"* {x}" for x in self.not_found])
err_msg = (
f"Error! The following tools not found neither in $PATH nor "
f"in the download dir:\n{not_found_bins}\nPlease note that "
f"chainNet should be placed either in $PATH or in the "
f"{self.chain_clean_env_dir} directory. Other tools are "
f"expected to be either in $PATH or {self.hl_kent_binaries_path}\n"
f"in the download dir:\n{not_found_bins}\n"
f"The tools are expected to be either in $PATH or {self.hl_kent_binaries_path}\n"
f"Please use install_dependencies.py to automate the process."
)
raise ExecutableNotFoundError(err_msg)
4 changes: 2 additions & 2 deletions steps_implementations/clean_chain_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ def do_chains_clean(params: PipelineParameters,
_intermediate_chain = f"{_output_chain}__temp"
_clean_chain_args = params.clean_chain_parameters.split()

# dirty hack to override chainNet not found error
# some Kent binaries and NetFilterNonNested.perl are necessary to run chainCleaner
_temp_env = os.environ.copy()
_temp_env["PATH"] = f"{project_paths.chain_clean_micro_env}:" + _temp_env["PATH"]
_temp_env["PATH"] = f"{project_paths.hl_kent_binaries}:" + _temp_env["PATH"]

chain_cleaner_cmd = [
executables.chain_cleaner,
Expand Down

0 comments on commit 43927f1

Please sign in to comment.