From 17395925422ba172e132ba3f5164eaf3d8cc979e Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 11:18:20 -0500 Subject: [PATCH 01/23] feat: Added msa parameter. --- bin/run_chai_1.py | 1 + main.nf | 3 ++- modules/local/chai_1/main.nf | 3 +++ nextflow.config | 1 + nextflow_schema.json | 7 +++++++ workflows/nf_chai/main.nf | 4 +++- 6 files changed, 17 insertions(+), 2 deletions(-) diff --git a/bin/run_chai_1.py b/bin/run_chai_1.py index 4e3c10a..b82c4de 100755 --- a/bin/run_chai_1.py +++ b/bin/run_chai_1.py @@ -77,6 +77,7 @@ def main(): seed=args.seed, device=device, use_esm_embeddings=args.use_esm_embeddings, + msa_directory=msa_dir, ) if __name__ == "__main__": diff --git a/main.nf b/main.nf index 0be8a6f..d488507 100644 --- a/main.nf +++ b/main.nf @@ -47,7 +47,8 @@ workflow { params.num_trunk_recycles, params.num_diffusion_timesteps, params.seed, - params.use_esm_embeddings + params.use_esm_embeddings, + params.msa_dir ) // diff --git a/modules/local/chai_1/main.nf b/modules/local/chai_1/main.nf index 3e1b610..0245000 100644 --- a/modules/local/chai_1/main.nf +++ b/modules/local/chai_1/main.nf @@ -11,6 +11,7 @@ process CHAI_1 { val num_diffusion_timesteps val seed val use_esm_embeddings + path msa_dir output: tuple val(meta), path("${meta.id}/*.cif"), emit: structures @@ -19,6 +20,7 @@ process CHAI_1 { script: def esm_flag = use_esm_embeddings ? '--use-esm-embeddings' : '' + def msa_flag = msa_dir ? 
'--msa_directory=$msa_dir' : '' """ run_chai_1.py \\ --fasta-file ${fasta} \\ @@ -27,6 +29,7 @@ process CHAI_1 { --num-diffn-timesteps ${num_diffusion_timesteps} \\ --seed ${seed} \\ ${esm_flag} \\ + ${msa_flag} \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/nextflow.config b/nextflow.config index 35a1436..a9e859f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,7 @@ params { num_diffusion_timesteps = 200 seed = 42 use_esm_embeddings = true + msa_dir = '/Users/florian.wuennemann/2_Areas/Seqera/github_repos/nf-chai-main/msa_dir_test' // Boilerplate options outdir = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 9d90b47..5843c87 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -67,6 +67,13 @@ "fa_icon": "fas fa-stamp", "hidden": true, "description": "Use user-provided esm model embeddings" + }, + "msa_dir": { + "type": "string", + "default": "/Users/florian.wuennemann/2_Areas/Seqera/github_repos/nf-chai-main/msa_dir_test", + "hidden": true, + "description": "Directory containing precomputed multiple-sequence alignments", + "fa_icon": "fas fa-align-justify" } } }, diff --git a/workflows/nf_chai/main.nf b/workflows/nf_chai/main.nf index 4fc1635..6a95345 100644 --- a/workflows/nf_chai/main.nf +++ b/workflows/nf_chai/main.nf @@ -22,6 +22,7 @@ workflow NF_CHAI { num_diffusion_timesteps // integer: Number of diffusion steps to use seed // integer: Random seed to be used for Chai-1 calculations use_esm_embeddings // boolean: Use user-provided esm model embeddings + msa_dir // string: path to the directory containing multiple sequence alignments (msa) main: @@ -42,7 +43,8 @@ workflow NF_CHAI { num_trunk_recycles, num_diffusion_timesteps, seed, - use_esm_embeddings + use_esm_embeddings, + msa_dir ? 
Channel.fromPath(msa_dir) : [] ) ch_versions = ch_versions.mix(CHAI_1.out.versions) From 489727fe732ffc04fd8bf070734303d1ae2e50d0 Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 11:43:53 -0500 Subject: [PATCH 02/23] Fix: Remove local path from config. --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index a9e859f..e543674 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,7 +17,7 @@ params { num_diffusion_timesteps = 200 seed = 42 use_esm_embeddings = true - msa_dir = '/Users/florian.wuennemann/2_Areas/Seqera/github_repos/nf-chai-main/msa_dir_test' + msa_dir = null // Boilerplate options outdir = null From 4723a3d0be2ba77718ce95b5906e5a2e2aac3cfe Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 13:11:07 -0500 Subject: [PATCH 03/23] Fix: Added CHAI_DOWNLOADS_DIR back into module. --- bin/run_chai_1.py | 8 +++++++- modules/local/chai_1/main.nf | 5 +++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/bin/run_chai_1.py b/bin/run_chai_1.py index b82c4de..6888012 100755 --- a/bin/run_chai_1.py +++ b/bin/run_chai_1.py @@ -48,6 +48,12 @@ def main(): default=True, help="Use ESM embeddings (enabled by default)" ) + parser.add_argument( + "--msa_dir", + type=str, + default=None, + help="Directory containing precomputed multiple sequence alignments (MSA)." + ) # Parse arguments args = parser.parse_args() @@ -77,7 +83,7 @@ def main(): seed=args.seed, device=device, use_esm_embeddings=args.use_esm_embeddings, - msa_directory=msa_dir, + msa_directory=args.msa_dir, ) if __name__ == "__main__": diff --git a/modules/local/chai_1/main.nf b/modules/local/chai_1/main.nf index 0245000..cb03c1d 100644 --- a/modules/local/chai_1/main.nf +++ b/modules/local/chai_1/main.nf @@ -19,9 +19,11 @@ process CHAI_1 { path "versions.yml" , emit: versions script: + def downloads_dir = weights_dir ?: './downloads' def esm_flag = use_esm_embeddings ? 
'--use-esm-embeddings' : '' def msa_flag = msa_dir ? '--msa_directory=$msa_dir' : '' """ + CHAI_DOWNLOADS_DIR=$downloads_dir \\ run_chai_1.py \\ --fasta-file ${fasta} \\ --output-dir . \\ @@ -29,8 +31,7 @@ process CHAI_1 { --num-diffn-timesteps ${num_diffusion_timesteps} \\ --seed ${seed} \\ ${esm_flag} \\ - ${msa_flag} \\ - $args + ${msa_flag} cat <<-END_VERSIONS > versions.yml "${task.process}": From 6f6a1375aef7c5438e6c936b0210609ab58ae8ea Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 15:15:11 -0500 Subject: [PATCH 04/23] fix: Removed resource label from CHAI_1 and added groundswell optimized process specific resource config. --- modules/local/chai_1/main.nf | 1 - nextflow.config | 7 +++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/local/chai_1/main.nf b/modules/local/chai_1/main.nf index cb03c1d..77065b1 100644 --- a/modules/local/chai_1/main.nf +++ b/modules/local/chai_1/main.nf @@ -1,6 +1,5 @@ process CHAI_1 { tag "$meta.id" - label 'process_high' conda "${moduleDir}/environment.yml" container 'community.wave.seqera.io/library/gcc_linux-64_python_cuda_pip_chai_lab:44cb323409492b49' diff --git a/nextflow.config b/nextflow.config index e543674..d278c87 100644 --- a/nextflow.config +++ b/nextflow.config @@ -47,6 +47,13 @@ process { accelerator = 1 } } + + withName: 'NF_CHAI:CHAI_1' { + cpus = { 2 * task.attempt } + memory = { 16.GB * task.attempt } + } + + } // Load base.config by default for all pipelines From 75e9ed9804ad6a42c69c35ae1303085449d7499c Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 16:31:35 -0500 Subject: [PATCH 05/23] fix: Added apptainer gpu config runOption. --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index d278c87..9e6e8b3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -140,6 +140,7 @@ profiles { apptainer { apptainer.enabled = true apptainer.autoMounts = true + apptainer.runOptions = params.use_gpus ? 
'--nv' : '' conda.enabled = false docker.enabled = false singularity.enabled = false From 0022e02567c09bccc00de6eec70d4fdd73e81ad6 Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 16:49:30 -0500 Subject: [PATCH 06/23] fix: set output-dir param to meta.id again --- modules/local/chai_1/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/chai_1/main.nf b/modules/local/chai_1/main.nf index 77065b1..58ba77b 100644 --- a/modules/local/chai_1/main.nf +++ b/modules/local/chai_1/main.nf @@ -25,7 +25,7 @@ process CHAI_1 { CHAI_DOWNLOADS_DIR=$downloads_dir \\ run_chai_1.py \\ --fasta-file ${fasta} \\ - --output-dir . \\ + --output-dir ${meta.id} \\ --num-trunk-recycles ${num_trunk_recycles} \\ --num-diffn-timesteps ${num_diffusion_timesteps} \\ --seed ${seed} \\ From bd2aa5e3f08dd6064d779bba78e9e0c9dc4caed4 Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 16:52:32 -0500 Subject: [PATCH 07/23] fix: changed torch device definition back, to work on cpu only machines. --- bin/run_chai_1.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/bin/run_chai_1.py b/bin/run_chai_1.py index 6888012..13cb699 100755 --- a/bin/run_chai_1.py +++ b/bin/run_chai_1.py @@ -65,14 +65,7 @@ def main(): # Create the output directory if it doesn't exist args.output_dir.mkdir(parents=True, exist_ok=True) - # Set device for PyTorch - if torch.cuda.is_available(): - logging.info("GPU found, using GPU") - device = torch.device("cuda") - else: - logging.info("No GPU found, using CPU") - device = "cpu" - + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Run structure prediction run_inference( From 0056f469f647a7c1fc8dec78ff33beff9596b08e Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 17:12:07 -0500 Subject: [PATCH 08/23] fix: Fixed indentation in config. 
--- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 9e6e8b3..56adcc3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -140,7 +140,7 @@ profiles { apptainer { apptainer.enabled = true apptainer.autoMounts = true - apptainer.runOptions = params.use_gpus ? '--nv' : '' + apptainer.runOptions = params.use_gpus ? '--nv' : '' conda.enabled = false docker.enabled = false singularity.enabled = false From 37444883d831a5401ccd47937edd06e9007ef4f5 Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 17:42:14 -0500 Subject: [PATCH 09/23] fix: Added Path to msa_dir in run_chai_1.py --- bin/run_chai_1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/run_chai_1.py b/bin/run_chai_1.py index 13cb699..7ea691d 100755 --- a/bin/run_chai_1.py +++ b/bin/run_chai_1.py @@ -76,7 +76,7 @@ def main(): seed=args.seed, device=device, use_esm_embeddings=args.use_esm_embeddings, - msa_directory=args.msa_dir, + msa_directory=Path(args.msa_dir), ) if __name__ == "__main__": From f8208e49c0c9e9ecaa08da4b3dd30e513e8f80c6 Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 17:48:29 -0500 Subject: [PATCH 10/23] fix: Fixed indentation and quotes for defs in CHAI_1 --- modules/local/chai_1/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/chai_1/main.nf b/modules/local/chai_1/main.nf index 58ba77b..2d03c68 100644 --- a/modules/local/chai_1/main.nf +++ b/modules/local/chai_1/main.nf @@ -18,9 +18,9 @@ process CHAI_1 { path "versions.yml" , emit: versions script: - def downloads_dir = weights_dir ?: './downloads' + def downloads_dir = weights_dir ?: './downloads' def esm_flag = use_esm_embeddings ? '--use-esm-embeddings' : '' - def msa_flag = msa_dir ? '--msa_directory=$msa_dir' : '' + def msa_flag = msa_dir ? 
"--msa_directory=$msa_dir" : '' """ CHAI_DOWNLOADS_DIR=$downloads_dir \\ run_chai_1.py \\ From 9f2162061a2670cd34eab6a2fd98b5c46b42810c Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 17:56:48 -0500 Subject: [PATCH 11/23] fix: Add exception in run_chai_1.py for case that msa is not provided. --- bin/run_chai_1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/run_chai_1.py b/bin/run_chai_1.py index 7ea691d..f3b2f55 100755 --- a/bin/run_chai_1.py +++ b/bin/run_chai_1.py @@ -76,7 +76,7 @@ def main(): seed=args.seed, device=device, use_esm_embeddings=args.use_esm_embeddings, - msa_directory=Path(args.msa_dir), + msa_directory=Path(args.msa_dir) if args.msa_dir else None, ) if __name__ == "__main__": From 67b5b99ff65ac1a426b256f7490b71f06c9761b2 Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 20:01:35 -0500 Subject: [PATCH 12/23] fix: Updated nextflow_schema.json. --- nextflow_schema.json | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 5843c87..eb78459 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -70,7 +70,6 @@ }, "msa_dir": { "type": "string", - "default": "/Users/florian.wuennemann/2_Areas/Seqera/github_repos/nf-chai-main/msa_dir_test", "hidden": true, "description": "Directory containing precomputed multiple-sequence alignments", "fa_icon": "fas fa-align-justify" From 14db70453913f75aaa30625d5a851a1bf966c5aa Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 20:19:26 -0500 Subject: [PATCH 13/23] chore: Updated changelog. 
--- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70cf37c..b7406a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Special thanks to the following for their contributions to the release: - [PR #11](https://github.com/seqeralabs/nf-chai/pull/11) - Expose additional Chai-1 parameters in the pipeline - [PR #12](https://github.com/seqeralabs/nf-chai/pull/12) - Add log for GPU/CPU - [PR #13](https://github.com/seqeralabs/nf-chai/pull/13) - Bump `chai_lab` version to 0.4.2 +- [PR #14](https://github.com/seqeralabs/nf-chai/pull/14) - Add Multiple sequence alignment (msa) directory param + fix dev ## 0.1.0 From 33c0d8593103ee8e6a8c9974e04ebb9e719e8240 Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 20:21:08 -0500 Subject: [PATCH 14/23] fix: Fixed left padding in nextflow.config --- nextflow.config | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 56adcc3..60ab307 100644 --- a/nextflow.config +++ b/nextflow.config @@ -48,10 +48,10 @@ process { } } - withName: 'NF_CHAI:CHAI_1' { - cpus = { 2 * task.attempt } - memory = { 16.GB * task.attempt } - } + withName: 'NF_CHAI:CHAI_1' { + cpus = { 2 * task.attempt } + memory = { 16.GB * task.attempt } + } } From 915fe50f2e2e2771fe8e651f86a0b3a662f6de2d Mon Sep 17 00:00:00 2001 From: FloWuenne Date: Fri, 29 Nov 2024 20:38:39 -0500 Subject: [PATCH 15/23] fix: Fixed msa_dir param input for run_chai_1.py --- modules/local/chai_1/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/chai_1/main.nf b/modules/local/chai_1/main.nf index 2d03c68..71a3d02 100644 --- a/modules/local/chai_1/main.nf +++ b/modules/local/chai_1/main.nf @@ -20,7 +20,7 @@ process CHAI_1 { script: def downloads_dir = weights_dir ?: './downloads' def esm_flag = use_esm_embeddings ? '--use-esm-embeddings' : '' - def msa_flag = msa_dir ? 
"--msa_directory=$msa_dir" : '' + def msa_flag = msa_dir ? "--msa_dir=$msa_dir" : '' """ CHAI_DOWNLOADS_DIR=$downloads_dir \\ run_chai_1.py \\ From 80b206bf4cd387262bf10bfdbaee38ff45f2dafb Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 2 Dec 2024 11:49:14 +0000 Subject: [PATCH 16/23] chore: update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7406a3..7d86e38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ Special thanks to the following for their contributions to the release: - [PR #11](https://github.com/seqeralabs/nf-chai/pull/11) - Expose additional Chai-1 parameters in the pipeline - [PR #12](https://github.com/seqeralabs/nf-chai/pull/12) - Add log for GPU/CPU - [PR #13](https://github.com/seqeralabs/nf-chai/pull/13) - Bump `chai_lab` version to 0.4.2 -- [PR #14](https://github.com/seqeralabs/nf-chai/pull/14) - Add Multiple sequence alignment (msa) directory param + fix dev +- [PR #14](https://github.com/seqeralabs/nf-chai/pull/14) - Add parameter to provide multiple sequence alignment directory to Chai-1 ## 0.1.0 From 48665f586e96255d49a3c1d25e8386941d8cf900 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 2 Dec 2024 11:52:11 +0000 Subject: [PATCH 17/23] chore: rename underscores to dashes in Python script for consistency --- bin/run_chai_1.py | 10 ++++++++-- modules/local/chai_1/main.nf | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/bin/run_chai_1.py b/bin/run_chai_1.py index f3b2f55..10c7a96 100755 --- a/bin/run_chai_1.py +++ b/bin/run_chai_1.py @@ -49,7 +49,7 @@ def main(): help="Use ESM embeddings (enabled by default)" ) parser.add_argument( - "--msa_dir", + "--msa-dir", type=str, default=None, help="Directory containing precomputed multiple sequence alignments (MSA)." 
@@ -65,7 +65,13 @@ def main(): # Create the output directory if it doesn't exist args.output_dir.mkdir(parents=True, exist_ok=True) - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + # Set device for PyTorch + if torch.cuda.is_available(): + logging.info("GPU found, using GPU") + device = torch.device("cuda") + else: + logging.info("No GPU found, using CPU") + device = "cpu" # Run structure prediction run_inference( diff --git a/modules/local/chai_1/main.nf b/modules/local/chai_1/main.nf index 71a3d02..ef7db6b 100644 --- a/modules/local/chai_1/main.nf +++ b/modules/local/chai_1/main.nf @@ -20,7 +20,7 @@ process CHAI_1 { script: def downloads_dir = weights_dir ?: './downloads' def esm_flag = use_esm_embeddings ? '--use-esm-embeddings' : '' - def msa_flag = msa_dir ? "--msa_dir=$msa_dir" : '' + def msa_flag = msa_dir ? "--msa-dir=$msa_dir" : '' """ CHAI_DOWNLOADS_DIR=$downloads_dir \\ run_chai_1.py \\ From 816b2b6dd068f79841b3171bb27c882b2a42eac4 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 2 Dec 2024 11:55:11 +0000 Subject: [PATCH 18/23] chore: move --msa_dir param up in schema and add appropriate fields --- nextflow_schema.json | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index eb78459..54e2ccb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -35,6 +35,13 @@ "description": "Directory containing model weights and other artifacts required by Chai-1.", "fa_icon": "fas fa-folder-open" }, + "msa_dir": { + "type": "string", + "format": "directory-path", + "exists": true, + "description": "Directory containing precomputed multiple-sequence alignments", + "fa_icon": "fas fa-align-justify" + }, "use_gpus": { "type": "boolean", "description": "Run compatible tasks on GPUs rather than CPUs (default).", @@ -67,12 +74,6 @@ "fa_icon": "fas fa-stamp", "hidden": true, "description": "Use user-provided esm model embeddings" - }, - "msa_dir": { - "type": 
"string", - "hidden": true, - "description": "Directory containing precomputed multiple-sequence alignments", - "fa_icon": "fas fa-align-justify" } } }, From 7ebaddb8ecbad2f4baeb52a94e3aa89dfd4f043e Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 2 Dec 2024 12:03:04 +0000 Subject: [PATCH 19/23] chore: move --msa_dir param up in parameter priority as input --- main.nf | 4 ++-- nextflow.config | 2 +- workflows/nf_chai/main.nf | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/main.nf b/main.nf index d488507..e4d2707 100644 --- a/main.nf +++ b/main.nf @@ -44,11 +44,11 @@ workflow { NF_CHAI ( params.input, params.weights_dir, + params.msa_dir, params.num_trunk_recycles, params.num_diffusion_timesteps, params.seed, - params.use_esm_embeddings, - params.msa_dir + params.use_esm_embeddings ) // diff --git a/nextflow.config b/nextflow.config index 60ab307..d3c8ae7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,12 +12,12 @@ params { // Input options input = null weights_dir = null + msa_dir = null use_gpus = false num_trunk_recycles = 3 num_diffusion_timesteps = 200 seed = 42 use_esm_embeddings = true - msa_dir = null // Boilerplate options outdir = null diff --git a/workflows/nf_chai/main.nf b/workflows/nf_chai/main.nf index 6a95345..8abf276 100644 --- a/workflows/nf_chai/main.nf +++ b/workflows/nf_chai/main.nf @@ -16,13 +16,13 @@ include { CHAI_1 } from '../../modules/local/chai_1' workflow NF_CHAI { take: - fasta_file // string: path to fasta file read provided via --input parameter - weights_dir // string: path to model directory read provided via --weights_directory parameter + fasta_fle // string: path to fasta file read provided via --input parameter + weights_dir // string: path to model directory read provided via --weights_dir parameter + msa_dir // string: path to the directory containing multiple sequence alignments (msa) num_trunk_recycles // integer: Number of trunk recycles num_diffusion_timesteps // integer: 
Number of diffusion steps to use seed // integer: Random seed to be used for Chai-1 calculations use_esm_embeddings // boolean: Use user-provided esm model embeddings - msa_dir // string: path to the directory containing multiple sequence alignments (msa) main: @@ -40,11 +40,11 @@ workflow NF_CHAI { CHAI_1 ( ch_fasta, weights_dir ? Channel.fromPath(weights_dir) : [], + msa_dir ? Channel.fromPath(msa_dir) : [], num_trunk_recycles, num_diffusion_timesteps, seed, - use_esm_embeddings, - msa_dir ? Channel.fromPath(msa_dir) : [] + use_esm_embeddings ) ch_versions = ch_versions.mix(CHAI_1.out.versions) From a0559bce63248bb647b9f4fa16b37325b13ef2c1 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 2 Dec 2024 12:55:18 +0000 Subject: [PATCH 20/23] fix: bug in fasta file name --- workflows/nf_chai/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/nf_chai/main.nf b/workflows/nf_chai/main.nf index 8abf276..5369f93 100644 --- a/workflows/nf_chai/main.nf +++ b/workflows/nf_chai/main.nf @@ -16,7 +16,7 @@ include { CHAI_1 } from '../../modules/local/chai_1' workflow NF_CHAI { take: - fasta_fle // string: path to fasta file read provided via --input parameter + fasta_file // string: path to fasta file read provided via --input parameter weights_dir // string: path to model directory read provided via --weights_dir parameter msa_dir // string: path to the directory containing multiple sequence alignments (msa) num_trunk_recycles // integer: Number of trunk recycles From 9b402316de81c22dbc0d78f0035a3888a7093806 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 2 Dec 2024 13:01:08 +0000 Subject: [PATCH 21/23] chore: change some variable names in main module --- modules/local/chai_1/main.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/chai_1/main.nf b/modules/local/chai_1/main.nf index ef7db6b..7f327ac 100644 --- a/modules/local/chai_1/main.nf +++ b/modules/local/chai_1/main.nf @@ -6,11 +6,11 
@@ process CHAI_1 { input: tuple val(meta), path(fasta) path weights_dir + path msa_dir val num_trunk_recycles val num_diffusion_timesteps val seed val use_esm_embeddings - path msa_dir output: tuple val(meta), path("${meta.id}/*.cif"), emit: structures @@ -18,9 +18,9 @@ process CHAI_1 { path "versions.yml" , emit: versions script: - def downloads_dir = weights_dir ?: './downloads' - def esm_flag = use_esm_embeddings ? '--use-esm-embeddings' : '' - def msa_flag = msa_dir ? "--msa-dir=$msa_dir" : '' + def downloads_dir = weights_dir ?: './downloads' + def msa_path = msa_dir ? "--msa-dir=$msa_dir" : '' + def use_esm = use_esm_embeddings ? '--use-esm-embeddings' : '' """ CHAI_DOWNLOADS_DIR=$downloads_dir \\ run_chai_1.py \\ @@ -29,8 +29,8 @@ process CHAI_1 { --num-trunk-recycles ${num_trunk_recycles} \\ --num-diffn-timesteps ${num_diffusion_timesteps} \\ --seed ${seed} \\ - ${esm_flag} \\ - ${msa_flag} + ${use_esm} \\ + ${msa_path} cat <<-END_VERSIONS > versions.yml "${task.process}": From 9c5bc4ae48468f94ace6c0d1c79472b77fba8d58 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 2 Dec 2024 15:21:33 +0000 Subject: [PATCH 22/23] fix: revert removal of process_high label --- modules/local/chai_1/main.nf | 1 + nextflow.config | 7 ------- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/modules/local/chai_1/main.nf b/modules/local/chai_1/main.nf index 7f327ac..81cbed0 100644 --- a/modules/local/chai_1/main.nf +++ b/modules/local/chai_1/main.nf @@ -1,5 +1,6 @@ process CHAI_1 { tag "$meta.id" + label 'process_high' conda "${moduleDir}/environment.yml" container 'community.wave.seqera.io/library/gcc_linux-64_python_cuda_pip_chai_lab:44cb323409492b49' diff --git a/nextflow.config b/nextflow.config index d3c8ae7..ce6165a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -47,13 +47,6 @@ process { accelerator = 1 } } - - withName: 'NF_CHAI:CHAI_1' { - cpus = { 2 * task.attempt } - memory = { 16.GB * task.attempt } - } - - } // Load base.config by default for 
all pipelines From 3b26c20672812499556a485d706df1a53783e44b Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 2 Dec 2024 15:26:58 +0000 Subject: [PATCH 23/23] docs: add sentence about --msa_dir to main README --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index c950b08..d03457c 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,8 @@ nextflow run seqeralabs/nf-chai \ Set the `--weights_dir` parameter to a location with the pre-downloaded weights required by Chai-1 to avoid having to download them every time you run the pipeline. +To further improve prediction performance using pre-built multiple sequence alignments (MSA) with evolutionary information, set the `--msa_dir` parameter to a directory containing MSA files in the [`*.aligned.pqt`](https://github.com/chaidiscovery/chai-lab/tree/main/examples/msas#adding-msa-evolutionary-information) format required by Chai-1. + ## Credits nf-chai was originally written by the Seqera Team.