diff --git a/CHANGELOG.md b/CHANGELOG.md index 70cf37c..7d86e38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Special thanks to the following for their contributions to the release: - [PR #11](https://github.com/seqeralabs/nf-chai/pull/11) - Expose additional Chai-1 parameters in the pipeline - [PR #12](https://github.com/seqeralabs/nf-chai/pull/12) - Add log for GPU/CPU - [PR #13](https://github.com/seqeralabs/nf-chai/pull/13) - Bump `chai_lab` version to 0.4.2 +- [PR #14](https://github.com/seqeralabs/nf-chai/pull/14) - Add parameter to provide multiple sequence alignment directory to Chai-1 ## 0.1.0 diff --git a/README.md b/README.md index c950b08..d03457c 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,8 @@ nextflow run seqeralabs/nf-chai \ Set the `--weights_dir` parameter to a location with the pre-downloaded weights required by Chai-1 to avoid having to download them every time you run the pipeline. +To further improve prediction performance using pre-built multiple sequence alignments (MSA) with evolutionary information, set the `--msa_dir` parameter to a directory containing MSA files in the [`*.aligned.pqt`](https://github.com/chaidiscovery/chai-lab/tree/main/examples/msas#adding-msa-evolutionary-information) format required by Chai-1. + ## Credits nf-chai was originally written by the Seqera Team. diff --git a/bin/run_chai_1.py b/bin/run_chai_1.py index 4e3c10a..10c7a96 100755 --- a/bin/run_chai_1.py +++ b/bin/run_chai_1.py @@ -48,6 +48,12 @@ def main(): default=True, help="Use ESM embeddings (enabled by default)" ) + parser.add_argument( + "--msa-dir", + type=str, + default=None, + help="Directory containing precomputed multiple sequence alignments (MSA)." 
+ ) # Parse arguments args = parser.parse_args() @@ -67,7 +73,6 @@ def main(): logging.info("No GPU found, using CPU") device = "cpu" - # Run structure prediction run_inference( fasta_file=args.fasta_file, @@ -77,6 +82,7 @@ def main(): seed=args.seed, device=device, use_esm_embeddings=args.use_esm_embeddings, + msa_directory=Path(args.msa_dir) if args.msa_dir else None, ) if __name__ == "__main__": diff --git a/main.nf b/main.nf index 0be8a6f..e4d2707 100644 --- a/main.nf +++ b/main.nf @@ -44,6 +44,7 @@ workflow { NF_CHAI ( params.input, params.weights_dir, + params.msa_dir, params.num_trunk_recycles, params.num_diffusion_timesteps, params.seed, diff --git a/modules/local/chai_1/main.nf b/modules/local/chai_1/main.nf index 3e1b610..81cbed0 100644 --- a/modules/local/chai_1/main.nf +++ b/modules/local/chai_1/main.nf @@ -7,6 +7,7 @@ process CHAI_1 { input: tuple val(meta), path(fasta) path weights_dir + path msa_dir val num_trunk_recycles val num_diffusion_timesteps val seed @@ -18,16 +19,19 @@ process CHAI_1 { path "versions.yml" , emit: versions script: - def esm_flag = use_esm_embeddings ? '--use-esm-embeddings' : '' + def downloads_dir = weights_dir ?: './downloads' + def msa_path = msa_dir ? "--msa-dir=$msa_dir" : '' + def use_esm = use_esm_embeddings ? '--use-esm-embeddings' : '' """ + CHAI_DOWNLOADS_DIR=$downloads_dir \\ run_chai_1.py \\ --fasta-file ${fasta} \\ - --output-dir . 
\\ + --output-dir ${meta.id} \\ --num-trunk-recycles ${num_trunk_recycles} \\ --num-diffn-timesteps ${num_diffusion_timesteps} \\ --seed ${seed} \\ - ${esm_flag} \\ - $args + ${use_esm} \\ + ${msa_path} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index 35a1436..ce6165a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,6 +12,7 @@ params { // Input options input = null weights_dir = null + msa_dir = null use_gpus = false num_trunk_recycles = 3 num_diffusion_timesteps = 200 @@ -132,6 +133,7 @@ profiles { apptainer { apptainer.enabled = true apptainer.autoMounts = true + apptainer.runOptions = params.use_gpus ? '--nv' : '' conda.enabled = false docker.enabled = false singularity.enabled = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 9d90b47..54e2ccb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -35,6 +35,13 @@ "description": "Directory containing model weights and other artifacts required by Chai-1.", "fa_icon": "fas fa-folder-open" }, + "msa_dir": { + "type": "string", + "format": "directory-path", + "exists": true, + "description": "Directory containing precomputed multiple-sequence alignments", + "fa_icon": "fas fa-align-justify" + }, "use_gpus": { "type": "boolean", "description": "Run compatible tasks on GPUs rather than CPUs (default).", diff --git a/workflows/nf_chai/main.nf b/workflows/nf_chai/main.nf index 4fc1635..5369f93 100644 --- a/workflows/nf_chai/main.nf +++ b/workflows/nf_chai/main.nf @@ -17,7 +17,8 @@ workflow NF_CHAI { take: fasta_file // string: path to fasta file read provided via --input parameter - weights_dir // string: path to model directory read provided via --weights_directory parameter + weights_dir // string: path to model directory read provided via --weights_dir parameter + msa_dir // string: path to the directory containing multiple sequence alignments (MSA) provided via --msa_dir parameter num_trunk_recycles // integer: Number of trunk recycles 
num_diffusion_timesteps // integer: Number of diffusion steps to use seed // integer: Random seed to be used for Chai-1 calculations @@ -39,6 +40,7 @@ workflow NF_CHAI { CHAI_1 ( ch_fasta, weights_dir ? Channel.fromPath(weights_dir) : [], + msa_dir ? Channel.fromPath(msa_dir) : [], num_trunk_recycles, num_diffusion_timesteps, seed,