diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index b30ada17..b26623a3 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -1,15 +1,15 @@ name: nf-core branch protection -# This workflow is triggered on PRs to master branch on the repository -# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` +# This workflow is triggered on PRs to the main branch on the repository +# It fails when someone tries to make a PR against the nf-core `main` branch instead of `dev` on: pull_request_target: - branches: [master] + branches: [main] jobs: test: runs-on: ubuntu-latest steps: - # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches + # PRs to the nf-core repo main branch are only OK if coming from the nf-core repo `dev` or any `patch` branches - name: Check PRs if: github.repository == 'sanger-tol/blobtoolkit' run: | @@ -22,7 +22,7 @@ jobs: uses: mshick/add-pr-comment@v1 with: message: | - ## This PR is against the `master` branch :x: + ## This PR is against the `main` branch :x: * Do not close this PR * Click _Edit_ and change the `base` to `dev` @@ -32,9 +32,9 @@ jobs: Hi @${{ github.event.pull_request.user.login }}, - It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. - The `master` branch on nf-core repositories should always contain code from the latest release. - Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `main` branch. + The `main` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `main` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. Note that even after this, the test will continue to show as failing until you push a new commit.
diff --git a/.github/workflows/sanger_test.yml b/.github/workflows/sanger_test.yml index 406a6280..32849b2e 100644 --- a/.github/workflows/sanger_test.yml +++ b/.github/workflows/sanger_test.yml @@ -17,7 +17,7 @@ jobs: with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV_LARGE }} revision: ${{ env.REVISION }} workdir: ${{ secrets.TOWER_WORKDIR_PARENT }}/work/${{ github.repository }}/work-${{ env.REVISION }} parameters: | diff --git a/.github/workflows/sanger_test_full.yml b/.github/workflows/sanger_test_full.yml index e3a25f7b..b44c29f4 100644 --- a/.github/workflows/sanger_test_full.yml +++ b/.github/workflows/sanger_test_full.yml @@ -1,6 +1,10 @@ name: sanger-tol LSF full size tests on: + push: + branches: + - main + - dev workflow_dispatch: jobs: run-tower: @@ -22,7 +26,7 @@ jobs: with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV_LARGE }} revision: ${{ env.REVISION }} workdir: ${{ secrets.TOWER_WORKDIR_PARENT }}/work/${{ github.repository }}/work-${{ env.REVISION }} parameters: | diff --git a/.nf-core.yml b/.nf-core.yml index 2a47982a..85e18745 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -18,6 +18,7 @@ lint: - .github/ISSUE_TEMPLATE/bug_report.yml - .github/PULL_REQUEST_TEMPLATE.md - .github/workflows/linting.yml + - .github/workflows/branch.yml multiqc_config: - report_comment nextflow_config: diff --git a/CHANGELOG.md b/CHANGELOG.md index 240bcd13..b1f08975 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,30 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[0.5.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.5.0)] – Snorlax – [2024-07-31] + +General tidy up of the configuration and the pipeline + +### Enhancements & fixes + +- Increased the resources for blastn +- Removed some options that were not used or not needed +- All relevant outputs are now copied to the output directory +- Fixed some blast parameters to match the behaviour of the Snakemake pipeline +- Fixed parsing of samplesheets from fetchngs to capture correct data type + +### Parameters + +| Old parameter | New parameter | +| --------------- | ------------- | +| --taxa_file | | +| --blastp_outext | | +| --blastp_cols | | +| --blastx_outext | | +| --blastx_cols | | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present.
+> **NB:** Parameter has been **added** if just the new parameter information is present.
+> **NB:** Parameter has been **removed** if new parameter information isn't present. + ## [[0.4.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.4.0)] – Buneary – [2024-04-17] The pipeline has now been validated on dozens of genomes, up to 11 Gbp. diff --git a/README.md b/README.md index 31512bc0..c7b92970 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,8 @@ It takes a samplesheet of BAM/CRAM/FASTQ/FASTA files as input, calculates genome 4. Run BUSCO ([`busco`](https://busco.ezlab.org/)) 5. Extract BUSCO genes ([`blobtoolkit/extractbuscos`](https://github.com/blobtoolkit/blobtoolkit)) 6. Run Diamond BLASTp against extracted BUSCO genes ([`diamond/blastp`](https://github.com/bbuchfink/diamond)) -7. Run BLASTn against extracted BUSCO genes ([`blast/blastn`](https://www.ncbi.nlm.nih.gov/books/NBK131777/)) -8. Run BLASTx against extracted BUSCO genes ([`blast/blastx`](https://www.ncbi.nlm.nih.gov/books/NBK131777/)) +7. Run Diamond BLASTx against sequences with no hit ([`diamond/blastx`](https://github.com/bbuchfink/diamond)) +8. Run BLASTn against sequences still with no hit ([`blast/blastn`](https://www.ncbi.nlm.nih.gov/books/NBK131777/)) 9. Count BUSCO genes ([`blobtoolkit/countbuscos`](https://github.com/blobtoolkit/blobtoolkit)) 10. Generate combined sequence stats across various window sizes ([`blobtoolkit/windowstats`](https://github.com/blobtoolkit/blobtoolkit)) 11. Imports analysis results into a BlobDir dataset ([`blobtoolkit/blobdir`](https://github.com/blobtoolkit/blobtoolkit)) diff --git a/conf/base.config b/conf/base.config index 8f51f7f8..75fa0d06 100644 --- a/conf/base.config +++ b/conf/base.config @@ -104,6 +104,18 @@ process { time = { check_max( 3.h * Math.ceil(meta.genome_size/1000000000) * task.attempt, 'time') } } + withName: "BLAST_BLASTN" { + + // There are blast failures we don't know how to fix. Just ignore them for now + errorStrategy = { task.exitStatus in ((130..145) + 104) ? (task.attempt == process.maxRetries ? 'ignore' : 'retry') : 'finish' } + + // Most jobs complete quickly, but some need a lot longer. For those outliers, + // CPU usage usually remains low, often close to a single CPU + cpus = { check_max( 6 - (task.attempt-1), 'cpus' ) } + memory = { check_max( 1.GB * Math.pow(4, task.attempt-1), 'memory' ) } + time = { check_max( 10.h * Math.pow(4, task.attempt-1), 'time' ) } + } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } diff --git a/conf/modules.config b/conf/modules.config index 439a77b3..ac597dc4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -48,6 +48,14 @@ process { ext.args = { "-ax map-ont -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } + withName: "MINIMAP2_.*" { + publishDir = [ + path: { "${params.outdir}/read_mapping/${meta.datatype}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + withName: "SAMTOOLS_VIEW" { ext.args = "--output-fmt bam --write-index" } @@ -60,6 +68,22 @@ process { ext.args = "--lineage --busco" } + withName: "PIGZ_COMPRESS" { + publishDir = [ + path: { "${params.outdir}/base_content" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename.minus("fw_out/") } + ] + } + + withName: "BLOBTK_DEPTH" { + publishDir = [ + path: { "${params.outdir}/read_mapping/${meta.datatype}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ?
null : "${meta.id}.coverage.1k.bed.gz" } + ] + } + withName: "BUSCO" { scratch = true ext.args = { 'test' in workflow.profile.tokenize(',') ? @@ -114,7 +138,7 @@ } withName: "BLAST_BLASTN" { - ext.args = "-outfmt '6 qseqid staxids bitscore std' -max_target_seqs 10 -max_hsps 1 -evalue 1.0e-10 -lcase_masking -dust '20 64 1'" + ext.args = "-task megablast -outfmt '6 qseqid staxids bitscore std' -max_target_seqs 10 -max_hsps 1 -evalue 1.0e-10 -lcase_masking -dust '20 64 1'" } withName: "CUSTOM_DUMPSOFTWAREVERSIONS" { diff --git a/docs/output.md b/docs/output.md index 18fe2b6d..e3204a1d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -15,6 +15,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [BlobDir](#blobdir) - Output files viewable on a [BlobToolKit viewer](https://github.com/blobtoolkit/blobtoolkit) - [Static plots](#static-plots) - Static versions of the BlobToolKit plots - [BUSCO](#busco) - BUSCO results +- [Read alignments](#read-alignments) - Aligned reads (optional) +- [Read coverage](#read-coverage) - Read coverage tracks +- [Base content](#base-content) - _k_-mer statistics (for k ≤ 4) - [MultiQC](#multiqc) - Aggregate report describing results from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution @@ -26,8 +29,8 @@ The files in the BlobDir dataset which is used to create the online interactive Output files - `blobtoolkit/` - - `<accession>/` - - `*.json.gz`: files generated from genome and alignment coverage statistics + - `<accession>/` + - `*.json.gz`: files generated from genome and alignment coverage statistics. More information about visualising the data can be found in the [BlobToolKit repository](https://github.com/blobtoolkit/blobtoolkit/tree/main/src/viewer) @@ -53,12 +56,56 @@ BUSCO results generated by the pipeline (all BUSCO lineages that match the claas
Output files -- `blobtoolkit/` - - `busco/` - - `*.batch_summary.txt`: BUSCO scores as tab-separated files (1 file per lineage). - - `*.fasta.txt`: BUSCO scores as formatted text (1 file per lineage). - - `*.json`: BUSCO scores as JSON (1 file per lineage). - - `*/`: all output BUSCO files, including the coordinate and sequence files of the annotated genes. +- `busco/` + - `<lineage>/` + - `short_summary.json`: BUSCO scores for that lineage as JSON. + - `short_summary.tsv`: BUSCO scores for that lineage as a tab-separated file. + - `short_summary.txt`: BUSCO scores for that lineage as formatted text. + - `full_table.tsv`: Coordinates of the annotated BUSCO genes as a tab-separated file. + - `missing_busco_list.tsv`: List of the BUSCO genes that could not be found. + - `*_busco_sequences.tar.gz`: Sequences of the annotated BUSCO genes. 1 _tar_ archive for each of the three annotation levels (`single_copy`, `multi_copy`, `fragmented`), with 1 file per gene. + - `hmmer_output.tar.gz`: Archive of the HMMER alignment scores. + +
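Editor's note: the `full_table.tsv` above is the same table the `blobtoolkit/countbuscos` step consumes, so it is handy for sanity-checking a run by hand. A throwaway Groovy sketch, assuming the standard BUSCO v5 layout (`#`-prefixed header lines, status in the second column) — verify that assumption against your own files:

```groovy
// Tally BUSCO gene statuses (Complete / Duplicated / Fragmented / Missing)
// from a full_table.tsv. Assumes BUSCO v5 columns: busco id, status, sequence, ...
def counts = [:].withDefault { 0 }
new File('full_table.tsv').eachLine { line ->
    if (line.startsWith('#')) return          // skip the comment header
    def cols = line.split('\t')
    if (cols.size() > 1) counts[cols[1]]++    // column 2 holds the status
}
counts.each { status, n -> println "${status}\t${n}" }
```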
+ +### Read alignments + +Read alignments in BAM format -- only if the pipeline is run with `--align`. + +
+Output files + +- `read_mapping/` + - `<datatype>/` + - `<sample>.bam`: alignments of that sample's reads in BAM format. + +
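Editor's note: this `read_mapping/<datatype>/` layout is produced by the `publishDir` rules added to `conf/modules.config` earlier in this diff. The detail worth spelling out is the `saveAs` closure: returning `null` tells Nextflow not to publish that file at all, which is how `versions.yml` is kept out of the results tree. A minimal sketch of the same pattern, with a hypothetical process selector:

```nextflow
// conf/modules.config-style sketch: publish BAMs under read_mapping/<datatype>/,
// but drop versions.yml by returning null from saveAs.
process {
    withName: "MY_ALIGNER" {    // hypothetical process name, not from the pipeline
        publishDir = [
            path: { "${params.outdir}/read_mapping/${meta.datatype}" },
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals("versions.yml") ? null : filename }
        ]
    }
}
```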
+ +### Read coverage + +Read coverage statistics as computed by the pipeline. +These files are the raw data used to build the BlobDir. + +
+Output files + +- `read_mapping/` + - `<datatype>/` + - `<sample>.coverage.1k.bed.gz`: Bedgraph file with the coverage of the alignments of that sample in 1 kbp windows. + +
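Editor's note: because the coverage track is a plain bedgraph (one window per row), it is easy to inspect outside the pipeline. A hedged Groovy one-off for the mean window coverage; the four-column layout (sequence, start, end, coverage) is inferred from the description above, so check it against a real file first:

```groovy
// Mean per-window coverage from a <sample>.coverage.1k.bed.gz bedgraph.
// Assumes tab-separated columns: sequence, start, end, coverage.
import java.util.zip.GZIPInputStream

def covs = []
new File('sample.coverage.1k.bed.gz').withInputStream { ins ->
    new GZIPInputStream(ins).withReader('UTF-8') { reader ->
        reader.eachLine { line ->
            def cols = line.split('\t')
            if (cols.size() >= 4) covs << (cols[3] as BigDecimal)
        }
    }
}
println(covs ? "mean coverage over ${covs.size()} windows: ${covs.sum() / covs.size()}" : "no windows")
```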
+ +### Base content + +_k_-mer statistics. +These files are the raw data used to build the BlobDir. + +
+Output files + +- `base_content/` + - `<assembly>_*nuc_windows.tsv.gz`: Tab-separated files with the counts of every _k_-mer for k ≤ 4 in 1 kbp windows. The first three columns correspond to the coordinates (sequence name, start, end), followed by one column per _k_-mer. + - `<assembly>_freq_windows.tsv.gz`: Tab-separated files with frequencies derived from the _k_-mer counts.
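Editor's note: looking back at the `BLAST_BLASTN` block added to `conf/base.config` above — the resource directives are closures, so Nextflow re-evaluates them with the new `task.attempt` on every retry, while the `errorStrategy` closure retries the listed exit codes and only `ignore`s a job once `maxRetries` is exhausted. Evaluating those formulas by hand (without the `check_max` clamping) gives the schedule below; the three-attempt range mirrors a typical `maxRetries` setting, not a value stated in this diff:

```groovy
// Evaluate the BLAST_BLASTN retry schedule from conf/base.config,
// ignoring the check_max() caps applied in the real pipeline.
(1..3).each { attempt ->
    def cpus  = 6 - (attempt - 1)
    def memGB = (1 * Math.pow(4, attempt - 1)) as int
    def hours = (10 * Math.pow(4, attempt - 1)) as int
    println "attempt ${attempt}: ${cpus} cpus, ${memGB} GB, ${hours} h"
}
// attempt 1: 6 cpus, 1 GB, 10 h
// attempt 2: 5 cpus, 4 GB, 40 h
// attempt 3: 4 cpus, 16 GB, 160 h
```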
diff --git a/modules.json b/modules.json index 667a4482..ebb45a6c 100644 --- a/modules.json +++ b/modules.json @@ -30,12 +30,14 @@ "diamond/blastp": { "branch": "master", "git_sha": "b29f6beb86d1d24d680277fb1a3f4de7b8b8a92c", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/diamond/blastp/diamond-blastp.diff" }, "diamond/blastx": { "branch": "master", "git_sha": "b29f6beb86d1d24d680277fb1a3f4de7b8b8a92c", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/diamond/blastx/diamond-blastx.diff" }, "fastawindows": { "branch": "master", @@ -64,6 +66,11 @@ "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] }, + "pigz/compress": { + "branch": "master", + "git_sha": "0eab94fc1e48703c1b0a8704bd665f554905c39d", + "installed_by": ["modules"] + }, "samtools/fasta": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", diff --git a/modules/local/blobtoolkit/updatemeta.nf b/modules/local/blobtoolkit/updatemeta.nf index de1313d5..a5556348 100644 --- a/modules/local/blobtoolkit/updatemeta.nf +++ b/modules/local/blobtoolkit/updatemeta.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_UPDATEMETA { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_UPDATEMETA module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/pacificbiosciences/pyyaml:5.3.1" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(input) diff --git a/modules/nf-core/blast/blastn/blast-blastn.diff b/modules/nf-core/blast/blastn/blast-blastn.diff index 1695c793..e01e07cb 100644 --- a/modules/nf-core/blast/blastn/blast-blastn.diff +++ b/modules/nf-core/blast/blastn/blast-blastn.diff @@ -1,7 +1,14 @@ Changes in module 'nf-core/blast/blastn' --- modules/nf-core/blast/blastn/main.nf +++ modules/nf-core/blast/blastn/main.nf -@@ -10,6 +10,7 @@ +@@ -1,6 +1,5 @@ + process BLAST_BLASTN { + tag "$meta.id" +- label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +@@ -10,6 +9,7 @@ input: tuple val(meta) , path(fasta) tuple val(meta2), path(db) @@ -9,21 +16,31 @@ Changes in module 'nf-core/blast/blastn' output: tuple val(meta), path('*.txt'), emit: txt -@@ -23,6 +24,7 @@ +@@ -23,6 +23,8 @@ def prefix = task.ext.prefix ?: "${meta.id}" def is_compressed = fasta.getExtension() == "gz" ? true : false def fasta_name = is_compressed ? fasta.getBaseName() : fasta + def exclude_taxon = taxid ? "-negative_taxids ${taxid}" : '' ++ def command_epilog = taxid ? 
"|| true" : '' """ if [ "${is_compressed}" == "true" ]; then -@@ -39,6 +41,7 @@ +@@ -39,8 +41,15 @@ -num_threads ${task.cpus} \\ -db \$DB \\ -query ${fasta_name} \\ + ${exclude_taxon} \\ ${args} \\ - -out ${prefix}.txt +- -out ${prefix}.txt ++ -out ${prefix}.txt \\ ++ 2> >( tee "${prefix}.error.log" >&2 ) $command_epilog ++ ++ if [[ -s "${prefix}.error.log" ]] ++ then ++ grep -qF 'BLAST Database error: Taxonomy ID(s) not found.Taxonomy ID(s) not found' "${prefix}.error.log" ++ fi + cat <<-END_VERSIONS > versions.yml + "${task.process}": ************************************************************ diff --git a/modules/nf-core/blast/blastn/main.nf b/modules/nf-core/blast/blastn/main.nf index 065ad7cd..d674989a 100644 --- a/modules/nf-core/blast/blastn/main.nf +++ b/modules/nf-core/blast/blastn/main.nf @@ -1,6 +1,5 @@ process BLAST_BLASTN { tag "$meta.id" - label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -25,6 +24,7 @@ process BLAST_BLASTN { def is_compressed = fasta.getExtension() == "gz" ? true : false def fasta_name = is_compressed ? fasta.getBaseName() : fasta def exclude_taxon = taxid ? "-negative_taxids ${taxid}" : '' + def command_epilog = taxid ? "|| true" : '' """ if [ "${is_compressed}" == "true" ]; then @@ -43,7 +43,13 @@ process BLAST_BLASTN { -query ${fasta_name} \\ ${exclude_taxon} \\ ${args} \\ - -out ${prefix}.txt + -out ${prefix}.txt \\ + 2> >( tee "${prefix}.error.log" >&2 ) $command_epilog + + if [[ -s "${prefix}.error.log" ]] + then + grep -qF 'BLAST Database error: Taxonomy ID(s) not found.Taxonomy ID(s) not found' "${prefix}.error.log" + fi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/diamond/blastp/diamond-blastp.diff b/modules/nf-core/diamond/blastp/diamond-blastp.diff new file mode 100644 index 00000000..12608ea0 --- /dev/null +++ b/modules/nf-core/diamond/blastp/diamond-blastp.diff @@ -0,0 +1,29 @@ +Changes in module 'nf-core/diamond/blastp' +--- modules/nf-core/diamond/blastp/main.nf ++++ modules/nf-core/diamond/blastp/main.nf +@@ -12,6 +12,7 @@ + tuple val(meta2), path(db) + val out_ext + val blast_columns ++ val taxid + + output: + tuple val(meta), path('*.blast'), optional: true, emit: blast +@@ -32,6 +33,7 @@ + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? fasta.getBaseName() : fasta + def columns = blast_columns ? "${blast_columns}" : '' ++ def exclude_taxon = taxid ? "--taxon-exclude ${taxid}" : '' + switch ( out_ext ) { + case "blast": outfmt = 0; break + case "xml": outfmt = 5; break +@@ -59,6 +61,7 @@ + --db \$DB \\ + --query ${fasta_name} \\ + --outfmt ${outfmt} ${columns} \\ ++ ${exclude_taxon} \\ + ${args} \\ + --out ${prefix}.${out_ext} + + +************************************************************ diff --git a/modules/nf-core/diamond/blastp/main.nf b/modules/nf-core/diamond/blastp/main.nf index dc01cdcc..ae5a1248 100644 --- a/modules/nf-core/diamond/blastp/main.nf +++ b/modules/nf-core/diamond/blastp/main.nf @@ -12,6 +12,7 @@ process DIAMOND_BLASTP { tuple val(meta2), path(db) val out_ext val blast_columns + val taxid output: tuple val(meta), path('*.blast'), optional: true, emit: blast @@ -32,6 +33,7 @@ process DIAMOND_BLASTP { def is_compressed = fasta.getExtension() == "gz" ? true : false def fasta_name = is_compressed ? fasta.getBaseName() : fasta def columns = blast_columns ? "${blast_columns}" : '' + def exclude_taxon = taxid ? 
"--taxon-exclude ${taxid}" : '' switch ( out_ext ) { case "blast": outfmt = 0; break case "xml": outfmt = 5; break @@ -59,6 +61,7 @@ process DIAMOND_BLASTP { --db \$DB \\ --query ${fasta_name} \\ --outfmt ${outfmt} ${columns} \\ + ${exclude_taxon} \\ ${args} \\ --out ${prefix}.${out_ext} diff --git a/modules/nf-core/diamond/blastx/diamond-blastx.diff b/modules/nf-core/diamond/blastx/diamond-blastx.diff new file mode 100644 index 00000000..eff4326a --- /dev/null +++ b/modules/nf-core/diamond/blastx/diamond-blastx.diff @@ -0,0 +1,29 @@ +Changes in module 'nf-core/diamond/blastx' +--- modules/nf-core/diamond/blastx/main.nf ++++ modules/nf-core/diamond/blastx/main.nf +@@ -12,6 +12,7 @@ + tuple val(meta2), path(db) + val out_ext + val blast_columns ++ val taxid + + output: + tuple val(meta), path('*.blast'), optional: true, emit: blast +@@ -33,6 +34,7 @@ + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? fasta.getBaseName() : fasta + def columns = blast_columns ? "${blast_columns}" : '' ++ def exclude_taxon = taxid ? "--taxon-exclude ${taxid}" : '' + switch ( out_ext ) { + case "blast": outfmt = 0; break + case "xml": outfmt = 5; break +@@ -60,6 +62,7 @@ + --db \$DB \\ + --query ${fasta_name} \\ + --outfmt ${outfmt} ${columns} \\ ++ ${exclude_taxon} \\ + ${args} \\ + --out ${prefix}.${out_ext} \\ + --log + +************************************************************ diff --git a/modules/nf-core/diamond/blastx/main.nf b/modules/nf-core/diamond/blastx/main.nf index bf3f623c..dfa82e24 100644 --- a/modules/nf-core/diamond/blastx/main.nf +++ b/modules/nf-core/diamond/blastx/main.nf @@ -12,6 +12,7 @@ process DIAMOND_BLASTX { tuple val(meta2), path(db) val out_ext val blast_columns + val taxid output: tuple val(meta), path('*.blast'), optional: true, emit: blast @@ -33,6 +34,7 @@ process DIAMOND_BLASTX { def is_compressed = fasta.getExtension() == "gz" ? true : false def fasta_name = is_compressed ? fasta.getBaseName() : fasta def columns = blast_columns ? "${blast_columns}" : '' + def exclude_taxon = taxid ? "--taxon-exclude ${taxid}" : '' switch ( out_ext ) { case "blast": outfmt = 0; break case "xml": outfmt = 5; break @@ -60,6 +62,7 @@ process DIAMOND_BLASTX { --db \$DB \\ --query ${fasta_name} \\ --outfmt ${outfmt} ${columns} \\ + ${exclude_taxon} \\ ${args} \\ --out ${prefix}.${out_ext} \\ --log diff --git a/modules/nf-core/pigz/compress/environment.yml b/modules/nf-core/pigz/compress/environment.yml new file mode 100644 index 00000000..7551d187 --- /dev/null +++ b/modules/nf-core/pigz/compress/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "pigz_compress" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "pigz=2.8" diff --git a/modules/nf-core/pigz/compress/main.nf b/modules/nf-core/pigz/compress/main.nf new file mode 100644 index 00000000..152e7006 --- /dev/null +++ b/modules/nf-core/pigz/compress/main.nf @@ -0,0 +1,45 @@ +process PIGZ_COMPRESS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pigz:2.8': + 'biocontainers/pigz:2.8' }" + + input: + tuple val(meta), path(raw_file) + + output: + tuple val(meta), path("$archive"), emit: archive + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + archive = raw_file.toString() + ".gz" + """ + # Note: needs --stdout for pigz to avoid the following issue: + # pigz: skipping: ${raw_file} is a symbolic link + pigz --processes $task.cpus --stdout --force ${args} ${raw_file} > ${archive} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + archive = raw_file.toString() + ".gz" + """ + touch ${archive} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/pigz/compress/meta.yml b/modules/nf-core/pigz/compress/meta.yml new file mode 100644 index 00000000..42efd735 --- /dev/null +++ b/modules/nf-core/pigz/compress/meta.yml @@ -0,0 +1,47 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "pigz_compress" +description: Compresses files with pigz. +keywords: + - compress + - gzip + - parallelized +tools: + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + + - raw_file: + type: file + description: File to be compressed + pattern: "*.*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + + - archive: + type: file + description: The compressed file + pattern: "*.gz" + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@leoisl" +maintainers: + - "@leoisl" diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test b/modules/nf-core/pigz/compress/tests/main.nf.test new file mode 100644 index 00000000..248d40fb --- /dev/null +++ b/modules/nf-core/pigz/compress/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + name "Test Process PIGZ_COMPRESS" + script "../main.nf" + process "PIGZ_COMPRESS" + + tag "modules" + tag "modules_nfcore" + tag "pigz" + tag "pigz/compress" + + test("sarscov2 - genome - fasta") { + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - genome - fasta - stub") { + options "-stub-run" + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.archive[0][1]).name).match() } + ) + } + } +} diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test.snap b/modules/nf-core/pigz/compress/tests/main.nf.test.snap new file mode 100644 index 00000000..6e50456f --- /dev/null +++ b/modules/nf-core/pigz/compress/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "sarscov2 - genome - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "1": [ + "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad" + ], + "archive": [ + [ + { + "id": "test" + }, + "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6" + ] + ], + "versions": [ + "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad" + ] + } + ], + "timestamp": "2023-12-11T22:39:53.350546" + }, + "sarscov2 - genome - fasta - stub": { + "content": [ + "genome.fasta.gz" + ], + "timestamp": "2023-12-11T22:52:24.309192" + } +} \ No newline at end of file diff --git a/modules/nf-core/pigz/compress/tests/tags.yml b/modules/nf-core/pigz/compress/tests/tags.yml new file mode 100644 index 00000000..42c46bfa --- /dev/null +++ b/modules/nf-core/pigz/compress/tests/tags.yml @@ -0,0 +1,2 @@ +pigz/compress: + - "modules/nf-core/pigz/compress/**" diff --git a/nextflow.config b/nextflow.config index 83aaaafc..db5ef388 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,11 +17,10 @@ params { mask = false fetchngs_samplesheet = false - // Reference options + // Reference options fasta = null accession = null taxon = null - taxa_file = null // Output options image_format = 'png' @@ -32,10 +31,6 @@ params { blastp = null blastx = null blastn = null - blastp_outext = 'txt' - blastp_cols = 'qseqid staxids bitscore qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore' - blastx_outext = 'txt' - blastx_cols = 'qseqid staxids bitscore qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore' // MultiQC options multiqc_config = null @@ -248,7 +243,7 @@ manifest { description = """Quality assessment of genome assemblies""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.4.0' + version = '0.5.0' doi = '10.5281/zenodo.7949058' } diff --git 
a/nextflow_schema.json b/nextflow_schema.json index 97c84534..b392e2a5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -75,7 +75,7 @@ "type": "object", "fa_icon": "fas fa-dna", "description": "Reference genome related files and options required for the workflow.", - "required": ["taxon", "accession", "fasta"], + "required": ["taxon", "fasta"], "properties": { "taxon": { "type": ["string", "integer"], @@ -102,43 +102,12 @@ "description": "Define the location and parameters to work with databases.", "required": ["blastp", "blastx", "blastn", "taxdump"], "properties": { - "taxa_file": { - "type": "string", - "format": "file-path", - "description": "Path to file containing the BUSCO lineages for the genome species", - "help_text": "If this file is not included, the relevant BUSCO lineages are automatically calculated using the taxon parameter.", - "fa_icon": "fas fa-file-alt" - }, "busco": { "type": "string", "format": "directory-path", "description": "Local directory where clade-specific BUSCO lineage datasets are stored", "fa_icon": "fas fa-folder-open" }, - "blastp_cols": { - "type": "string", - "description": "When blastp_outext is 'txt', this is the list of columns that Diamond BLAST should print.", - "default": "qseqid staxids bitscore qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore" - }, - "blastp_outext": { - "type": "string", - "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], - "description": "Extension (file format) of the output file from Diamond BLAST.", - "fa_icon": "fas fa-file-circle-question", - "default": "txt" - }, - "blastx_cols": { - "type": "string", - "description": "When blastx_outext is 'txt', this is the list of columns that Diamond BLAST should print.", - "default": "qseqid staxids bitscore qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore" - }, - "blastx_outext": { - "type": "string", - "enum": ["blast", "xml", "txt", "daa", "sam", "tsv", "paf"], - "description": "Extension (file format) of the output file from Diamond BLAST.", - "fa_icon": "fas fa-file-circle-question", - "default": "txt" - }, "blastp": { "type": "string", "format": "file-path", diff --git a/subworkflows/local/blobtools.nf b/subworkflows/local/blobtools.nf index 8411ad24..747bc9fa 100644 --- a/subworkflows/local/blobtools.nf +++ b/subworkflows/local/blobtools.nf @@ -28,14 +28,14 @@ workflow BLOBTOOLS { ch_versions = ch_versions.mix ( BLOBTOOLKIT_METADATA.out.versions.first() ) - // + // // Create Blobtools dataset files // BLOBTOOLKIT_CREATEBLOBDIR ( windowstats, busco, blastp, BLOBTOOLKIT_METADATA.out.yaml, taxdump ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_CREATEBLOBDIR.out.versions.first() ) - // + // // Update Blobtools dataset files // BLOBTOOLKIT_UPDATEBLOBDIR ( BLOBTOOLKIT_CREATEBLOBDIR.out.blobdir, blastx, blastn, taxdump ) diff --git a/subworkflows/local/busco_diamond_blastp.nf b/subworkflows/local/busco_diamond_blastp.nf index c3ebe104..2a89471f 100644 --- a/subworkflows/local/busco_diamond_blastp.nf +++ b/subworkflows/local/busco_diamond_blastp.nf @@ -12,11 +12,9 @@ include { RESTRUCTUREBUSCODIR } from '../../modules/local/restructurebusco workflow BUSCO_DIAMOND { take: fasta // channel: [ val(meta), path(fasta) ] - taxon_taxa // channel: [ val(meta, val(taxon), path(taxa) ] + taxon // channel: val(taxon) busco_db // channel: path(busco_db) blastp // channel: path(blastp_db) - outext // channel: val(out_format) - cols // channel: val(column_names) main: @@ -24,11 +22,13 @@ workflow 
BUSCO_DIAMOND { // - // Fetch BUSCO lineages for taxon (or taxa) + // Fetch BUSCO lineages for taxon // - GOAT_TAXONSEARCH ( taxon_taxa ) + GOAT_TAXONSEARCH ( + fasta.combine(taxon).map { meta, fasta, taxon -> [ meta, taxon, [] ] } + ) ch_versions = ch_versions.mix ( GOAT_TAXONSEARCH.out.versions.first() ) - + // // Get NCBI species ID @@ -39,6 +39,7 @@ workflow BUSCO_DIAMOND { | transpose() | filter { rank,id -> rank =~ /species/ } | map { rank, id -> id} + | first | set { ch_taxid } @@ -70,7 +71,7 @@ workflow BUSCO_DIAMOND { ch_fasta_with_lineage, "genome", ch_fasta_with_lineage.map { it[0].lineage_name }, - busco_db.collect().ifEmpty([]), + busco_db, [], ) ch_versions = ch_versions.mix ( BUSCO.out.versions.first() ) @@ -108,12 +109,15 @@ workflow BUSCO_DIAMOND { // // Align BUSCO genes against the BLASTp database - // + // BLOBTOOLKIT_EXTRACTBUSCOS.out.genes | filter { it[1].size() > 140 } | set { ch_busco_genes } - DIAMOND_BLASTP ( ch_busco_genes, blastp, outext, cols ) + // Hardcoded to match the format expected by blobtools + def outext = 'txt' + def cols = 'qseqid staxids bitscore qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore' + DIAMOND_BLASTP ( ch_busco_genes, blastp, outext, cols, ch_taxid ) ch_versions = ch_versions.mix ( DIAMOND_BLASTP.out.versions.first() ) @@ -141,7 +145,7 @@ workflow BUSCO_DIAMOND { emit: - first_table = ch_first_table // channel: [ val(meta), path(full_table) ] + first_table = ch_first_table // channel: [ val(meta), path(full_table) ] all_tables = ch_indexed_buscos // channel: [ val(meta), path(full_tables) ] blastp_txt = DIAMOND_BLASTP.out.txt // channel: [ val(meta), path(txt) ] taxon_id = ch_taxid // channel: taxon_id diff --git a/subworkflows/local/collate_stats.nf b/subworkflows/local/collate_stats.nf index 08bc43c9..b986188d 100644 --- a/subworkflows/local/collate_stats.nf +++ b/subworkflows/local/collate_stats.nf @@ -8,7 +8,7 @@ include { BLOBTOOLKIT_WINDOWSTATS } from '../../modules/local/blobtoolkit/window workflow COLLATE_STATS { - take: + take: busco // channel: [ val(meta), path(full_table) ] bed // channel: [ val(meta), path(bed) ] freq // channel: [ val(meta), path(freq) ] diff --git a/subworkflows/local/coverage_stats.nf b/subworkflows/local/coverage_stats.nf index 79b39a8a..86703851 100644 --- a/subworkflows/local/coverage_stats.nf +++ b/subworkflows/local/coverage_stats.nf @@ -6,12 +6,13 @@ include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' include { BLOBTK_DEPTH } from '../../modules/local/blobtk/depth' include { FASTAWINDOWS } from '../../modules/nf-core/fastawindows/main' +include { PIGZ_COMPRESS } from '../../modules/nf-core/pigz/compress/main' include { CREATE_BED } from '../../modules/local/create_bed' workflow COVERAGE_STATS { - take: - input // channel: [ val(meta), path(aln) ] + take: + input // channel: [ val(meta), path(aln) ] fasta // channel: [ val(meta), path(fasta) ] @@ -53,11 +54,22 @@ workflow COVERAGE_STATS { ch_versions = ch_versions.mix ( FASTAWINDOWS.out.versions.first() ) + // Compress the TSV files + PIGZ_COMPRESS ( + FASTAWINDOWS.out.mononuc + | mix ( FASTAWINDOWS.out.dinuc ) + | mix ( FASTAWINDOWS.out.trinuc ) + | mix ( FASTAWINDOWS.out.tetranuc ) + | mix ( FASTAWINDOWS.out.freq ) + ) + ch_versions = ch_versions.mix ( PIGZ_COMPRESS.out.versions.first() ) + + // Create genome windows file in BED format CREATE_BED ( FASTAWINDOWS.out.mononuc ) ch_versions = ch_versions.mix ( 
CREATE_BED.out.versions.first() ) - + // Calculate coverage BLOBTK_DEPTH ( ch_bam_csi ) ch_versions = ch_versions.mix ( BLOBTK_DEPTH.out.versions.first() ) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index da522ca8..d498269f 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -20,7 +20,7 @@ workflow INPUT_CHECK { if ( params.fetchngs_samplesheet ) { FETCHNGSSAMPLESHEET_CHECK ( samplesheet ) .csv - .splitCsv ( header:true, sep:',' ) + .splitCsv ( header:true, sep:',', quote:'"' ) .branch { row -> paired: row.fastq_2 [[id: row.run_accession, row:row], [row.fastq_1, row.fastq_2]] diff --git a/subworkflows/local/minimap_alignment.nf b/subworkflows/local/minimap_alignment.nf index 1d6263b3..0c25f4c7 100644 --- a/subworkflows/local/minimap_alignment.nf +++ b/subworkflows/local/minimap_alignment.nf @@ -1,4 +1,4 @@ -// +// // Optional alignment subworkflow using Minimap2 // @@ -52,7 +52,7 @@ workflow MINIMAP2_ALIGNMENT { // Align with Minimap2 MINIMAP2_HIC ( ch_input.hic, fasta, true, false, false ) ch_versions = ch_versions.mix(MINIMAP2_HIC.out.versions.first()) - + MINIMAP2_ILMN ( ch_input.illumina, fasta, true, false, false ) ch_versions = ch_versions.mix(MINIMAP2_ILMN.out.versions.first()) diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 0b426fae..a1f03980 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -48,7 +48,7 @@ workflow PREPARE_GENOME { ch_fasta = ch_genome } - + emit: genome = ch_fasta // channel: [ meta, path(genome) ] versions = ch_versions // channel: [ versions.yml ] diff --git a/subworkflows/local/run_blastn.nf b/subworkflows/local/run_blastn.nf index cc1fa6c5..1ea64b82 100644 --- a/subworkflows/local/run_blastn.nf +++ b/subworkflows/local/run_blastn.nf @@ -12,8 +12,8 @@ include { BLOBTOOLKIT_UNCHUNK } from '../../modules/local/blobtoolkit/u workflow RUN_BLASTN { - take: - blast_table // channel: [ val(meta), path(blast_table) ] + take: + blast_table // channel: [ val(meta), path(blast_table) ] fasta // channel: [ val(meta), path(fasta) ] blastn // channel: [ val(meta), path(blastn_db) ] taxon_id // channel: val(taxon_id) @@ -27,16 +27,16 @@ workflow RUN_BLASTN { // Get list of sequence ids with no hits in diamond blastx search NOHIT_LIST ( blast_table, fasta ) ch_versions = ch_versions.mix ( NOHIT_LIST.out.versions.first() ) - + // Subset of sequences with no hits SEQTK_SUBSEQ ( fasta, - NOHIT_LIST.out.nohitlist.map { meta, nohit -> nohit } + NOHIT_LIST.out.nohitlist.map { meta, nohit -> nohit } . 
filter { it.size() > 0 } ) ch_versions = ch_versions.mix ( SEQTK_SUBSEQ.out.versions.first() ) - - - // Split long contigs into chunks + + + // Split long contigs into chunks // create chunks BLOBTOOLKIT_CHUNK ( SEQTK_SUBSEQ.out.sequences, [[],[]] ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_CHUNK.out.versions.first() ) diff --git a/subworkflows/local/run_blastx.nf b/subworkflows/local/run_blastx.nf index 1bad6f6d..715e5ae2 100644 --- a/subworkflows/local/run_blastx.nf +++ b/subworkflows/local/run_blastx.nf @@ -11,8 +11,7 @@ workflow RUN_BLASTX { fasta // channel: [ val(meta), path(fasta) ] table // channel: [ val(meta), path(busco_table) ] blastx // channel: [ val(meta), path(blastx_db) ] - outext // channel: val(out_format) - cols // channel: val(column_names) + taxon_id // channel: val(taxon_id) main: @@ -29,9 +28,12 @@ workflow RUN_BLASTX { // // Run diamond_blastx // - DIAMOND_BLASTX ( BLOBTOOLKIT_CHUNK.out.chunks, blastx, outext, cols) + // Hardcoded to match the format expected by blobtools + def outext = 'txt' + def cols = 'qseqid staxids bitscore qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore' + DIAMOND_BLASTX ( BLOBTOOLKIT_CHUNK.out.chunks, blastx, outext, cols, taxon_id ) ch_versions = ch_versions.mix ( DIAMOND_BLASTX.out.versions.first() ) - + // // Unchunk chunked blastx results diff --git a/workflows/blobtoolkit.nf b/workflows/blobtoolkit.nf index f25da1eb..3610cdde 100644 --- a/workflows/blobtoolkit.nf +++ b/workflows/blobtoolkit.nf @@ -17,22 +17,24 @@ WorkflowBlobtoolkit.initialise(params, log) // Add all file path parameters for the pipeline to the list below // Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config, params.fasta, params.taxa_file, params.taxdump, params.busco, params.blastp, params.blastx ] +def checkPathParamList = [ params.input, params.multiqc_config, params.fasta, params.taxdump, params.busco, params.blastp, params.blastx ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } -if (params.fasta && params.accession) { ch_fasta = Channel.of([ [ 'id': params.accession ], params.fasta ]).first() } else { exit 1, 'Genome fasta file and accession must be specified!' } -if (params.taxon) { ch_taxon = Channel.of(params.taxon) } else { exit 1, 'NCBI Taxon ID not specified!' } -if (params.blastp && params.accession) { ch_blastp = Channel.of([ [ 'id': params.accession ], params.blastp ]).first() } else { exit 1, 'Diamond BLASTp database and accession must be specified!' } -if (params.blastx && params.accession) { ch_blastx = Channel.of([ [ 'id': params.accession ], params.blastx ]).first() } else { exit 1, 'Diamond BLASTx database and accession must be specified!' } -if (params.blastn && params.accession) { ch_blastn = Channel.of([ [ 'id': params.accession ], params.blastn ]).first() } else { exit 1, 'BLASTn database not specified!' } +if (params.fasta) { ch_fasta = Channel.value([ [ 'id': params.accession ?: file(params.fasta.replace(".gz", "")).baseName ], file(params.fasta) ]) } else { exit 1, 'Genome fasta file must be specified!' } +if (params.taxon) { ch_taxon = Channel.value(params.taxon) } else { exit 1, 'NCBI Taxon ID not specified!' 
} +if (params.blastp) { ch_blastp = Channel.value([ [ 'id': file(params.blastp).baseName ], params.blastp ]) } else { exit 1, 'Diamond BLASTp database must be specified!' } +if (params.blastx) { ch_blastx = Channel.value([ [ 'id': file(params.blastx).baseName ], params.blastx ]) } else { exit 1, 'Diamond BLASTx database must be specified!' } +if (params.blastn) { ch_blastn = Channel.value([ [ 'id': file(params.blastn).baseName ], params.blastn ]) } else { exit 1, 'BLASTn database not specified!' } if (params.taxdump) { ch_taxdump = file(params.taxdump) } else { exit 1, 'NCBI Taxonomy database not specified!' } if (params.fetchngs_samplesheet && !params.align) { exit 1, '--align not specified, even though the input samplesheet is a nf-core/fetchngs one - i.e has fastq files!' } // Create channel for optional parameters -if (params.busco) { ch_busco_db = Channel.fromPath(params.busco) } else { ch_busco_db = Channel.empty() } -if (params.yaml && params.accession) { ch_yaml = Channel.of([ [ 'id': params.accession ], params.yaml ]) } else { ch_yaml = Channel.empty() } +if (params.busco) { ch_busco_db = Channel.fromPath(params.busco).first() } else { ch_busco_db = Channel.value([]) } +if (params.yaml) { ch_yaml = Channel.fromPath(params.yaml) } else { ch_yaml = Channel.empty() } +if (params.yaml && params.accession) { exit 1, '--yaml cannot be provided at the same time as --accession!' } +if (!params.yaml && !params.accession) { exit 1, '--yaml and --accession are both missing. Pick one!' } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -51,11 +53,6 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// MODULE: Loaded from modules/local/ -// -include { BLOBTOOLKIT_CONFIG } from '../modules/local/blobtoolkit/config' - // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // @@ -108,7 +105,7 @@ workflow BLOBTOOLKIT { INPUT_CHECK ( ch_input, PREPARE_GENOME.out.genome, ch_yaml ) ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions ) - // + // // SUBWORKFLOW: Optional read alignment // if ( params.align ) { @@ -120,7 +117,7 @@ } // - // SUBWORKFLOW: Calculate genome coverage and statistics + // SUBWORKFLOW: Calculate genome coverage and statistics // COVERAGE_STATS ( ch_aligned, PREPARE_GENOME.out.genome ) ch_versions = ch_versions.mix ( COVERAGE_STATS.out.versions ) @@ -128,32 +125,22 @@ // // SUBWORKFLOW: Run BUSCO using lineages fetched from GOAT, then run diamond_blastp // - if (params.taxa_file) { - ch_taxa = Channel.from(params.taxa_file) - ch_taxon_taxa = PREPARE_GENOME.out.genome.combine(ch_taxon).combine(ch_taxa).map { meta, fasta, taxon, taxa -> [ meta, taxon, taxa ] } - } else { - ch_taxon_taxa = PREPARE_GENOME.out.genome.combine(ch_taxon).map { meta, fasta, taxon -> [ meta, taxon, [] ] } - } - - BUSCO_DIAMOND ( - PREPARE_GENOME.out.genome, - ch_taxon_taxa, - ch_busco_db, - ch_blastp, - params.blastp_outext, - params.blastp_cols + BUSCO_DIAMOND ( + PREPARE_GENOME.out.genome, + ch_taxon, + ch_busco_db, + ch_blastp, ) ch_versions = ch_versions.mix ( BUSCO_DIAMOND.out.versions ) - + // // SUBWORKFLOW: Diamond blastx search of assembly contigs against the UniProt reference proteomes // - RUN_BLASTX ( + RUN_BLASTX ( PREPARE_GENOME.out.genome, BUSCO_DIAMOND.out.first_table, ch_blastx, - params.blastx_outext, - params.blastx_cols + BUSCO_DIAMOND.out.taxon_id, 
) ch_versions = ch_versions.mix ( RUN_BLASTX.out.versions ) @@ -161,29 +148,29 @@ workflow BLOBTOOLKIT { // // SUBWORKFLOW: Run blastn search on sequences that had no blastx hits // - RUN_BLASTN ( - RUN_BLASTX.out.blastx_out, - PREPARE_GENOME.out.genome, - ch_blastn, - BUSCO_DIAMOND.out.taxon_id + RUN_BLASTN ( + RUN_BLASTX.out.blastx_out, + PREPARE_GENOME.out.genome, + ch_blastn, + BUSCO_DIAMOND.out.taxon_id, ) - + // // SUBWORKFLOW: Collate genome statistics by various window sizes // - COLLATE_STATS ( + COLLATE_STATS ( BUSCO_DIAMOND.out.all_tables, - COVERAGE_STATS.out.bed, - COVERAGE_STATS.out.freq, - COVERAGE_STATS.out.mononuc, - COVERAGE_STATS.out.cov + COVERAGE_STATS.out.bed, + COVERAGE_STATS.out.freq, + COVERAGE_STATS.out.mononuc, + COVERAGE_STATS.out.cov ) ch_versions = ch_versions.mix ( COLLATE_STATS.out.versions ) // // SUBWORKFLOW: Create BlobTools dataset // - BLOBTOOLS ( + BLOBTOOLS ( INPUT_CHECK.out.config, COLLATE_STATS.out.window_tsv, BUSCO_DIAMOND.out.all_tables, @@ -193,7 +180,7 @@ workflow BLOBTOOLKIT { ch_taxdump ) ch_versions = ch_versions.mix ( BLOBTOOLS.out.versions ) - + // // SUBWORKFLOW: Generate summary and static images //
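Editor's note on the channel rework in `workflows/blobtoolkit.nf` above: the database channels now use `Channel.value(...)`, and the optional BUSCO directory falls back to `Channel.value([])` instead of `Channel.empty()`. The difference matters because a value channel can be read any number of times by downstream processes, whereas an empty queue channel emits nothing and leaves its consumers waiting forever. A minimal sketch of the optional-input idiom; the `DEMO` process and its `--db` flag are made up for illustration:

```nextflow
// Sketch: Channel.value([]) means "no optional file given, run anyway",
// while Channel.empty() would emit nothing and DEMO would never start.
process DEMO {                                 // hypothetical consumer process
    input:
    path busco_db                              // stages [] as "nothing" when absent

    script:
    def db_arg = busco_db ? "--db ${busco_db}" : ''   // hypothetical flag
    """
    echo demo ${db_arg}
    """
}

workflow {
    ch_busco_db = params.busco
        ? Channel.fromPath(params.busco).first()   // .first() yields a reusable value channel
        : Channel.value([])
    DEMO ( ch_busco_db )
}
```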