Skip to content

Commit

Permalink
refactor Mash paste in db build scripts
Browse files (browse the repository at this point in the history)
  • Loading branch information
oschwengers committed Jan 11, 2024
1 parent acac0c7 commit f13fb27
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 6 deletions.
5 changes: 3 additions & 2 deletions db-scripts/build-db-gtdb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ gunzip metadata.tsv.gz
DATA_PATH=$(readlink -f gtdb_genomes_reps_r*/database)
nextflow run $REFERENCE_SEEKER_HOME/db-scripts/build-db-gtdb.nf --metadata ./metadata.tsv --representatives $DATA_PATH --domain $DOMAIN || { echo "Nextflow failed!"; exit; }

-mash paste db sketches/*.msh || { echo "Mash failed!"; exit; }
+find sketches/ -type f -name '*.msh' -exec realpath {} + > sketches.fof
+mash paste -l db sketches.fof || { echo "Mash failed!"; exit; }

-rm -rf work/ .nextflow* sketches/ *.tsv *_metadata.tar.gz gtdb_genomes_reps.tar.gz
+rm -rf work/ .nextflow* sketches/ sketches.fof metadata.tsv gtdb_genomes_*

mv db.msh $DOMAIN/

Expand Down
5 changes: 3 additions & 2 deletions db-scripts/build-plasmids-db-plsdb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ echo "Unzip plasmid sequences..."
bunzip2 plsdb.fna.bz2
nextflow run $REFERENCE_SEEKER_HOME/db-scripts/build-plasmids-db-plsdb.nf --plasmids plsdb.fna || { echo "Nextflow failed!"; exit; }

-mash paste db sketches/*.msh || { echo "Mash failed!"; exit; }
+find sketches/ -type f -name '*.msh' -exec realpath {} + > sketches.fof
+mash paste -l db sketches.fof || { echo "Mash failed!"; exit; }

-rm -rf work/ .nextflow* sketches/ plsdb.fna
+rm -rf work/ .nextflow* sketches/ sketches.fof plsdb.fna

mv db.msh plasmids-plsdb/

Expand Down
5 changes: 3 additions & 2 deletions db-scripts/build-plasmids-db-refseq.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@ zcat plasmid.* > plasmids.fna

nextflow run $REFERENCE_SEEKER_HOME/db-scripts/build-plasmids-db-refseq.nf --plasmids plasmids.fna || { echo "Nextflow failed!"; exit; }

-mash paste db sketches/*.msh || { echo "Mash failed!"; exit; }
+find sketches/ -type f -name '*.msh' -exec realpath {} + > sketches.fof
+mash paste -l db sketches.fof || { echo "Mash failed!"; exit; }

-rm -rf work/ .nextflow* sketches/ plasmid.* plasmids.fna
+rm -rf work/ .nextflow* sketches/ plasmid.* sketches.fof plasmids.fna

mv db.msh plasmids-refseq/

Expand Down

0 comments on commit f13fb27

Please sign in to comment.