Skip to content

Commit

Permalink
Merge pull request #4 from GhazalehManj/main
Browse files Browse the repository at this point in the history
SCanD-updates
  • Loading branch information
GhazalehManj authored Mar 22, 2024
2 parents 8bd5863 + 01156db commit d6204b6
Show file tree
Hide file tree
Showing 43 changed files with 1,960 additions and 305 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# unnecessary directories
logs/*
data/*
work/*

339 changes: 268 additions & 71 deletions README.md

Large diffs are not rendered by default.

69 changes: 69 additions & 0 deletions Workflow Automation_stages.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Automated Pipeline Coordination and Code Integration for Efficient Workflow Execution

In this project, we have devised a streamlined solution for managing multiple pipelines with a focus on seamless coordination and code integration. Our approach involves the creation of an automated system that orchestrates the execution of diverse pipelines at each stage. By combining and organizing the code needed for each stage's tasks, we aim to optimize workflow efficiency.

After setting up the SciNet environment and organizing your BIDS folder and participants.csv file, you can run the code for each stage.

## stage 1 (mriqc, fmriprep_anat, qsiprep):
```sh
# note step one is to make sure you are on one of the login nodes
ssh nia-login07

## go to the repo and pull new changes
cd ${SCRATCH}/SCanD_project_GMANJ
git pull #in case you need to pull new code

source ./stage_1.sh
```


## stage 2 (fmriprep_func, qsirecon1):

```sh
# note step one is to make sure you are on one of the login nodes
ssh nia-login07

## go to the repo and pull new changes
cd ${SCRATCH}/SCanD_project_GMANJ
git pull #in case you need to pull new code

source ./stage_2.sh
```

## stage 3 (ciftify_anat, xcp_scinet, enigma extract, enigma_dti, tractography, qsirecon2):

```sh
# note step one is to make sure you are on one of the login nodes
ssh nia-login07

## go to the repo and pull new changes
cd ${SCRATCH}/SCanD_project_GMANJ
git pull #in case you need to pull new code

source ./stage_3.sh
```

## stage 4 (parcellation):

```sh
# note step one is to make sure you are on one of the login nodes
ssh nia-login07

## go to the repo and pull new changes
cd ${SCRATCH}/SCanD_project_GMANJ
git pull #in case you need to pull new code

source ./stage_4.sh
```
## stage 5 (extract data to share folder):

```sh
# note step one is to make sure you are on one of the login nodes
ssh nia-login07

## go to the repo and pull new changes
cd ${SCRATCH}/SCanD_project_GMANJ
git pull #in case you need to pull new code

source ./stage_5.sh
```
8 changes: 5 additions & 3 deletions code/00_setup_data_directories.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,18 @@ mkdir -p logs
## link the containers
echo "linking singularity containers"
CONTAINER_DIR=/scinet/course/ss2019/3/5_neuroimaging/containers
#ln -s ${CONTAINER_DIR}/fmriprep-21.0.2.simg containers/fmriprep-21.0.2.simg
ln -s ${CONTAINER_DIR}/fmriprep-20.2.7.simg containers/fmriprep-20.2.7.simg
#ln -s ${CONTAINER_DIR}/fmriprep-20.1.1.simg containers/fmriprep-20.1.1.simg
ln -s ${CONTAINER_DIR}/fmriprep_ciftity-v1.3.2-2.3.3.simg containers/fmriprep_ciftity-v1.3.2-2.3.3.simg
ln -s ${CONTAINER_DIR}/mriqc-22.0.6.simg containers/mriqc-22.0.6.simg
ln -s ${CONTAINER_DIR}/qsiprep_0.16.0RC3.simg containers/qsiprep_0.16.0RC3.simg
ln -s ${CONTAINER_DIR}/xcp_d-0.6.0.simg containers/xcp_d-0.6.0.simg
ln -s ${CONTAINER_DIR}/fmriprep_ciftity-v1.3.2-2.3.3.simg containers/fmriprep_ciftity-v1.3.2-2.3.3.simg
ln -s ${CONTAINER_DIR}/tbss_2023-10-10.simg containers/tbss_2023-10-10.simg


## copy in Erin's freesurfer licence
cp /scinet/course/ss2019/3/5_neuroimaging/fs_license/license.txt templates/.freesurfer.txt


## copy in Erin's templates
echo "copying templates..this might take a bit"
scp -r /scinet/course/ss2019/3/5_neuroimaging/templateflow templates/.cache/
Expand Down
25 changes: 20 additions & 5 deletions code/01_fmriprep_anat_scinet.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#SBATCH --time=16:00:00


SUB_SIZE=5 ## number of subjects to run
SUB_SIZE=1 ## number of subjects to run
CORES=40
export THREADS_PER_COMMAND=2

Expand Down Expand Up @@ -46,9 +46,18 @@ export WORK_DIR=${BBUFFER}/SCanD/fmriprep
export LOGS_DIR=${BASEDIR}/logs
mkdir -vp ${OUTPUT_DIR} ${WORK_DIR} # ${LOCAL_FREESURFER_DIR}

## get the subject list from a combo of the array id, the participants.tsv and the chunk size
## get the subject list from a combo of the array id, the participants.tsv and the chunk
bigger_bit=`echo "($SLURM_ARRAY_TASK_ID + 1) * ${SUB_SIZE}" | bc`
SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${bigger_bit} | tail -n ${SUB_SIZE}`

N_SUBJECTS=$(( $( wc -l ${BIDS_DIR}/participants.tsv | cut -f1 -d' ' ) - 1 ))
array_job_length=$(echo "$N_SUBJECTS/${SUB_SIZE}" | bc)
Tail=$((N_SUBJECTS-(array_job_length*SUB_SIZE)))

if [ "$SLURM_ARRAY_TASK_ID" -eq "$array_job_length" ]; then
SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${N_SUBJECTS} | tail -n ${Tail}`
else
SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${bigger_bit} | tail -n ${SUB_SIZE}`
fi

## set singularity environment variables that will point to the freesurfer license and the templateflow bits
# export SINGULARITYENV_TEMPLATEFLOW_HOME=/home/fmriprep/.cache/templateflow
Expand Down Expand Up @@ -85,6 +94,12 @@ exitcode=$?

# Output results to a table
for subject in $SUBJECTS; do
echo "sub-$subject ${SLURM_ARRAY_TASK_ID} $exitcode" \
>> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
if [ $exitcode -eq 0 ]; then
echo "sub-$subject ${SLURM_ARRAY_TASK_ID} 0" \
>> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
else
echo "sub-$subject ${SLURM_ARRAY_TASK_ID} fmriprep_anat failed" \
>> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
fi
done

33 changes: 23 additions & 10 deletions code/01_mriqc.sh → code/01_mriqc_scinet.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
#SBATCH --output=logs/mriqc%x_%j.out
#SBATCH --nodes=1
#SBATCH --cpus-per-task=80
#SBATCH --time=12:00:00
#SBATCH --time=18:00:00


SUB_SIZE=10 ## number of subjects to run
SUB_SIZE=4 ## number of subjects to run
export THREADS_PER_COMMAND=2

####----### the next bit only works IF this script is submitted from the $BASEDIR/$OPENNEURO_DS folder...
Expand Down Expand Up @@ -46,8 +46,19 @@ export LOGS_DIR=${BASEDIR}/logs
mkdir -vp ${OUTPUT_DIR} ${WORK_DIR} # ${LOCAL_FREESURFER_DIR}

## get the subject list from a combo of the array id, the participants.tsv and the chunk size

bigger_bit=`echo "($SLURM_ARRAY_TASK_ID + 1) * ${SUB_SIZE}" | bc`
SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${bigger_bit} | tail -n ${SUB_SIZE}`

N_SUBJECTS=$(( $( wc -l ${BIDS_DIR}/participants.tsv | cut -f1 -d' ' ) - 1 ))
array_job_length=$(echo "$N_SUBJECTS/${SUB_SIZE}" | bc)
Tail=$((N_SUBJECTS-(array_job_length*SUB_SIZE)))

if [ "$SLURM_ARRAY_TASK_ID" -eq "$array_job_length" ]; then
SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${N_SUBJECTS} | tail -n ${Tail}`
else
SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${bigger_bit} | tail -n ${SUB_SIZE}`
fi


## set singularity environment variables that will point to the freesurfer license and the templateflow bits
# export SINGULARITYENV_TEMPLATEFLOW_HOME=/home/fmriprep/.cache/templateflow
Expand Down Expand Up @@ -78,12 +89,14 @@ singularity run --cleanenv \

exitcode=$?

# -B ${BIDS_DIR}:/bids \
# -B ${OUTPUT_DIR}:/out \
# -B ${LOCAL_FREESURFER_DIR}:/fsdir \


# Output results to a table
for subject in $SUBJECTS; do
echo "sub-$subject ${SLURM_ARRAY_TASK_ID} $exitcode" \
>> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
done
if [ $exitcode -eq 0 ]; then
echo "sub-$subject ${SLURM_ARRAY_TASK_ID} 0" \
>> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
else
echo "sub-$subject ${SLURM_ARRAY_TASK_ID} mriqc failed" \
>> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
fi
done
24 changes: 19 additions & 5 deletions code/02_qsiprep_scinet.sh → code/01_qsiprep_scinet.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#SBATCH --output=logs/%x_%j.out
#SBATCH --nodes=1
#SBATCH --cpus-per-task=40
#SBATCH --time=20:00:00
#SBATCH --time=06:00:00


SUB_SIZE=2 ## number of subjects to run is 1 because there are multiple tasks/run that will run in parallel
Expand Down Expand Up @@ -45,9 +45,17 @@ export WORK_DIR=${BBUFFER}/SCanD/qsiprep
export LOGS_DIR=${BASEDIR}/logs
mkdir -vp ${OUTPUT_DIR} ${WORK_DIR} # ${LOCAL_FREESURFER_DIR}

## get the subject list from a combo of the array id, the participants.tsv and the chunk size
bigger_bit=`echo "($SLURM_ARRAY_TASK_ID + 1) * ${SUB_SIZE}" | bc`
SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${bigger_bit} | tail -n ${SUB_SIZE}`

N_SUBJECTS=$(( $( wc -l ${BIDS_DIR}/participants.tsv | cut -f1 -d' ' ) - 1 ))
array_job_length=$(echo "$N_SUBJECTS/${SUB_SIZE}" | bc)
Tail=$((N_SUBJECTS-(array_job_length*SUB_SIZE)))

if [ "$SLURM_ARRAY_TASK_ID" -eq "$array_job_length" ]; then
SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${N_SUBJECTS} | tail -n ${Tail}`
else
SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${bigger_bit} | tail -n ${SUB_SIZE}`
fi

## set singularity environment variables that will point to the freesurfer license and the templateflow bits
# Make sure FS_LICENSE is defined in the container.
Expand Down Expand Up @@ -78,6 +86,12 @@ exitcode=$?

# Output results to a table
for subject in $SUBJECTS; do
echo "sub-$subject ${SLURM_ARRAY_TASK_ID} $exitcode" \
>> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
if [ $exitcode -eq 0 ]; then
echo "sub-$subject ${SLURM_ARRAY_TASK_ID} 0" \
>> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
else
echo "sub-$subject ${SLURM_ARRAY_TASK_ID} qsiprep failed" \
>> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
fi
done

26 changes: 20 additions & 6 deletions code/02_fmriprep_func_scinet.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
#SBATCH --output=logs/%x_%j.out
#SBATCH --nodes=1
#SBATCH --cpus-per-task=40
#SBATCH --time=12:00:00
#SBATCH --time=23:00:00


SUB_SIZE=2 ## number of subjects to run is 1 because there are multiple tasks/run that will run in parallel
SUB_SIZE=1 ## number of subjects to run is 1 because there are multiple tasks/run that will run in parallel
CORES=40
export THREADS_PER_COMMAND=2

Expand Down Expand Up @@ -47,8 +47,16 @@ mkdir -vp ${OUTPUT_DIR} ${WORK_DIR} # ${LOCAL_FREESURFER_DIR}

## get the subject list from a combo of the array id, the participants.tsv and the chunk size
bigger_bit=`echo "($SLURM_ARRAY_TASK_ID + 1) * ${SUB_SIZE}" | bc`
SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${bigger_bit} | tail -n ${SUB_SIZE}`

N_SUBJECTS=$(( $( wc -l ${BIDS_DIR}/participants.tsv | cut -f1 -d' ' ) - 1 ))
array_job_length=$(echo "$N_SUBJECTS/${SUB_SIZE}" | bc)
Tail=$((N_SUBJECTS-(array_job_length*SUB_SIZE)))

if [ "$SLURM_ARRAY_TASK_ID" -eq "$array_job_length" ]; then
SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${N_SUBJECTS} | tail -n ${Tail}`
else
SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${bigger_bit} | tail -n ${SUB_SIZE}`
fi
## set singularity environment variables that will point to the freesurfer license and the templateflow bits
# Make sure FS_LICENSE is defined in the container.
export SINGULARITYENV_FS_LICENSE=/home/fmriprep/.freesurfer.txt
Expand All @@ -69,6 +77,7 @@ singularity run --cleanenv \
--participant_label ${SUBJECTS} \
-w /work \
--skip-bids-validation \
--cifti-output 91k \
--omp-nthreads 8 \
--nthreads 40 \
--mem-mb 15000 \
Expand All @@ -82,9 +91,14 @@ singularity run --cleanenv \

exitcode=$?


# Output results to a table
for subject in $SUBJECTS; do
echo "sub-$subject ${SLURM_ARRAY_TASK_ID} $exitcode" \
>> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
if [ $exitcode -eq 0 ]; then
echo "sub-$subject ${SLURM_ARRAY_TASK_ID} 0" \
>> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
else
echo "sub-$subject ${SLURM_ARRAY_TASK_ID} fmriprep_func failed" \
>> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
fi
done

93 changes: 93 additions & 0 deletions code/02_qsirecon_step1_scinet.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/bin/bash
#SBATCH --job-name=qsirecon1
#SBATCH --output=logs/%x_%j.out
#SBATCH --nodes=1
#SBATCH --cpus-per-task=40
#SBATCH --time=0:20:00

## qsirecon step 1: run the qsiprep container in --recon_only mode to
## reorient the preprocessed dwi outputs to FSL-standard orientation.

SUB_SIZE=1 ## number of subjects to run is 1 because there are multiple tasks/run that will run in parallel
CORES=40
export THREADS_PER_COMMAND=2

####----### the next bit only works IF this script is submitted from the $BASEDIR/$OPENNEURO_DS folder...

## set the second environment variable to get the base directory
BASEDIR=${SLURM_SUBMIT_DIR}

## set up a trap that will clear the ramdisk if it is not cleared
function cleanup_ramdisk {
    echo -n "Cleaning up ramdisk directory ${SLURM_TMPDIR} on "
    date
    # ":?" aborts if SLURM_TMPDIR is unset/empty, so this can never expand
    # to "rm -rf /" (the original "/$SLURM_TMPDIR" form had that hazard)
    rm -rf -- "${SLURM_TMPDIR:?}"
    echo -n "done at "
    date
}

#trap the termination signal, and call the function 'cleanup_ramdisk' when
# that happens, so results may be saved.
trap "cleanup_ramdisk" TERM

# input is BIDS_DIR this is where the data downloaded from openneuro went
export BIDS_DIR=${BASEDIR}/data/local/bids

## these folders envs need to be set up for this script to run properly
## see notebooks/00_setting_up_envs.md for the set up instructions
export QSIPREP_HOME=${BASEDIR}/templates
export SING_CONTAINER=${BASEDIR}/containers/qsiprep_0.16.0RC3.simg

## setting up the output folders
export OUTPUT_DIR=${BASEDIR}/data/local # use if version of fmriprep >=20.2
export QSIPREP_DIR=${BASEDIR}/data/local/qsiprep # use if version of fmriprep <=20.1

# export LOCAL_FREESURFER_DIR=${SCRATCH}/${STUDY}/data/derived/freesurfer-6.0.1
export WORK_DIR=${BBUFFER}/SCanD/qsiprep
export LOGS_DIR=${BASEDIR}/logs
mkdir -vp ${OUTPUT_DIR} ${WORK_DIR} # ${LOCAL_FREESURFER_DIR}

## get the subject list from a combo of the array id, the participants.tsv and the chunk size
bigger_bit=`echo "($SLURM_ARRAY_TASK_ID + 1) * ${SUB_SIZE}" | bc`

N_SUBJECTS=$(( $( wc -l ${BIDS_DIR}/participants.tsv | cut -f1 -d' ' ) - 1 ))
array_job_length=$(echo "$N_SUBJECTS/${SUB_SIZE}" | bc)
Tail=$((N_SUBJECTS-(array_job_length*SUB_SIZE)))

## the final array task picks up the remainder when N_SUBJECTS is not an
## exact multiple of SUB_SIZE; every other task takes a full chunk
if [ "$SLURM_ARRAY_TASK_ID" -eq "$array_job_length" ]; then
    SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${N_SUBJECTS} | tail -n ${Tail}`
else
    SUBJECTS=`sed -n -E "s/sub-(\S*)\>.*/\1/gp" ${BIDS_DIR}/participants.tsv | head -n ${bigger_bit} | tail -n ${SUB_SIZE}`
fi

## set singularity environment variables that will point to the freesurfer license and the templateflow bits
# Make sure FS_LICENSE is defined in the container.

export fs_license=${BASEDIR}/templates/.freesurfer.txt

# note: --skip-bids-validation is passed once (it was duplicated before)
singularity run --cleanenv \
    -B ${BASEDIR}/templates:/home/qsiprep --home /home/qsiprep \
    -B ${BIDS_DIR}:/bids \
    -B ${QSIPREP_DIR}:/derived \
    -B ${WORK_DIR}:/work \
    -B ${OUTPUT_DIR}:/out \
    -B ${fs_license}:/li \
    ${SING_CONTAINER} \
    /bids /out participant \
    --participant_label ${SUBJECTS} \
    -w /work \
    --skip-bids-validation \
    --omp-nthreads 8 \
    --nthreads 40 \
    --recon_only \
    --recon-spec reorient_fslstd \
    --recon-input /derived \
    --output-resolution 2.0 \
    --fs-license-file /li \
    --notrack

exitcode=$?

# Output results to a table AFTER the run, recording the real exit status
# per subject — matching the other pipeline scripts in this repo. (The
# previous version logged "0" unconditionally before running, so failed
# jobs were recorded as successes.)
for subject in $SUBJECTS; do
    if [ $exitcode -eq 0 ]; then
        echo "sub-$subject ${SLURM_ARRAY_TASK_ID} 0" \
            >> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
    else
        echo "sub-$subject ${SLURM_ARRAY_TASK_ID} qsirecon1 failed" \
            >> ${LOGS_DIR}/${SLURM_JOB_NAME}.${SLURM_ARRAY_JOB_ID}.tsv
    fi
done


Loading

0 comments on commit d6204b6

Please sign in to comment.