update all mermaids

pdimens · Jun 5, 2024 · aa3083d
1 parent b0dcd05
commit aa3083d
Show file tree

Hide file tree

Showing 12 changed files with 122 additions and 102 deletions.
diff --git a/Modules/Align/bwa.md b/Modules/Align/bwa.md
@@ -89,25 +89,27 @@ are not used to inform mapping. The `-m` threshold is used for alignment molecul
 
 ```mermaid
 graph LR
-    A([index genome]) --> B([align to genome])
-    B-->C([sort alignments])
-    C-->D([mark duplicates])
-    D-->E([assign molecules])
-    E-->F([alignment metrics])
-    D-->G([barcode stats])
+    A([index genome]):::clean --> B([align to genome]):::clean
+    B-->C([sort alignments]):::clean
+    C-->D([mark duplicates]):::clean
+    D-->E([assign molecules]):::clean
+    E-->F([alignment metrics]):::clean
+    D-->G([barcode stats]):::clean
     G-->F
     subgraph aln [Inputs]
-        Z[FASTQ files]---genome
+        Z[FASTQ files]:::clean---genome:::clean
     end
     aln-->B & A
     subgraph markdp [mark duplicates via `samtools`]
         direction LR
-        collate-->fixmate
-        fixmate-->sort
-        sort-->markdup
+        collate:::clean-->fixmate:::clean
+        fixmate-->sort:::clean
+        sort-->markdup:::clean
     end
     style markdp fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
     style aln fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
+
 ```
 +++ :icon-file-directory: BWA output
 The default output directory is `Align/bwa` with the folder structure below. `Sample1` is a generic sample name for demonstration purposes.

diff --git a/Modules/Align/ema.md b/Modules/Align/ema.md
@@ -105,28 +105,28 @@ within alignments, but the BWA alignments need duplicates marked manually using
 ```mermaid
 graph LR
     subgraph Inputs
-        trm[FASTQ files]---geno[genome]
+        trm[FASTQ files]:::clean---geno[genome]:::clean
     end
     Inputs-->A & IDX
-    A([EMA count]) --> B([EMA preprocess])
-    B-->C([EMA align barcoded])
-    C-->D([sort BX alignments])
-    D-->F([merge all alignments])
-    IDX([index genome])-->C
-    IDX-->Z([BWA align unbarcoded])
-    Z-->Y([sort alignments])
-    Y-->X([mark duplicates])
+    A([EMA count]):::clean --> B([EMA preprocess]):::clean
+    B-->C([EMA align barcoded]):::clean
+    C-->D([sort BX alignments]):::clean
+    D-->F([merge all alignments]):::clean
+    IDX([index genome]):::clean-->C
+    IDX-->Z([BWA align unbarcoded]):::clean
+    Z-->Y([sort alignments]):::clean
+    Y-->X([mark duplicates]):::clean
     X-->F
-    F-->J([alignment stats])
+    F-->J([alignment stats]):::clean
     subgraph markdp [mark duplicates via `samtools`]
         direction LR
-        collate-->fixmate
-        fixmate-->sort
-        sort-->markdup
+        collate:::clean-->fixmate:::clean
+        fixmate-->sort:::clean
+        sort-->markdup:::clean
     end
     style markdp fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
     style Inputs fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
-
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
 ```
 +++ :icon-file-directory: EMA output
 The default output directory is `Align/ema` with the folder structure below. `Sample1` is a generic sample name for demonstration purposes. 

diff --git a/Modules/Align/minimap.md b/Modules/Align/minimap.md
@@ -91,25 +91,26 @@ are not used to inform mapping. The `-m` threshold is used for alignment molecul
 
 ```mermaid
 graph LR
-    A([index genome]) --> B([align to genome])
-    B-->C([sort alignments])
-    C-->D([mark duplicates])
-    D-->E([assign molecules])
-    E-->F([alignment metrics])
-    D-->G([barcode stats])
+    A([index genome]):::clean --> B([align to genome]):::clean
+    B-->C([sort alignments]):::clean
+    C-->D([mark duplicates]):::clean
+    D-->E([assign molecules]):::clean
+    E-->F([alignment metrics]):::clean
+    D-->G([barcode stats]):::clean
     G-->F
     subgraph aln [Inputs]
-        Z[FASTQ files]---genome[genome]
+        Z[FASTQ files]:::clean---genome[genome]:::clean
     end
     aln-->B & A
     subgraph markdp [mark duplicates via `samtools`]
         direction LR
-        collate-->fixmate
-        fixmate-->sort
-        sort-->markdup
+        collate:::clean-->fixmate:::clean
+        fixmate-->sort:::clean
+        sort-->markdup:::clean
     end
     style markdp fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
     style aln fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
 ```
 +++ :icon-file-directory: minimap2 output
 The default output directory is `Align/minimap` with the folder structure below. `Sample1` is a generic sample name for demonstration purposes.

diff --git a/Modules/SV/leviathan.md b/Modules/SV/leviathan.md
@@ -99,17 +99,17 @@ in the alignments, then it calls variants using Leviathan.
 ```mermaid
 graph LR
     subgraph id1 [Population calling]
-        bams2[BAM alignments] --> popsplit([merge by population])
+        bams2[BAM alignments]:::clean --> popsplit([merge by population]):::clean
     end
     subgraph id2 [Individual calling]
-        bams[BAM alignments]
+        bams[BAM alignments]:::clean
     end
     id1 & id2-->A
-    A([index barcodes]) --> B([leviathan])
-    B-->C([convert to BCF])
-    C-->E([generate reports])
+    A([index barcodes]):::clean --> B([leviathan]):::clean
+    B-->C([convert to BCF]):::clean
     style id1 fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
     style id2 fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
 ```
 +++ :icon-file-directory: leviathan output
 The default output directory is `SV/leviathan` with the folder structure below. `sample1` and `sample2` are generic sample names for demonstration purposes.

diff --git a/Modules/SV/naibr.md b/Modules/SV/naibr.md
@@ -113,15 +113,20 @@ Your VCF file should be [filtered in some capacity](../snp.md/#filtering-variant
 title: Calling variants with NAIBR, starting with unphased alignments
 ---
 graph LR
-    aln[alignments]-->|harpy snp|snps([SNPs])
-    snps-->|bcftools filter -i 'QUAL>95' ...|filt([filtered SNPs])
-    filt-->|harpy phase|phasesnp([phased haplotypes])  
+    subgraph id2 ["You do this part"]
+        aln[alignments]:::clean-->|harpy snp|snps([SNPs]):::clean
+        snps-->|bcftools filter -i 'QUAL>95' ...|filt([filtered SNPs]):::clean
+        filt-->|harpy phase|phase([phased haplotypes]):::phase
+    end
+    id2-->|harpy sv naibr|id1
     subgraph id1 ["Harpy does this part"]
-    phasesnp-->|whatshap haplotag|aln2
-    aln2([phased alignments])-->|NAIBR|results((structural variants))
+        phase2([phased haplotypes]):::phase-->|whatshap haplotag|aln2:::clean
+        aln2([phased alignments])-->|NAIBR|results((structural variants)):::clean
     end
     style id1 fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
-
+    style id2 fill:#dfe3ee,stroke:#c8ccd6,stroke-width:2px
+    classDef phase fill:#b7c9ef,stroke:#dfe3ee,stroke-width:2px
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
 ```
 
 ----
@@ -137,22 +142,23 @@ This fork includes improved accuracy as well as quality-of-life updates.
 ```mermaid
 graph LR
     subgraph id1 ["Phase"]
-    aln[unphased alignments]---vcf[phased VCF]
+        aln[unphased alignments]:::clean---vcf[phased VCF]:::clean
     end
-    id1-->phased([phased alignments])
+    id1-->phased([phased alignments]):::clean
     subgraph id2 ["Population calling"]
-    popsplit([merge by population])
+        popsplit([merge by population]):::clean
     end
     phased-->id2
     popsplit-->A
     phased-->A
-    A([index alignments]) --> B([NAIBR])
-    Z([create config file]) --> B
+    A([index alignments]):::clean --> B([NAIBR]):::clean
+    Z([create config file]):::clean --> B
     popsplit --> Z
     phased --> Z
-    B-->C([generate reports])
+    
     style id2 fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
     style id1 fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
 ```
 +++ :icon-file-directory: naibr output
 The default output directory is `SV/naibr` with the folder structure below. `sample1` and `sample2` are generic sample 

diff --git a/Modules/Simulate/simulate-linkedreads.md b/Modules/Simulate/simulate-linkedreads.md
@@ -124,11 +124,13 @@ Conveniently, we provide a calculator to help you make informed decisions for th
 ```mermaid
 graph LR
     subgraph Inputs
-        A[genome haplotype 1]
-        B[genome haplotype 2]
+        direction BT
+        A[genome haplotype 1]:::clean
+        B[genome haplotype 2]:::clean
     end
-    Inputs-->D([dwgsim])
-    D-->L([LRSIM])
-    L-->H([convert to haplotag])
+    Inputs-->D([dwgsim]):::clean
+    D-->L([LRSIM]):::clean
+    L-->H([convert to haplotag]):::clean
     style Inputs fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
 ```
diff --git a/Modules/Simulate/simulate-variants.md b/Modules/Simulate/simulate-variants.md
@@ -222,10 +222,13 @@ Simulate random variants onto your haploid assembly with `--heterozygosity` (`-z
 We aren't interested in the resulting genome, but rather the positions of the variants `simuG` created.
 ```mermaid
 graph LR
-    geno(haploid genome)-->|simulate inversion -n 10 -z 0.5|hap(inversions.vcf)
+    geno(haploid genome)-->|simulate inversion -n 10 -z 0.5|hap(inversions.vcf):::clean
     hap-->hap1(inversion.hap1.vcf)
     hap-->hap2(inversion.hap2.vcf)
     style geno fill:#ebb038,stroke:#d19b2f,stroke-width:2px
+    style hap1 fill:#f5f6f9,stroke:#90c8be,stroke-width:2px
+    style hap2 fill:#f5f6f9,stroke:#bd8fcb,stroke-width:2px
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
 ```
 #### Step 2
 Use the resulting hap1 and hap2 VCF files to simulate those same variants, but shuffled
@@ -238,6 +241,7 @@ graph LR
     end
     id1-->|simulate inversion -v|hapgeno(haplotype-1 genome)
     style id1 fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
+    style hap1 fill:#f5f6f9,stroke:#90c8be,stroke-width:2px
     style hapgeno fill:#90c8be,stroke:#6fb6a9,stroke-width:2px
     style geno fill:#ebb038,stroke:#d19b2f,stroke-width:2px
 ```
@@ -248,6 +252,7 @@ graph LR
     end
     id2-->|simulate inversion -v|hapgeno2(haplotype-2 genome)
     style id2 fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
+    style hap2 fill:#f5f6f9,stroke:#bd8fcb,stroke-width:2px
     style hapgeno2 fill:#bd8fcb,stroke:#a460b7,stroke-width:2px
     style geno fill:#ebb038,stroke:#d19b2f,stroke-width:2px
 ```
@@ -258,11 +263,13 @@ we're only interested in the haplotype VCF files (positions of variants) and not
 genome.
 ```mermaid
 graph LR
-    geno(haplotype-1 genome)-->|simulate snpindel -n 100000 -z 0.5|hap(snpindel.vcf)
+    geno(haplotype-1 genome)-->|simulate snpindel -n 100000 -z 0.5|hap(snpindel.vcf):::clean
     hap-->hap1(snpindel.hap1.vcf)
     hap-->hap2(snpindel.hap2.vcf)
     style geno fill:#90c8be,stroke:#6fb6a9,stroke-width:2px
-
+    style hap1 fill:#f5f6f9,stroke:#90c8be,stroke-width:2px
+    style hap2 fill:#f5f6f9,stroke:#bd8fcb,stroke-width:2px
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
 ```
 #### Step 4
 Use the resulting haplotype VCFs to simulate known variants onto the **haplotype genomes** from
@@ -275,16 +282,18 @@ graph LR
     id1-->|simulate snpindel -v|genohap1(haplotype-1 genome with new variants)
     style id1 fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
     style geno fill:#90c8be,stroke:#6fb6a9,stroke-width:2px
+    style hap1 fill:#f5f6f9,stroke:#90c8be,stroke-width:2px
     style genohap1 fill:#90c8be,stroke:#000000,stroke-width:2px
 ```
 ```mermaid
 graph LR
     subgraph id2 ["Haplotype 2 inputs"]
-    hap1(snpindel.hap2.vcf)---geno(haplotype-2 genome)
+    hap2(snpindel.hap2.vcf)---geno(haplotype-2 genome)
     end
     id2-->|simulate snpindel -v|genohap2(haplotype-2 genome with new variants)
     style id2 fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
     style geno fill:#bd8fcb,stroke:#a460b7,stroke-width:2px
+    style hap2 fill:#f5f6f9,stroke:#bd8fcb,stroke-width:2px
     style genohap2 fill:#bd8fcb,stroke:#000000,stroke-width:2px
 ```
 

diff --git a/Modules/demultiplex.md b/Modules/demultiplex.md
@@ -76,15 +76,16 @@ individual samples is performed in parallel and using the beloved workhorse `gre
 ```mermaid
 graph LR
     subgraph Inputs
-        A[multiplexed FASTQ]
-        BX[Barcode Files]
-        SCH[Sample Schema]
+        direction TB
+        A[multiplexed FASTQ]:::clean---BX
+        BX[Barcode Files]:::clean---SCH
+        SCH[Sample Schema]:::clean
     end
-    Inputs-->B([barcodes to headers])
-    B-->C([demultiplex samples])
-    C-->D([quality metrics])
-    D-->E([create report])
+    Inputs-->B([barcodes to headers]):::clean
+    B-->C([demultiplex samples]):::clean
+    C-->D([quality metrics]):::clean
     style Inputs fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
 ```
 
 +++ :icon-file-directory: demultiplexing output

diff --git a/Modules/impute.md b/Modules/impute.md
@@ -193,17 +193,17 @@ contigs have at least 2 biallelic SNPs, then performs imputation on only those c
 ```mermaid
 graph LR
     subgraph Inputs
-        v[VCF file]---gen[genome]
-        gen---bam[BAM alignments]
+        v[VCF file]:::clean---gen[genome]:::clean
+        gen---bam[BAM alignments]:::clean
     end
-    B([split contigs])-->C([keep biallelic SNPs])
+    B([split contigs]):::clean-->C([keep biallelic SNPs]):::clean
     Inputs-->B & C & G
-    C-->D([convert to STITCH format])
-    D-->E([STITCH imputation])
-    E-->F([merge output])
-    G([create file list])-->E
+    C-->D([convert to STITCH format]):::clean
+    D-->E([STITCH imputation]):::clean
+    E-->F([merge output]):::clean
+    G([create file list]):::clean-->E
     style Inputs fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
-
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
 ```
 +++ :icon-file-directory: impute output
 The default output directory is `Impute` with the folder structure below. `contig1` and `contig2` 

diff --git a/Modules/phase.md b/Modules/phase.md
@@ -85,23 +85,23 @@ across all of your samples to speed things along.
 ```mermaid
 graph LR
     subgraph Inputs
-    Z([sample alignments])---gen["genome (optional)"]
+        Z([sample alignments]):::clean---gen["genome (optional)"]:::clean
     end
-    Inputs --> B([extractHAIRS])
-    Inputs--->A([split samples])
-    Inputs-->C([LinkFragments])
-    C-->D([phase blocks])
+    Inputs--->A([isolate heterozygotes]):::clean
+    A ---> B([extractHAIRS]):::clean
+    B-->C([LinkFragments]):::clean
+    C-->D([phase blocks]):::clean
     B-->D
     A-->D
-    D-->E([annotate BCFs])
-    E-->F([index annotations])
-    F-->G([merge annotations])
+    D-->E([annotate BCFs]):::clean
+    E-->F([index annotations]):::clean
+    F-->G([merge annotations]):::clean
     E-->G
     A-->G
     D-->G
-    G-->H([index merged annotations])
-    H-->I([merge phased samples])
+    G-->I([merge phased samples]):::clean
     style Inputs fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
 ```
 
 +++ :icon-file-directory: phasing output
@@ -122,7 +122,7 @@ Phase/
 │   ├── Sample1.unlinked.frags
 │   └── logs
 │       └── Sample1.unlinked.log
-├── input
+├── workflow/input
 │   ├── header.names
 │   ├── Sample1.bcf
 │   └── Sample1.het.bcf

diff --git a/Modules/qc.md b/Modules/qc.md
@@ -47,13 +47,12 @@ approach (`--cut-right`) to identify low quality bases. The workflow is quite si
 ```mermaid
 graph LR
     subgraph Inputs
-        F[FASTQ files]
+        F[FASTQ files]:::clean
     end
-    Inputs-->A
-    A([fastp trim]) --> B([count barcodes])
-    A --> C([create reports])
-    B --> C
+    Inputs-->A:::clean
+    A([fastp]) --> B([count barcodes]):::clean
     style Inputs fill:#f0f0f0,stroke:#e8e8e8,stroke-width:2px
+    classDef clean fill:#f5f6f9,stroke:#b7c9ef,stroke-width:2px
 ```
 
 +++ :icon-file-directory: qc output