Skip to content

Commit

Permalink
clean up analysis of key validation datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
wir963 committed Sep 17, 2020
1 parent e398e48 commit 0112dca
Show file tree
Hide file tree
Showing 36 changed files with 55,223 additions and 642 deletions.
144 changes: 67 additions & 77 deletions Aulicino2018/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,106 +3,48 @@ import pandas as pd

# File-level wildcard constraints: regular expressions that limit what the
# {patient}, {plate} and {celltype} wildcards are allowed to match.
wildcard_constraints:
patient="Pt0",
# NOTE(review): `plate` is listed twice below, once without and once with a
# trailing comma. This looks like the before/after lines of a diff shown
# together - confirm that only the comma-terminated line belongs in the file.
plate="P1|P2|P3|P4"
plate="P1|P2|P3|P4",
celltype="status|infected|infection"


#samples = pd.read_csv("data/samples.tsv", sep="\t")

# Path template (under output/) for the reads-across-conditions plot; the
# {tax_level} and {microbe_of_interest} fields are placeholders.
# NOTE(review): `join` is not imported in the visible part of this file -
# presumably os.path.join imported above the first visible line; confirm.
reads_across_conditions_plot = join("output", "{tax_level}_read-from-{microbe_of_interest}.png")

# input files
# Kraken BIOM path templates: one directory per {patient}-{sample} pair, with
# separate files for paired and unpaired sequences.
KRAKEN_PAIRED_BIOM = join("data", "Kraken", "{patient}-{sample}", "paired-sequences.biom")
KRAKEN_UNPAIRED_BIOM = join("data", "Kraken", "{patient}-{sample}", "unpaired-sequences.biom")

#FILTER_METRICS = join("filter-metrics", "{patient}-{sample}", "filter-metrics.txt")
# Path of the dataset-wide filter-metrics table (no placeholders).
COMBINED_FILTER_METRICS = join("output", "dataset-filter-metrics.tsv")
# Set to an empty string here.
# NOTE(review): no use of EDGER_FORMULA is visible in this file - presumably
# read by one of the included rule files; confirm it is still needed.
EDGER_FORMULA=""

# Path templates (all under output/) for result tables and normalization
# plots; every {...} field is a placeholder.
EDGER_RESULTS = join("output", "{tax_level}_{method}_edgeR_{norm_method}_results_{patient}-{celltype}.tsv")
EDGER_SPIKE_RESULTS = join("output", "{tax_level}_edgeR_spike_results_{patient}-{celltype}.tsv")
METAGENOMESEQ_RESULTS = join("output", "{tax_level}_metagenomSeq_results_{patient}-{celltype}.tsv")
NORM_COMP_PLOT = join("output", "normalization-comparison-{patient}-{celltype}-{microbe}-{tax_level}-{method}-plot.png")
# NOTE(review): DECONV_NORM_PLOT is assigned twice; if both lines are kept the
# second (with the extra {kingdom} field) overrides the first. This looks like
# before/after lines of a diff - confirm that only the {kingdom} form is wanted.
DECONV_NORM_PLOT = join("output", "deconv-normalization-{patient}-{celltype}-{microbe}-{tax_level}-{method}-plot.png")
DECONV_NORM_PLOT = join("output", "deconv-normalization-{patient}-{celltype}-{microbe}-{tax_level}-{method}-{kingdom}-plot.png")
SPIKE_NORM_PLOT = join("output", "spike-normalization-{patient}-{celltype}-{microbe}-{tax_level}-{method}-plot.png")

# Path templates (under output/) for marker tables, named by test type
# ("t-test" / "wilcox") plus the placeholder fields in each file name.
# NOTE(review): TTEST_MARKERS and WILCOX_MARKERS are each assigned twice in
# this block; if all lines are kept, the later definitions (which add the
# {kingdom} and {pvaltype} fields) override the earlier ones. This looks like
# before/after lines of a diff - confirm which set is intended.
TTEST_MARKERS = join("output", "t-test-{patient}-{celltype}-{celltype_of_interest}-{tax_level}-{method}-{norm}.tsv")
WILCOX_MARKERS = join("output", "wilcox-{patient}-{celltype}-{celltype_of_interest}-{tax_level}-{method}-{norm}.tsv")
# Per-plate variants: file names additionally carry the infected/control plate.
PLATE_TTEST_MARKERS = join("output", "t-test-{patient}-{celltype}-{celltype_of_interest}-{tax_level}-{method}-{norm}-{infected_plate}-{control_plate}.tsv")
PLATE_WILCOX_MARKERS = join("output", "wilcox-{patient}-{celltype}-{celltype_of_interest}-{tax_level}-{method}-{norm}-{infected_plate}-{control_plate}.tsv")
# Combined table across two infected and two control plates.
COMBINED_PLATE_WILCOX_MARKERS = join("output", "combined-wilcox-{patient}-{celltype}-{celltype_of_interest}-{tax_level}-{method}-{norm}-{infected_plate1}-{infected_plate2}-{control_plate1}-{control_plate2}.tsv")
TTEST_MARKERS = join("output", "t-test-{patient}-{celltype}-{celltype_of_interest}-{tax_level}-{method}-{kingdom}-{norm}-{pvaltype}.tsv")
WILCOX_MARKERS = join("output", "wilcox-{patient}-{celltype}-{celltype_of_interest}-{tax_level}-{method}-{kingdom}-{norm}-{pvaltype}.tsv")
# Commented-out copies of the three per-plate templates defined above.
#PLATE_TTEST_MARKERS = join("output", "t-test-{patient}-{celltype}-{celltype_of_interest}-{tax_level}-{method}-{norm}-{infected_plate}-{control_plate}.tsv")
#PLATE_WILCOX_MARKERS = join("output", "wilcox-{patient}-{celltype}-{celltype_of_interest}-{tax_level}-{method}-{norm}-{infected_plate}-{control_plate}.tsv")
#COMBINED_PLATE_WILCOX_MARKERS = join("output", "combined-wilcox-{patient}-{celltype}-{celltype_of_interest}-{tax_level}-{method}-{norm}-{infected_plate1}-{infected_plate2}-{control_plate1}-{control_plate2}.tsv")
# Output image produced by rule plot_fig1.
FIGURE1 = join("output", "figure1.png")

# parameters
# Prefix passed to the spike-in marker rule as params.spike.
# NOTE(review): SPIKE_PREFIX is assigned twice; if both lines are kept, the
# second value ("ERCC-00") is the one in effect. This looks like before/after
# lines of a diff - confirm which value is intended.
SPIKE_PREFIX = "ERCC"
SPIKE_PREFIX = "ERCC-00"
# Rule files pulled in from the rules/ directory one level above this file.
# NOTE(review): names used below but not defined in the visible part of this
# file (e.g. PATIENT_MICROBE_READ_TABLE, PATIENT_SAMPLE_METADATA) presumably
# come from these includes - confirm.
include: "../rules/common.smk"
include: "../rules/stats.smk"
include: "../rules/spikein.smk"
include: "../rules/plotting.smk"
# Plate identifiers; same four values as the `plate` wildcard constraint.
plates = ["P1", "P2", "P3", "P4"]

# Target rule: lists the files to build - figure 1, the deconvolution
# normalization plot for Salmonella at genus level, and the Wilcoxon marker
# table for infected cells (patient Pt0, PathSeq, Bacteria).
rule all:
input:
# NOTE(review): `FIGURE1` has no trailing comma before the expand(...) entries
# that follow; if these lines are all meant to coexist a comma is required.
# This may be before/after lines of a diff shown together - confirm.
FIGURE1
#expand(DECONV_NORM_PLOT, patient="Pt0", tax_level="strain", method="PathSeq", celltype="infection", kingdom="Bacteria", microbe=["Salmonella_enterica_subsp._enterica_serovar_Typhimurium_str._D23580", "Salmonella_enterica_subsp._enterica_serovar_Typhimurium_str._LT2"]),
expand(DECONV_NORM_PLOT, patient="Pt0", tax_level="genus", method="PathSeq", celltype="infected", kingdom="Bacteria", microbe=["Salmonella"]),
expand(WILCOX_MARKERS, patient="Pt0", tax_level="genus", method="PathSeq", celltype="infected", celltype_of_interest=["infected"], norm="deconv", kingdom="Bacteria", pvaltype="all"),
# expand(WILCOX_MARKERS, patient="Pt0", tax_level="genus", method="PathSeq", celltype="status", celltype_of_interest=["infected"], norm="deconv", kingdom="Bacteria")

# figure 1 consists of figure 1a and figure 1b
# figure 1a - comparison of infected vs. exposed vs. uninfected
# figure 1b - table of results from CSI-Microbes test
#
# Builds FIGURE1 by running src/plot_figure1.R on the genus-level PathSeq
# read table and sample metadata for patient Pt0.
rule plot_fig1:
input:
# NOTE(review): each input template is formatted twice, once without and once
# with kingdom="Bacteria". This looks like before/after lines of a diff shown
# together; which form is valid depends on whether the templates (defined
# outside the visible part of this file) contain a {kingdom} field - confirm.
PATIENT_MICROBE_READ_TABLE.format(patient="Pt0", method="PathSeq", tax_level="genus"),
PATIENT_SAMPLE_METADATA.format(patient="Pt0", method="PathSeq", tax_level="genus"),
PATIENT_MICROBE_READ_TABLE.format(patient="Pt0", method="PathSeq", tax_level="genus", kingdom="Bacteria"),
PATIENT_SAMPLE_METADATA.format(patient="Pt0", method="PathSeq", tax_level="genus", kingdom="Bacteria"),
output:
FIGURE1
script:
"src/plot_figure1.R"


# Combines four per-plate Wilcoxon marker tables - every pairing of the two
# infected plates with the two control plates - into a single
# COMBINED_PLATE_WILCOX_MARKERS file via src/combine_plate_tests.py.
# Each .format(...) call re-inserts the shared wildcards unchanged (e.g.
# patient="{patient}") and maps the numbered plate wildcards onto the
# template's {infected_plate} / {control_plate} fields.
# NOTE(review): an identical copy of this rule appears commented out further
# down in this file - confirm whether this active copy should remain.
rule combine_plate_tests:
input:
# infected plate 1 vs control plate 1
PLATE_WILCOX_MARKERS.format(patient="{patient}", method="{method}", celltype="{celltype}",
celltype_of_interest="{celltype_of_interest}", tax_level="{tax_level}",
norm="{norm}", infected_plate="{infected_plate1}", control_plate="{control_plate1}"),
# infected plate 1 vs control plate 2
PLATE_WILCOX_MARKERS.format(patient="{patient}", method="{method}", celltype="{celltype}",
celltype_of_interest="{celltype_of_interest}", tax_level="{tax_level}",
norm="{norm}", infected_plate="{infected_plate1}", control_plate="{control_plate2}"),
# infected plate 2 vs control plate 1
PLATE_WILCOX_MARKERS.format(patient="{patient}", method="{method}", celltype="{celltype}",
celltype_of_interest="{celltype_of_interest}", tax_level="{tax_level}",
norm="{norm}", infected_plate="{infected_plate2}", control_plate="{control_plate1}"),
# infected plate 2 vs control plate 2
PLATE_WILCOX_MARKERS.format(patient="{patient}", method="{method}", celltype="{celltype}",
celltype_of_interest="{celltype_of_interest}", tax_level="{tax_level}",
norm="{norm}", infected_plate="{infected_plate2}", control_plate="{control_plate2}"),
output:
COMBINED_PLATE_WILCOX_MARKERS
script:
"src/combine_plate_tests.py"


# Per-plate marker tables for the spike-in case: only applies when the {norm}
# wildcard is "spike". Runs src/run_scran_marker_analysis_spikein_plate.R on
# the read table, sample metadata and STAR read-count table, and hands the
# script SPIKE_PREFIX as params.spike.
# Writes the same two output templates as calculate_markers_plate_test; the
# differing `norm` constraints keep the two rules from overlapping.
# NOTE(review): an identical copy of this rule appears commented out further
# down in this file - confirm whether this active copy should remain.
rule calculate_markers_spikein_plate_test:
wildcard_constraints:
norm="spike"
params:
spike=SPIKE_PREFIX
input:
PATIENT_MICROBE_READ_TABLE,
PATIENT_SAMPLE_METADATA,
STAR_READCOUNT_TABLE
output:
PLATE_TTEST_MARKERS,
PLATE_WILCOX_MARKERS
script:
"src/run_scran_marker_analysis_spikein_plate.R"

# Per-plate marker tables for the deconvolution case: only applies when the
# {norm} wildcard is "deconv". Runs src/run_scran_marker_analysis_plate.R on
# the read table and sample metadata.
# Writes the same two output templates as calculate_markers_spikein_plate_test;
# the differing `norm` constraints keep the two rules from overlapping.
# NOTE(review): an identical copy of this rule appears commented out further
# down in this file - confirm whether this active copy should remain.
rule calculate_markers_plate_test:
wildcard_constraints:
norm="deconv"
input:
PATIENT_MICROBE_READ_TABLE,
PATIENT_SAMPLE_METADATA
output:
PLATE_TTEST_MARKERS,
PLATE_WILCOX_MARKERS
script:
"src/run_scran_marker_analysis_plate.R"


rule convert_PathSeq_to_read_counts:
wildcard_constraints:
method="PathSeq"
Expand All @@ -115,7 +57,55 @@ rule convert_PathSeq_to_read_counts:
MICROBE_READ_TABLE,
SAMPLE_METADATA,
script:
"src/convert_PathSeq_output_to_read_counts.py"
"../src/convert_PathSeq_output_to_read_counts.py"

#
# rule combine_plate_tests:
# input:
# PLATE_WILCOX_MARKERS.format(patient="{patient}", method="{method}", celltype="{celltype}",
# celltype_of_interest="{celltype_of_interest}", tax_level="{tax_level}",
# norm="{norm}", infected_plate="{infected_plate1}", control_plate="{control_plate1}"),
# PLATE_WILCOX_MARKERS.format(patient="{patient}", method="{method}", celltype="{celltype}",
# celltype_of_interest="{celltype_of_interest}", tax_level="{tax_level}",
# norm="{norm}", infected_plate="{infected_plate1}", control_plate="{control_plate2}"),
# PLATE_WILCOX_MARKERS.format(patient="{patient}", method="{method}", celltype="{celltype}",
# celltype_of_interest="{celltype_of_interest}", tax_level="{tax_level}",
# norm="{norm}", infected_plate="{infected_plate2}", control_plate="{control_plate1}"),
# PLATE_WILCOX_MARKERS.format(patient="{patient}", method="{method}", celltype="{celltype}",
# celltype_of_interest="{celltype_of_interest}", tax_level="{tax_level}",
# norm="{norm}", infected_plate="{infected_plate2}", control_plate="{control_plate2}"),
# output:
# COMBINED_PLATE_WILCOX_MARKERS
# script:
# "src/combine_plate_tests.py"
#
#
# rule calculate_markers_spikein_plate_test:
# wildcard_constraints:
# norm="spike"
# params:
# spike=SPIKE_PREFIX
# input:
# PATIENT_MICROBE_READ_TABLE,
# PATIENT_SAMPLE_METADATA,
# STAR_READCOUNT_TABLE
# output:
# PLATE_TTEST_MARKERS,
# PLATE_WILCOX_MARKERS
# script:
# "src/run_scran_marker_analysis_spikein_plate.R"
#
# rule calculate_markers_plate_test:
# wildcard_constraints:
# norm="deconv"
# input:
# PATIENT_MICROBE_READ_TABLE,
# PATIENT_SAMPLE_METADATA
# output:
# PLATE_TTEST_MARKERS,
# PLATE_WILCOX_MARKERS
# script:
# "src/run_scran_marker_analysis_plate.R"


rule combine_STAR_output:
Expand All @@ -142,6 +132,6 @@ rule combine_STAR_output:

# rsync -avc --include='filter-metrics.txt' --include='*/' --exclude='*' helix:/data/Robinson-SB/scRNA-seq-microbe-identification/Aulicino2018/identify-microbes-workflow/output/ data/

# rsync -avc --include='pathseq.txt' --include='*/' --exclude='*' helix:/data/Robinson-SB/scRNA-seq-microbe-identification/Aulicino2018/identify-microbes-workflow/output/ exp4_data/
# rsync -avc --include='pathseq.txt' --include='*/' --exclude='*' helix:/data/Robinson-SB/scRNA-seq-microbe-identification/Aulicino2018/output/ data/

# rsync -avc --include='ReadsPerGene.out.tab' --include='*/' --exclude='*' helix:/data/Robinson-SB/scRNA-seq-microbe-identification/Aulicino2018/identify-microbes-workflow/output/ data/
28 changes: 2 additions & 26 deletions Aulicino2018/data/patients.tsv
Original file line number Diff line number Diff line change
@@ -1,26 +1,2 @@
old_patient patient
Pt0 Pt0
Pt1 Pt0
Pt2 Pt0
Pt3 Pt0
Pt4 Pt0
Pt5 Pt0
Pt6 Pt0
Pt7 Pt0
Pt8 Pt0
Pt9 Pt0
Pt10 Pt0
Pt11 Pt0
Pt12 Pt0
Pt13 Pt0
Pt14 Pt0
Pt15 Pt0
Pt16 Pt0
Pt17 Pt0
Pt18 Pt0
Pt19 Pt0
Pt20 Pt0
Pt21 Pt0
Pt22 Pt0
Pt23 Pt0
Pt24 Pt0
patient
Pt0
Loading

0 comments on commit 0112dca

Please sign in to comment.