From dd94e543ea4863257616fdbb246a6f46ae7a37c9 Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 15 Jul 2024 10:15:18 -0400 Subject: [PATCH 1/4] cleaner output directory structure --- src/harpy/snakefiles/impute.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/harpy/snakefiles/impute.smk b/src/harpy/snakefiles/impute.smk index ef5778caa..bdf124284 100644 --- a/src/harpy/snakefiles/impute.smk +++ b/src/harpy/snakefiles/impute.smk @@ -17,7 +17,7 @@ biallelic = config["inputs"]["biallelic_contigs"] skipreports = config["skip_reports"] outdir = config["output_directory"] envdir = os.getcwd() + "/.harpy_envs" -paramspace = Paramspace(pd.read_csv(paramfile, sep=r"\s+", skip_blank_lines=True).rename(columns=str.lower), param_sep = "", filename_params="*") +paramspace = Paramspace(pd.read_csv(paramfile, sep=r"\s+", skip_blank_lines=True).rename(columns=str.lower), param_sep = "", filename_params = ["k", "s", "ngen", "bxlimit"]) with open(biallelic, "r") as f_open: contigs = [i.rstrip() for i in f_open.readlines()] From a7a147b4d29fbbb296ef5bfe7df6bd7cc373af53 Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 15 Jul 2024 10:23:38 -0400 Subject: [PATCH 2/4] rm the interactive pop plots. They dont load for big datasets --- src/harpy/reports/LeviathanPop.Rmd | 91 +---------------------- src/harpy/reports/NaibrPop.Rmd | 114 ++--------------------------- 2 files changed, 8 insertions(+), 197 deletions(-) diff --git a/src/harpy/reports/LeviathanPop.Rmd b/src/harpy/reports/LeviathanPop.Rmd index f1e759a22..7a4f41c24 100644 --- a/src/harpy/reports/LeviathanPop.Rmd +++ b/src/harpy/reports/LeviathanPop.Rmd @@ -71,9 +71,7 @@ knitr::kable( ) ) ``` -This tab (`General Stats`) shows overview information. Clicking `Per-Contig Plots` -in the navigation bar at the top of this page will show you interactive -plots detailing all variants identified. +This report shows overview information. In the colorful summary boxes are the per-population average number of variants detected by type. @@ -320,91 +318,4 @@ popheight <- 5 / length(populations) / 10 for (SV in c("INV", "DEL", "DUP", "BND")){ circosplot(sv, fa.sizes, SV, popheight, populations, col.ramp) } -``` - -# Per-Contig Plots -## Per-contig -### plots per contig {.no-title data-height=10000} -

Structural Variants Per Contig Per Population

-Below is a plot to help you assess what structural variants were -detected by LEVIATHAN. These plots are interactive, allowing you -to hover over a variant to provide additional information, including -the genomic interval in which it occurs and the number of haplotag -barcodes supporting the variant. Populations are stacked vertically -for each contig, denoted by the grey strip on the right side of each row. - -```{r echo = FALSE, message = FALSE, warning = FALSE} -sv$ystart <- case_when( - sv$type == "INV" ~ 0.1, - sv$type == "DUP" ~ 1.1, - sv$type == "DEL" ~ 2.1, - sv$type == "BND" ~ 3.1, -) -sv$ystop <- sv$ystart + 0.8 -``` - -```{r percontig plotly, echo = FALSE, out.width = '100%', warning=FALSE} -color.palette <- c("DEL"="#5a8c84", "DUP"="#ffd75f", "INV"="#4a9fea","BND"="#df487f") -l <- list() -nplotrows <- 0 - -for (i in 1:nrow(fa.sizes)) { - sv.filt <- sv %>% filter(contig == fa.sizes$contig[i]) - sv_stats <- group_by(sv.filt, type) %>% summarise(n = length(type)) - if (nrow(sv_stats) == 0) { - next - } - nplotrows <- nplotrows + 1 - plt <- sv.filt %>% - ggplot() + - geom_rect( - alpha = 0.7, - aes( - ymin = ystart, - ymax = ystop, - xmin = position_start, - xmax = position_end, - fill = type, - color = type, - text = sprintf("Population: %s
Type: %s
Position: %s-%s
barcodes: %s", population, type, position_start, position_end, n_barcodes) - ) - ) + - geom_hline(yintercept = 1:3, color = "grey96") + - scale_color_manual(values = color.palette) + - scale_fill_manual(values = color.palette) + - facet_grid(rows = vars(population)) + - theme_light() + - theme( - axis.line.y = element_blank(), - axis.ticks.y = element_blank(), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - strip.background = element_rect(fill = "grey80"), - strip.text = element_text(color = "grey20"), - legend.position = "none", - ) + - scale_x_continuous(labels = scales::comma) + - scale_y_continuous( - breaks = c(0.5, 1.5, 2.5, 3.5), - labels = c("INV", "DUP", "DEL", "BND"), - limits = c(0,4) - ) + - coord_cartesian(xlim = c(0, fa.sizes$size[i] + 1), expand = F) + - xlab("Position (bp)") + - labs(fill = "SV type", color = "SV type") - annotations <- list( - x = 0.5, - y = 1.05, - text = as.character(fa.sizes$contig[i]), - xref = "paper", - yref = "paper", - xanchor = "center", - yanchor = "bottom", - showarrow = FALSE - ) - pp <- subplot(ggplotly(plt, height = 850 * length(populations), tooltip = "text")) %>% - layout(annotations = annotations) - l[[i]] <- pp -} -subplot(l, nrows = nplotrows, shareX = FALSE, titleX = TRUE) ``` \ No newline at end of file diff --git a/src/harpy/reports/NaibrPop.Rmd b/src/harpy/reports/NaibrPop.Rmd index b3d30a4d1..e1018a662 100644 --- a/src/harpy/reports/NaibrPop.Rmd +++ b/src/harpy/reports/NaibrPop.Rmd @@ -45,11 +45,11 @@ script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.7.1/jquery.min.js" ``` ```{r echo = FALSE, warnings = FALSE, message = FALSE} -infiles <- snakemake@input[["bedpe"]] -fai <- snakemake@input[["fai"]] -#infiles <- paste0("/home/pdimens/", c(1,2,3), ".bedpe") -#infiles <- list.files("~/downsampled/", full.names = T) -#fai <- "/home/pdimens/test.fai" +#infiles <- snakemake@input[["bedpe"]] +#fai <- snakemake@input[["fai"]] +#infiles <- paste0("/home/pdimens/", c("AW","BL"), ".bedpe") +infiles <- list.files(path = "~", pattern = "*bedpe", full.names = T) +fai <- "~/test.fai" ``` # General Stats @@ -72,9 +72,7 @@ knitr::kable( ) ) ``` -This tab (`General Stats`) shows overview information. Clicking `Per-Contig Plots` -in the navigation bar at the top of this page will show you interactive -plots detailing all variants identified. +This report shows overview information. In the colorful summary boxes are the per-population average number of variants detected by type. @@ -110,7 +108,7 @@ chimeric <- function(x){ ```{r echo = FALSE} emptyfiles <- character() -sv <- data.frame(matrix(ncol=12,nrow=0, dimnames=list(NULL, c("Population", "Chr1", "Break1", "Chr2", "Break2", "SplitMolecules", "DiscordantReads", "Orientation", "Haplotype", "Score", "PassFilter", "SV" )))) +sv <- data.frame(matrix(ncol=12,nrow=0, dimnames=list(NULL, c("Population", "Chr1", "Break1", "Chr2", "Break2", "SplitMolecules", "DiscordantReads", "Orientation", "Haplotype", "Score", "PassFilter", "SV")))) for(i in infiles){ .df <- tryCatch(readvariants(i), error = function(e){return(0)}) if(length(.df)==1){ @@ -202,7 +200,6 @@ DT::datatable( filter = "top", extensions = 'Buttons', options = list(dom = 'Brtip', buttons = c('csv'), scrollX = TRUE), - autoHideNavigation = T, fillContainer = T ) ``` @@ -220,7 +217,6 @@ DT::datatable( filter = "top", extensions = 'Buttons', options = list(dom = 'Brtip', buttons = c('csv'), scrollX = TRUE), - autoHideNavigation = T, fillContainer = T ) ``` @@ -271,7 +267,6 @@ DT::datatable( filter = "top", extensions = 'Buttons', options = list(dom = 'Brtip', buttons = 'csv', scrollX = TRUE), - autoHideNavigation = T, fillContainer = T ) ``` @@ -403,104 +398,9 @@ circosplot(sv_clean, fa.sizes, "inversion", popheight, populations, col.ramp) ### Deletions ```{r echo = FALSE, message = FALSE, warning = FALSE, dev = "jpeg", fig.align='center'} circosplot(sv_clean, fa.sizes, "deletion", popheight, populations, col.ramp) - ``` ### Duplications ```{r echo = FALSE, message = FALSE, warning = FALSE, dev = "jpeg", fig.align='center'} circosplot(sv_clean, fa.sizes, "duplication", popheight, populations, col.ramp) ``` - -# Per-Contig Plots -## percontig desc -### desc {.no-title} -

Structural Variants Per Contig Per Population

-Below is a plot to help you assess what structural variants were -detected by NAIBR These plots are interactive, allowing you -to hover over a variant to provide additional information, including -the genomic interval in which it occurs and the number of haplotag -barcodes supporting the variant. Populations are stacked vertically -for each contig, denoted by the grey strip on the right side of each row. - -## per contig -### plots per contig {.no-title data-height=10000} -```{r echo = FALSE, message = FALSE, warning = FALSE} -color.palette <- c( - "deletion" = "#5a8c84", - "duplication" = "#ffd75f", - "inversion" = "#4a9fea" -) - -sv_clean$ystart <- case_when( - sv_clean$SV == "inversion" ~ 0.1, - sv_clean$SV == "duplication" ~ 1.1, - sv_clean$SV == "deletion" ~ 2.1 -) -sv_clean$ystop <- sv_clean$ystart + 0.8 - -``` - - -```{r echo = FALSE, out.width = '100%', warning=FALSE} -l <- list() -nplotrows <- 0 - -for (i in 1:nrow(fa.sizes)) { - sv.filt <- sv_clean %>% filter(Chr1 == fa.sizes$contig[i]) - sv_stats <- group_by(sv.filt, SV) %>% summarise(n = length(SV)) - if (nrow(sv_stats) == 0) { - next - } - plt <- sv.filt %>% - ggplot() + - geom_rect( - alpha = 0.7, - aes( - xmin = Break1, - xmax = Break2, - ymin = ystart, - ymax = ystop, - fill = SV, - color = SV, - text = sprintf("Type: %s
Position: %s-%s
barcodes: %s", SV, Break1, Break2, SplitMolecules) ) - ) + - geom_hline(yintercept = 1:3, color = "grey96") + - scale_color_manual(values = color.palette) + - scale_fill_manual(values = color.palette) + - facet_grid(rows = vars(Population)) + - theme_light() + - theme( - axis.line.y = element_blank(), - axis.ticks.y = element_blank(), - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - strip.background = element_rect(fill = "grey80"), - strip.text = element_text(color = "grey20"), - legend.position = "none", - ) + - scale_x_continuous(labels = scales::comma) + - scale_y_continuous( - breaks = c(0.5, 1.5, 2.5), - labels = c("INV", "DUP", "DEL"), - limits = c(0,3) - ) + - coord_cartesian(xlim = c(0, fa.sizes$size[i] + 1), expand = F) + - xlab("Position (bp)") - annotations <- list( - x = 0.5, - y = 1.05, - text = as.character(fa.sizes$contig[i]), - xref = "paper", - yref = "paper", - xanchor = "center", - yanchor = "bottom", - showarrow = FALSE - ) - pp <- subplot(ggplotly(plt, height = 800 * length(populations), tooltip = "text")) %>% - layout(annotations = annotations) - l[[i]] <- pp -} -# remove null -l <- Filter(Negate(is.null), l) -subplot(l, nrows = length(l) , shareX = FALSE, titleX = TRUE) -``` \ No newline at end of file From 731ed1477c3199f80e135420ed32b1aa76c764b1 Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 15 Jul 2024 10:48:45 -0400 Subject: [PATCH 3/4] fix text --- src/harpy/reports/Naibr.Rmd | 2 +- src/harpy/reports/NaibrPop.Rmd | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/harpy/reports/Naibr.Rmd b/src/harpy/reports/Naibr.Rmd index bbd277569..e75bad485 100644 --- a/src/harpy/reports/Naibr.Rmd +++ b/src/harpy/reports/Naibr.Rmd @@ -383,7 +383,7 @@ circosplot(sv, fa.sizes, samplename) ## per contig ### Per-contig {.no-title data-height=7000}

Structural Variants Per Contig

-Below is a plot to help you assess what structural variants were detected by LEVIATHAN. These plots are interactive, +Below is a plot to help you assess what structural variants were detected by NAIBR. These plots are interactive, allowing you to hover over a variant to provide additional information, including the genomic interval in which it occurs and the number of haplotag barcodes supporting the variant. diff --git a/src/harpy/reports/NaibrPop.Rmd b/src/harpy/reports/NaibrPop.Rmd index e1018a662..80c36aab4 100644 --- a/src/harpy/reports/NaibrPop.Rmd +++ b/src/harpy/reports/NaibrPop.Rmd @@ -45,11 +45,10 @@ script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.7.1/jquery.min.js" ``` ```{r echo = FALSE, warnings = FALSE, message = FALSE} -#infiles <- snakemake@input[["bedpe"]] -#fai <- snakemake@input[["fai"]] -#infiles <- paste0("/home/pdimens/", c("AW","BL"), ".bedpe") -infiles <- list.files(path = "~", pattern = "*bedpe", full.names = T) -fai <- "~/test.fai" +infiles <- snakemake@input[["bedpe"]] +fai <- snakemake@input[["fai"]] +#infiles <- list.files(path = "~", pattern = "*bedpe", full.names = T) +#fai <- "~/test.fai" ``` # General Stats From 1545b58e4011d01ffe95a86f1f1f656a9c82d3e5 Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 15 Jul 2024 11:54:38 -0400 Subject: [PATCH 4/4] improve interactive plot --- src/harpy/reports/Leviathan.Rmd | 2 +- src/harpy/reports/LeviathanPop.Rmd | 2 +- src/harpy/reports/Naibr.Rmd | 54 +++++++----------------------- src/harpy/reports/NaibrPop.Rmd | 2 +- 4 files changed, 16 insertions(+), 44 deletions(-) diff --git a/src/harpy/reports/Leviathan.Rmd b/src/harpy/reports/Leviathan.Rmd index 0f0d3ac26..aca141ad4 100644 --- a/src/harpy/reports/Leviathan.Rmd +++ b/src/harpy/reports/Leviathan.Rmd @@ -1,5 +1,5 @@ --- -title: "Leviathan Structural Variant Calling Summary" +title: "Structural Variant Calling Summary (LEVIATHAN)" date: "`r format(Sys.time(), '%d %b, %Y at %H:%M')`" output: flexdashboard::flex_dashboard: diff --git a/src/harpy/reports/LeviathanPop.Rmd b/src/harpy/reports/LeviathanPop.Rmd index 7a4f41c24..ac9ab3660 100644 --- a/src/harpy/reports/LeviathanPop.Rmd +++ b/src/harpy/reports/LeviathanPop.Rmd @@ -1,5 +1,5 @@ --- -title: "Leviathan Structural Variant Calling Summary" +title: "Structural Variant Calling Summary (LEVIATHAN)" date: "`r format(Sys.time(), '%d %b, %Y at %H:%M')`" output: flexdashboard::flex_dashboard: diff --git a/src/harpy/reports/Naibr.Rmd b/src/harpy/reports/Naibr.Rmd index e75bad485..6ce8b3426 100644 --- a/src/harpy/reports/Naibr.Rmd +++ b/src/harpy/reports/Naibr.Rmd @@ -1,5 +1,5 @@ --- -title: "NAIBR Structural Variant Calling Summary" +title: "Structural Variant Calling Summary (NAIBR)" date: "`r format(Sys.time(), '%d %b, %Y at %H:%M')`" output: flexdashboard::flex_dashboard: @@ -267,12 +267,11 @@ You may click on the plot to expand it in your browser window. Clicking it again out of the zoomed view. ```{r} -justcolors <- c("#4a9fea", "#5a8c84", "#ffd75f") - -data.frame(x = 1, y = 1:3, colour = c("Inversion", "Deletion","Duplication")) %>% +color.palette <- c("deletion" = "#5a8c84", "duplication" = "#ffd75f", "inversion" = "#4a9fea") +data.frame(x = 1, y = 1:3, colour = c("inversion", "deletion","duplication")) %>% ggplot(aes(x, y, fill = colour))+ geom_point(alpha=0, shape = 22, color = "white")+ # completely transparent rectangular point - scale_fill_manual(values=justcolors, drop=FALSE) + + scale_fill_manual(values=color.palette, drop=FALSE) + guides(fill = guide_legend(override.aes = list(alpha=1, size = 25)))+ # showing the point in the legend theme(axis.title = element_blank(), axis.text = element_blank(), @@ -388,36 +387,23 @@ allowing you to hover over a variant to provide additional information, includin number of haplotag barcodes supporting the variant. ```{r colors, echo = FALSE, warning = FALSE, message = FALSE} -color.palette <- c( - "deletion" = "#5a8c84", - "duplication" = "#ffd75f", - "inversion" = "#4a9fea" -) - sv$ystart <- case_when( sv$SV == "inversion" ~ 0.1, sv$SV == "duplication" ~ 1.1, sv$SV == "deletion" ~ 2.1 ) sv$ystop <- sv$ystart + 0.8 + +figheight <- nrow(fa.sizes) * 160 ``` -```{r} -l <- list() -#nplotrows <- 0 -for (i in 1:nrow(fa.sizes)) { - sv.filt <- sv %>% filter(Chr1 == fa.sizes$contig[i]) - sv_stats <- group_by(sv.filt, SV) %>% summarise(n = length(SV)) - if (nrow(sv_stats) == 0) { - next - } - plt <- sv.filt %>% - ggplot() + +```{r interactive_plot, fig.height=figheight} +plt <- ggplot(sv) + geom_rect( alpha = 0.7, aes( - xmin = Break1, - xmax = Break2, + xmin = Break1/1000000, + xmax = Break2/1000000, ymin = ystart, ymax = ystop, fill = SV, @@ -439,28 +425,14 @@ for (i in 1:nrow(fa.sizes)) { strip.text = element_text(color = "grey20"), legend.position = "none", ) + + coord_cartesian(xlim = c(0, NA), expand = F) + scale_x_continuous(labels = scales::comma) + - coord_cartesian(xlim = c(0, fa.sizes$size[i] + 1), expand = F) + scale_y_continuous( breaks = c(0.5, 1.5, 2.5), labels = c("INV", "DUP", "DEL"), limits = c(0,3) ) + - xlab("Position (bp)") - annotations <- list( - x = 0.5, - y = 1.05, - text = as.character(fa.sizes$contig[i]), - xref = "paper", - yref = "paper", - xanchor = "center", - yanchor = "bottom", - showarrow = FALSE - ) - l[[i]] <- subplot(ggplotly(plt, height = 800, tooltip = "text")) -} + xlab("Position (Mbp)") -# remove null -l <- Filter(Negate(is.null), l) -subplot(l, nrows = length(l) , shareX = FALSE, titleX = TRUE) +ggplotly(plt, height = figheight, tooltip = "text", margin = list(l = 50, r = 50, b = 100, t = 100, pad = 1)) ``` \ No newline at end of file diff --git a/src/harpy/reports/NaibrPop.Rmd b/src/harpy/reports/NaibrPop.Rmd index 80c36aab4..8175e474f 100644 --- a/src/harpy/reports/NaibrPop.Rmd +++ b/src/harpy/reports/NaibrPop.Rmd @@ -1,5 +1,5 @@ --- -title: "NAIBR Structural Variant Calling Summary" +title: "Structural Variant Calling Summary (NAIBR)" date: "`r format(Sys.time(), '%d %b, %Y at %H:%M')`" output: flexdashboard::flex_dashboard: