diff --git a/src/harpy/reports/HapCut2.Rmd b/src/harpy/reports/HapCut2.Rmd
index b23cda473..0cc42cfeb 100644
--- a/src/harpy/reports/HapCut2.Rmd
+++ b/src/harpy/reports/HapCut2.Rmd
@@ -163,22 +163,6 @@ to collapse the right tails for better readability.
 The table below provides details on the outcome of haplotype phasing on a per-contig basis.
 
 ## Per contig data
-### per contig plot {.no-title}
-```{r warning=FALSE, message=FALSE, echo= FALSE, fig.height=ridgeheight, fig.width=8}
-ggplot(df, aes(x = block_length, y = contig, fill = after_stat(x))) +
-  geom_density_ridges_gradient(scale = 1.4) +
-  scale_fill_viridis_c(option = "C", trans = "log10") +
-  theme_minimal() +
-  theme(legend.position = "none") +
-  scale_x_log10(
-    breaks = trans_breaks("log10", function(x) 10^x),
-    labels = trans_format("log10", math_format(10^.x))
-  ) +
-  labs(title = "Distribution of Haplotype Lengths by Contig", fill = "Haplotype Length (bp)") +
-  xlab("Haplotype Length (log scale)") +
-  ylab("")
-```
-
 ### stats per contig {.no-title}
 ```{r echo= FALSE, message = FALSE, warning = FALSE, out.width="100%"}
 percontig <- df %>% group_by(contig) %>% summarise(
@@ -200,6 +184,25 @@ DT::datatable(
 )
 ```
 
+## contig ridgeplot
+### per contig plot {.no-title}
+```{r warning=FALSE, message=FALSE, echo= FALSE, fig.height=ridgeheight, fig.width=8}
+# after_stat(x) shades each ridge along its x position (stat() is deprecated)
+ggplot(df, aes(x = block_length, y = contig, fill = after_stat(x))) +
+  geom_density_ridges_gradient() +
+  scale_fill_viridis_c(option = "C", trans = "log10") +
+  theme_minimal() +
+  theme(legend.position = "none") +
+  scale_x_log10(
+    breaks = trans_breaks("log10", function(x) 10^x),
+    labels = trans_format("log10", math_format(10^.x))
+  ) +
+  labs(title = "Distribution of Haplotype Lengths by Contig", fill = "Haplotype Length (bp)") +
+  xlab("Haplotype Length (log scale)") +
+  ylab("")
+```
+
+
 # Per-Sample Stats
 ## per sample desc
 ### stats per sample desc {.no-title}
@@ -213,23 +216,7 @@ Phasing typically results in extreme right-tails in these
 distributions, making visualization difficult to plot meaningfully. The plot presented below has **log-scaled lengths**
 to collapse the right tails for better readability.
 
-## per sample
-### the ridgeplot {.no-title}
-```{r warning=FALSE, message=FALSE, echo= FALSE, fig.height=ridgeheight.samples, fig.width=8}
-ggplot(df, aes(x = block_length, y = sample)) +
-  geom_density_ridges_gradient(aes(fill = ..x..), scale = 1.4) +
-  scale_fill_viridis_c(option = "G", trans = "log10") +
-  theme_minimal() +
-  theme(legend.position = "none") +
-  scale_x_log10(
-    breaks = trans_breaks("log10", function(x) 10^x),
-    labels = trans_format("log10", math_format(10^.x))
-  ) +
-  labs(title = "Distribution of Haplotype Lengths by Sample", fill = "Haplotype Length (bp)") +
-  xlab("Haplotype Length (log scale)") +
-  ylab("")
-```
-
+## per sample
 ### stats per sample {.no-title}
 ```{r echo= FALSE, message = FALSE, warning = FALSE, out.width="100%"}
 persample <- df %>% group_by(sample) %>% summarise(
@@ -249,4 +236,22 @@ DT::datatable(
   colnames = c("Sample", "Haplotypes", "Mean SNPs", "Median SNPs", "Mean Haplotype Length", "Median Haplotype Length", "Largest Haplotype"),
   fillContainer = F
 )
-```
\ No newline at end of file
+```
+
+## ridgeplot
+### the ridgeplot {.no-title}
+```{r warning=FALSE, message=FALSE, echo= FALSE, fig.height=ridgeheight.samples, fig.width=8}
+# after_stat(x) shades each ridge along its x position (stat() is deprecated)
+ggplot(df, aes(x = block_length, y = sample, fill = after_stat(x))) +
+  geom_density_ridges_gradient(rel_min_height = 0.01) +
+  scale_fill_viridis_c(option = "G", trans = "log10") +
+  theme_minimal() +
+  theme(legend.position = "none") +
+  scale_x_log10(
+    breaks = trans_breaks("log10", function(x) 10^x),
+    labels = trans_format("log10", math_format(10^.x))
+  ) +
+  labs(title = "Distribution of Haplotype Lengths by Sample", fill = "Haplotype Length (bp)") +
+  xlab("Haplotype Length (log scale)") +
+  ylab("")
+```