From af770cacf4cbffd0f7ccf5b84642a216b9504cfb Mon Sep 17 00:00:00 2001
From: pdimens <pdimens@live.com>
Date: Fri, 12 Jul 2024 10:55:14 -0400
Subject: [PATCH 1/2] rm ggplot and ggridges code

---
 src/harpy/reports/HapCut2.Rmd | 166 ++++++++++++++++++++--------------
 1 file changed, 96 insertions(+), 70 deletions(-)
diff --git a/src/harpy/reports/HapCut2.Rmd b/src/harpy/reports/HapCut2.Rmd
index a7ffbf406..282ed6d21 100644
--- a/src/harpy/reports/HapCut2.Rmd
+++ b/src/harpy/reports/HapCut2.Rmd
@@ -23,25 +23,7 @@ using<-function(...) {
         lapply(need,require,character.only=TRUE)
     }
 }
-using("flexdashboard","dplyr","ggplot2","DT","scales", "highcharter","ggridges")
-```
-
-```{css zoom-lib-src, echo = FALSE, message = FALSE, warning = FALSE}
-script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.7.1/jquery.min.js"
-```
-```{js zoom-jquery, echo = FALSE, message = FALSE, warning = FALSE}
- $(document).ready(function() {
-    $('body').prepend('<div class=\"zoomDiv\"><img src=\"\" class=\"zoomImg\"></div>');
-    // onClick function for all plots (img's)
-    $('img:not(.zoomImg)').click(function() {
-      $('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'});
-      $('.zoomDiv').css({opacity: '1', width: 'auto', border: '1px solid white', borderRadius: '5px', position: 'fixed', top: '50%', left: '50%', marginRight: '-50%', transform: 'translate(-50%, -50%)', boxShadow: '0px 0px 50px #888888', zIndex: '50', overflow: 'auto', maxHeight: '100%'});
-    });
-    // onClick function for zoomImg
-    $('img.zoomImg').click(function() {
-      $('.zoomDiv').css({opacity: '0', width: '0%'}); 
-    });
-  });
+using("flexdashboard","dplyr","DT","scales", "plotly", "highcharter")
 ```
 
 ```{r echo= FALSE, message = FALSE, warning = FALSE}
@@ -66,15 +48,15 @@ contigs <- group_by(df, contig) %>%
   summarize(size = max(pos_end)) %>%
   arrange(desc(size))
 # limit the data to only the 30 largest contigs
-if (nrow(contigs) > 30){
-    .contigs <- contigs[1:30, ]$contig
-} else {
-    .contigs <- contigs$contig
-}
+#if (nrow(contigs) > 30){
+#    .contigs <- contigs[1:30, ]$contig
+#} else {
+#    .contigs <- contigs$contig
+#}
 #pltheight <- round(1.2 * (length(.contigs)), digits = 0)
-ridgeheight <- 1 + round(0.7 * (length(.contigs)), digits = 0)
+#ridgeheight <- 1 + round(0.7 * (length(.contigs)), digits = 0)
 #pltheight.samples <- <- round(1.2 * (length(levels(df$sample))), digits = 0)
-ridgeheight.samples <- 1 + round(0.7 * (length(levels(df$sample))), digits = 0)
+#ridgeheight.samples <- 1 + round(0.7 * (length(levels(df$sample))), digits = 0)
 ```
 
 # General Stats
@@ -157,15 +139,31 @@ hchart(hs, "areaspline", hcaes(x = val, y = freq), color = "#7eb495", name = "co
 ### table per contig {.no-title}
 <h2> Stats Per Contig </h2>
 The plot below shows the distribution of haplotype length (in base pairs) for 
-each contig, up to 30 of the largest contigs, the size of whom is inferred/assumed
-from the end position of the last haplotype. Phasing typically results in extreme
-right-tails in these distributions, therefore the plot presented below has **log-scaled lengths**
-to collapse the right tails for better readability. 
-The table below provides details on the outcome of haplotype phasing on a per-contig basis.
+each contig, up to 300 contigs. The dotted vertical bar represents the mean haplotype length.
+
+Phasing typically produces a few very large haplotypes, resulting in extreme 
+right tails in these distributions; therefore, the haplotype lengths are **log-scaled**
+to collapse the right tail for better readability.
+
+```{r, echo= FALSE, message = FALSE, warning = FALSE}
+dropdown_buttons <- function(COLUMN, cutoff){
+  # Build a plotly `updatemenus` dropdown: one "restyle" button per category, capped at `cutoff`.
+  categories <- unique(COLUMN)
+  # size the visibility flags to the traces actually kept, not to every category
+  n_shown <- min(length(categories), max(cutoff, 0))
+  buttonlist <- lapply(seq_len(n_shown), function(idx){
+    visibility <- as.list(rep(FALSE, n_shown))
+    visibility[[idx]] <- TRUE  # only this button's trace is shown
+    list(method = "restyle", label = as.character(categories[idx]), args = list("visible", visibility))
+  })
+  # same nested list shape as before: a list holding one menu definition
+  list(list(y = 1, buttons = buttonlist))
+}
+```
 
 ## Per contig data
 ### stats per contig {.no-title}
-```{r echo= FALSE, message = FALSE, warning = FALSE, out.width="100%"}
+```{r echo= FALSE, message = FALSE, warning = FALSE,paged.print=TRUE, out.width="100%"}
 percontig <- df %>% group_by(contig) %>% summarise(
     n_haplo = round(length(n_snp)),
     mean_snps = round(mean(n_snp), digits = 0),
@@ -179,29 +177,42 @@ DT::datatable(
   percontig,
   rownames = F,
   extensions = 'Buttons',
-  options = list(dom = 'Brtip', buttons = c('csv'), scrollX = TRUE, pageLength = length(levels(df$contig))),
+  options = list(dom = 'Brtip', buttons = c('csv'), scrollX = TRUE),
   colnames = c("Contig", "Total Haplotypes", "Mean SNPs", "Median SNPs", "Mean Haplotype Length", "Median Haplotype Length", "Largest Haplotype"),
-  fillContainer = F
+  fillContainer = T
   )
 ```
 
-## contig ridgeplot
 ### per contig plot {.no-title}
-```{r contig_ridgeplot, warning=FALSE, message=FALSE, echo= FALSE, fig.height=ridgeheight, fig.width=8}
-ggplot(df, aes(x = block_length, y = contig, fill = stat(x))) +
-  geom_density_ridges_gradient() +
-  scale_fill_viridis_c(option = "C", trans = "log10") +
-  theme_minimal() +
-  theme(legend.position = "none") +
-  scale_x_log10(
-     breaks = trans_breaks("log10", function(x) 10^x),
-     labels = trans_format("log10", math_format(10^.x))
-  ) +
-  labs(title = "Distribution of Haplotype Lengths by Contig", fill = "Haplotype Length (bp)") +
-  xlab("Haplotype Length (log scale)") +
-  ylab("")
-```
+```{r contig_ridges, warning=FALSE, message=FALSE, echo= FALSE, out.width="100%"}
+# Empty canvas; the dropdown from dropdown_buttons() switches which contig's violin is shown
+max_contigs <- 300
+fig <- plot_ly(hoverinfo = "none") %>%
+  layout(
+    xaxis = list(title = "Log-Scaled Haplotype Length (bp)", fixedrange = TRUE),
+    yaxis = list(fixedrange = TRUE),
+    title = "Distribution of Haplotype Lengths by Contig",
+    showlegend = FALSE,
+    updatemenus = dropdown_buttons(df$contig, max_contigs)
+  ) %>%
+  config(displayModeBar = FALSE)
+# Add one half-violin per contig, keeping only the first `max_contigs` contigs
+for (cont in head(unique(df$contig), max_contigs)) {
+  subset_cont <- df$block_length[df$contig == cont]
+  fig <- fig %>%
+    add_trace(
+      x = log(subset_cont),  # natural log of the haplotype lengths
+      type = "violin",
+      name = cont,
+      side = "positive",
+      meanline = list(visible = TRUE),
+      # traces start hidden; a dropdown button toggles `visible` via restyle
+      visible = "legendonly"
+    )
+}
+
+fig
+```
 
 # Per-Sample Stats
 ## per sample desc
@@ -209,12 +220,14 @@ ggplot(df, aes(x = block_length, y = contig, fill = stat(x))) +
 <h1> Per-Sample Stats </h1>
 Haplotype phasing occurs per-sample and the table below provides details on the outcome of haplotype phasing for each sample.
 
-The ridgeline plot below shows the distribution of haplotype lengths (in base pairs) for each 
+The plot below shows the distribution of haplotype lengths (in base pairs) for each 
 sample. While no Y-axis for these counts are provided, this plot is intended to be more of
 a visual reference of the relative distributions of haplotype lengths among samples.
-Phasing typically results in extreme right-tails in these distributions, making regular 
-visualization difficult to plot meaningfully. The plot presented below has 
-**log-scaled lengths** to collapse the right tails for better readability. 
+The dotted vertical bar represents the mean haplotype length. 
+
+Phasing typically produces a few very large haplotypes, resulting in extreme 
+right tails in these distributions; therefore, the haplotype lengths are
+**log-scaled** to collapse the right tail for better readability.
 
 ## per sample
 ### stats per sample {.no-title}
@@ -232,25 +245,38 @@ DT::datatable(
   persample,
   rownames = F,
   extensions = 'Buttons',
-  options = list(dom = 'Brtip', buttons = c('csv'), scrollX = TRUE, pageLength = length(levels(df$sample))),
+  options = list(dom = 'Brtip', buttons = c('csv'), scrollX = TRUE),
   colnames = c("Sample", "Haplotypes", "Mean SNPs", "Median SNPs", "Mean Haplotype Length", "Median Haplotype Length", "Largest Haplotype"),
-  fillContainer = F
+  fillContainer = T
 )
 ```
 
-## ridgeplot
 ### the ridgeplot {.no-title}
-```{r sample_ridgeplot, warning=FALSE, message=FALSE, echo= FALSE, fig.height=ridgeheight.samples, fig.width=8}
-ggplot(df, aes(x = block_length, y = sample, fill = stat(x))) +
-  geom_density_ridges_gradient(scale = 0.1) +
-  scale_fill_viridis_c(option = "G", trans = "log10") +
-  theme_minimal() +
-  theme(legend.position = "none") +
-  scale_x_log10(
-     breaks = trans_breaks("log10", function(x) 10^x),
-     labels = trans_format("log10", math_format(10^.x))
-  ) +
-  labs(title = "Distribution of Haplotype Lengths by Sample", fill = "Haplotype Length (bp)") +
-  xlab("Haplotype Length (log scale)") +
-  ylab("")
-```
+```{r sample_ridgeplot, warning=FALSE, message=FALSE, echo= FALSE, out.width="100%"}
+# Empty canvas; the dropdown from dropdown_buttons() switches which sample's violin is shown
+max_samples <- 600
+fig <- plot_ly(hoverinfo = "none") %>%
+  layout(
+    xaxis = list(title = "Log-Scaled Haplotype Length (bp)", fixedrange = TRUE),
+    yaxis = list(fixedrange = TRUE),
+    title = "Distribution of Haplotype Lengths by Sample",
+    showlegend = FALSE,
+    updatemenus = dropdown_buttons(df$sample, max_samples)
+  ) %>%
+  config(displayModeBar = FALSE)
+# Add one half-violin per sample, keeping only the first `max_samples` samples
+for (samp in head(unique(df$sample), max_samples)) {
+  subset_sample <- df$block_length[df$sample == samp]
+  fig <- fig %>%
+    add_trace(
+      x = log(subset_sample),  # natural log of the haplotype lengths
+      type = "violin",
+      name = samp,
+      side = "positive",
+      meanline = list(visible = TRUE),
+      visible = "legendonly"
+    )
+}
+
+fig
+```
\ No newline at end of file

From 31d9b5e689b666d5c9059d9d5f006abd47d99ecc Mon Sep 17 00:00:00 2001
From: pdimens <pdimens@live.com>
Date: Fri, 12 Jul 2024 10:57:23 -0400
Subject: [PATCH 2/2] rm ggridges dep

---
 src/harpy/conda_deps.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/harpy/conda_deps.py b/src/harpy/conda_deps.py
index 6efa0b06d..fdc084d85 100644
--- a/src/harpy/conda_deps.py
+++ b/src/harpy/conda_deps.py
@@ -12,7 +12,7 @@ def generate_conda_deps():
         "sv": ["bioconda::leviathan", "bioconda::naibr-plus"],
         "phase" : ["bioconda::hapcut2", "bioconda::whatshap"],
         "simulations" : ["conda-forge::perl", "bioconda::perl-math-random", "bioconda::perl-inline-c", "bioconda::perl-parse-recdescent", "conda-forge::numpy", "bioconda::dwgsim", "alienzj::msort"],
-        "r" : ["conda-forge::r-xml2", "conda-forge::r-highcharter", "conda-forge::r-circlize", "r::r-biocircos", "conda-forge::r-dt", "conda-forge::r-flexdashboard", "conda-forge::r-ggplot2", "conda-forge::r-ggridges", "conda-forge::r-plotly", "conda-forge::r-tidyr"],
+        "r" : ["conda-forge::r-xml2", "conda-forge::r-highcharter", "conda-forge::r-circlize", "r::r-biocircos", "conda-forge::r-dt", "conda-forge::r-flexdashboard", "conda-forge::r-ggplot2", "conda-forge::r-plotly", "conda-forge::r-tidyr"],
         "stitch" : ["bioconda::r-stitch=1.6.10"]
     }
     os.makedirs(".harpy_envs", exist_ok = True)