-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfragment-size-distributions.qmd
123 lines (104 loc) · 2.75 KB
/
fragment-size-distributions.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
---
title: "Fragement Size Distribution"
author: "Lance Parsons"
date: last-modified
format:
html:
toc: true
code-fold: true
df-print: paged
embed-resources: true
fig-format: svg
editor: source
---
# Load libraries
This project uses [`renv`](https://rstudio.github.io/renv/articles/renv.html)
to keep track of installed packages. Install `renv` if not installed and load
dependencies with `renv::restore()`.
```r
install.packages("renv")
renv::restore()
```
```{r}
#| label: load-packages
#| include: false
#| message: false
library("readr")
library("dplyr")
library("tidyr")
library("stringr")
library("ggplot2")
```
# Read data
## Sample metadata
{{< include _sample-metadata.qmd >}}
## Size distributions
```{r}
#| label: read-size-distributions
human_isize_dir <-
"results/picard_insert_size/Homo_sapiens.GRCh38.dna.primary_assembly/"
exogenous_rna_isize_dir <- "results/picard_insert_size/exogenous_rna/"
isize_hist_data <- tibble(
"insert_size" = integer(),
"count" = integer(),
"sample_unit" = character(),
"genome" = character()
)
for (i in seq_along(sample_units$sample_unit)) {
this_isize_hist_data <- readr::read_tsv(
paste0(
human_isize_dir,
sample_units$sample_unit[i],
".isize.txt"
),
comment = "#",
skip = 11,
col_names = c("insert_size", "count"),
col_types = "ii"
) %>%
mutate(sample_unit = sample_units$sample_unit[i], genome = "human")
isize_hist_data <- rbind(isize_hist_data, this_isize_hist_data)
this_isize_hist_data <- readr::read_tsv(
paste0(
exogenous_rna_isize_dir,
sample_units$sample_unit[i],
".isize.txt"
),
comment = "#",
skip = 11,
col_names = c("insert_size", "count"),
col_types = "ii"
) %>%
mutate(sample_unit = sample_units$sample_unit[i], genome = "exogenous_rna")
isize_hist_data <- rbind(isize_hist_data, this_isize_hist_data)
}
isize_hist_data <- isize_hist_data %>%
dplyr::full_join(sample_units, by = "sample_unit")
```
# Histograms
```{r, fig.width=10, fig.height=10}
#| label: histograms
#| fig-width: 10
#| fig-height: 10
# Choose colors
colors <- c("#003f5c", "#ffa600")
# Group data
isize_hist_data_grouped <- isize_hist_data %>%
group_by(
insert_size, sample_unit, sample_name, batch,
cell_line, exogenous_rna, day, unit_name
) %>%
summarise(count = sum(count), .groups = "keep") %>%
group_by(sample_unit)
# Plot
ggplot(
data = isize_hist_data_grouped %>% mutate(normcount = count / sum(count)),
aes(x = insert_size, y = normcount, group = sample_unit)
) +
geom_line(aes(color = cell_line)) +
scale_color_manual(values = colors) +
ylab("Fraction of fragments of each size") +
xlab("Fragment size") +
facet_wrap(facets = ~ interaction(day, exogenous_rna), ncol = 2) +
theme_bw()
```