-
Notifications
You must be signed in to change notification settings - Fork 0
/
IL6_distr.R
127 lines (97 loc) · 3.86 KB
/
IL6_distr.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Script to analyse distributions of cytokine data
# Written by Jimmy Hermansson
# Modified by Love Ahnström
# Clear environment
rm(list=ls())
# 1. Read data
# Require packages
require(foreign)
require(tidyverse)
require(fitdistrplus)
require(haven)
require(moments)
# This file needs to be downloaded file from Meta Research drive
IL6_only_stacked <- read.csv("IL6-only-stacked.csv")
# To simplify the syntax when writing files
setwd("output")
# Group by dataset, for each group run the model and export csv file, once for dnorm and once for dexp
IL6_only_stacked %>%
group_by(ind) %>%
group_walk(~{
# For readability
IL6_raw <- .x$values
IL6_log <- log(IL6_raw)
dataset_name <- .y$ind[1]
# Fitdistr
test_dnorm <- fitdist(IL6_raw, distr = dnorm)
model_summary_dnorm <- summary(test_dnorm)
# Scaled by a factor of 1/1000 due to model limitations
test_dexp <- fitdist(IL6_raw/1000, distr = dexp)
model_summary_dexp <- summary(test_dexp)
# Shapiro-wilk normality test
shapiro_test <- shapiro.test(IL6_raw)
shapiro_test_log <- shapiro.test(IL6_log)
# Skewness
skewness_norm <- skewness(IL6_raw)
skewness_log <- skewness(IL6_log)
#group_walk silences output to console, going around this by writing csv files instead
df <- data.frame(
dataset=.y$ind[1],
n=model_summary_dnorm$n,
w_raw = round(shapiro_test$statistic, digits=3),
p_raw = signif(shapiro_test$p.value, digits=4),
w_log = round(shapiro_test_log$statistic, digits=3) ,
p_log = signif(shapiro_test_log$p.value, digits=4),
loglik_norm=round(model_summary_dnorm$loglik, digits=1),
aic_norm=round(model_summary_dnorm$aic, digits=1),
bic_norm=round(model_summary_dnorm$bic, digits=1),
loglik_exp=round(model_summary_dexp$loglik, digits=1),
aic_exp=round(model_summary_dexp$aic, digits=1),
bic_exp=round(model_summary_dexp$bic, digits=1),
skew_norm=round(skewness_norm, digits=3),
skew_log=round(skewness_log, digits=3)
)
write.csv(df, sprintf("%s-fitdistr.csv", .y$ind[1]), row.names = F)
# Create plots
histIL6 <- hist(IL6_raw)
histlogIL6 <- hist(IL6_log)
qqIL6 <- qqnorm(IL6_raw)
qqlogIL6 <- qqnorm(IL6_log)
# Write to pdf
setwd("../pdf")
pdf(sprintf("%s.pdf", dataset_name), width = 5, height = 5)
par(mfrow=c(2,2), mar=c(3,3,3,3))
plot(histIL6, main = "Raw data", xlab = "IL-6 (pg/µl)")
plot(histlogIL6, main = "Log-transformed", xlab = "IL-6 (pg/µl)")
plot(qqIL6, xlab = "Theoretical Quantiles", ylab = "Sample Quantiles", frame.plot = F)
abline(0,1)
plot(qqlogIL6, xlab = "Theoretical Quantiles", ylab = "Sample Quantiles", frame.plot = F)
abline(0,1)
dev.off()
# Write to image
setwd("../png")
png(sprintf("%s.png", dataset_name), width = 5, height = 5, units="in", res=150)
par(mfrow=c(2,2), mar=c(3,3,3,3))
plot(histIL6, main = "Raw data", xlab = "IL-6 (pg/µl)")
plot(histlogIL6, main = "Log-transformed", xlab = "IL-6 (pg/µl)")
plot(qqIL6, xlab = "Theoretical Quantiles", ylab = "Sample Quantiles", frame.plot = F)
abline(0,1)
plot(qqlogIL6, xlab = "Theoretical Quantiles", ylab = "Sample Quantiles", frame.plot = F)
abline(0,1)
dev.off()
setwd("../output")
})
#Read and merge the CSV files again
outputs <- list.files(path='.') %>%
lapply(read_csv) %>%
bind_rows
write.csv(outputs, "../all_outputs.csv", row.names = F)
setwd("..")
png("sample-size-norm.png", width = 5, height = 5, units="in", res=150)
plot(n ~ skew_norm, data = outputs, main = "Raw data", xlab="Skewness")
abline(v=1)
dev.off()
png("sample-size-log.png", width = 5, height = 5, units="in", res=150)
plot(n ~ skew_log, data = outputs, main = "Log-transformed", xlab="Skewness")
abline(v=1)
dev.off()