-
Notifications
You must be signed in to change notification settings - Fork 0
/
results.qmd
361 lines (294 loc) · 17 KB
/
results.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
# Results
This chapter presents the results of the process rate estimator run over all the available dates, depths and columns. Once the process rate estimator is run as instructed in the previous chapter, the results can be found in the sub-directory `scripts/run-process-rate-estimator/output`, or [in the corresponding sub-directory of the GitHub repository](https://github.com/Damian-Oswald/process-rate-estimator/blob/main/scripts/run-process-rate-estimator/output/estimated-process-rates.csv).
## Looking at the process rates over time
@fig-process-rates-tabset show all results, from column 1 to 12 and depths 7.5 cm, 30 cm, 60 cm, 90 cm and 120 cm.
Note that while the measurements are certainly correlated over time and among depth layers, the process rate estimates are performed independently -- the estimates of one day do not influence the estimates of another day *per se*.
::: {#fig-process-rates-tabset}
::: {.column-screen-inset-right}
```{r}
#| eval: true
#| echo: false
#| results: asis
cat("::: {.panel-tabset}\n\n")
for (column in 1:12) {
if(column==1) {
cat("## Column 1\n\n")
} else {
cat("##", column, "\n\n")
}
cat("::: {.panel-tabset}\n\n")
for (depth in PRE::getParameters()$depths) {
if(depth==7.5) {
cat("## Depth =", depth, "cm\n\n")
} else {
cat("##", depth, "cm\n\n")
}
cat(sprintf("![](scripts/run-process-rate-estimator/output/visualized-process-rates-C%s-D%s.svg){width=\"100%%\"}\n\n", column, depth))
}
cat(":::\n\n")
}
cat(":::\n\n")
```
:::
Estimated process rates over time for all columns and depths. *Note: You may click on the column or depth buttons on the top of the tab set to navigate between the visualizations.*
:::
Notably, looking though all the results, we can observe quite a few cases where the estimations for the process rates are negative -- which we know to be impossible. An overview of the processes, columns and depth layers affected by negative process rate estimates is provided in [table @tbl-negative].
:::{.column-page}
```{r}
#| label: tbl-negative
#| echo: false
#| layout-ncol: 3
#| tbl-cap: Percentage of negative process rate estimates (number of days with negative rates divided by total number of days) for each column, depth and process. Instances of 0% are left blank in this table.
library(magrittr)
library(knitr)
library(kableExtra)
options(knitr.kable.NA = '')
data <- read.csv(file.path("scripts","run-process-rate-estimator","output","estimated-process-rates.csv"))
process <- "Nitrification"
df <- with(data, tapply(get(process)<0, list(column, depth), mean)) * 100
df2 <- round(df)
colnames(df2) <- paste(colnames(df2), "cm")
for (i in 1:5) df2[,i] <- paste0(df2[,i],"%")
df2[df2=="0%"] <- NA
df2 %>%
kbl(row.names = TRUE, caption = process, label = process, align = c("rrrrr")) %>%
column_spec(2, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,1])+1]) %>%
column_spec(3, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,2])+1]) %>%
column_spec(4, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,3])+1]) %>%
column_spec(5, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,4])+1]) %>%
column_spec(6, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,5])+1])
process <- "Denitrification"
df <- with(data, tapply(get(process)<0, list(column, depth), mean)) * 100
df2 <- round(df)
colnames(df2) <- paste(colnames(df2), "cm")
for (i in 1:5) df2[,i] <- paste0(df2[,i],"%")
df2[df2=="0%"] <- NA
df2 %>%
kbl(row.names = TRUE, caption = process, label = process, align = c("rrrrr")) %>%
column_spec(2, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,1])+1]) %>%
column_spec(3, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,2])+1]) %>%
column_spec(4, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,3])+1]) %>%
column_spec(5, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,4])+1]) %>%
column_spec(6, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,5])+1])
process <- "Reduction"
df <- with(data, tapply(get(process)<0, list(column, depth), mean)) * 100
df2 <- round(df)
colnames(df2) <- paste(colnames(df2), "cm")
for (i in 1:5) df2[,i] <- paste0(df2[,i],"%")
df2[df2=="0%"] <- NA
df2 %>%
kbl(row.names = TRUE, caption = process, label = process, align = c("rrrrr")) %>%
column_spec(2, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,1])+1]) %>%
column_spec(3, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,2])+1]) %>%
column_spec(4, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,3])+1]) %>%
column_spec(5, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,4])+1]) %>%
column_spec(6, background = colorRampPalette(c("white","#fc5d5e"))(101)[round(df[,5])+1])
```
:::
@tbl-negative shows that nitrification rates are particularly affected by this phenomenon -- especially at lower depths -- with five cases where negative estimates were made for every single day. Denitrification and reduction rates are much less affected.
## Results of the mean process rates
@fig-scatterplot depicts the values of all mean process rates. Notably, column 12 stands out with its markedly negative denitrification and reduction rates, contrasted by extremely high nitrification rates. This observation aligns with the inherent correlation among the three process rate estimates.
:::{.column-page}
```{r}
#| eval: true
#| echo: false
#| fig-format: svg
#| fig-width: 10.8
#| fig-height: 3.6
#| out-width: 100%
#| label: fig-scatterplot
#| fig-cap: Combinations of scatterplots of the mean estimated process rates for nitrification, denitrification and reduction. Each point represents one mean process rate for a given depth and column. For some estimates, the column is indicated on the top. The grey area represents the physically impossible space.
# read the results
data <- read.csv(file.path("scripts","run-process-rate-estimator","output","estimated-process-rates.csv"))
# compute the mean process rates over entire time
for (x in c("Nitrification", "Denitrification", "Reduction")) {
assign(x, tapply(data[,x], with(data, list(variety, depth, column)), mean))
}
names(dimnames(Nitrification)) <- names(dimnames(Denitrification)) <- names(dimnames(Reduction)) <- c("Variety", "Depth", "Column")
# compute total produced nitrogen
Production <- Nitrification + Denitrification
with(data, tapply(Nitrification, list(depth, column, variety), mean)) |>
reshape2::melt(varnames = c("depth", "column", "variety"), value.name = "Nitrification") |>
na.omit() -> df
with(data, tapply(Denitrification, list(depth, column, variety), mean)) |>
reshape2::melt(varnames = c("depth", "column", "variety"), value.name = "Denitrification") |>
na.omit() |>
subset(select = "Denitrification") |>
unlist() -> df$Denitrification
with(data, tapply(Reduction, list(depth, column, variety), mean)) |>
reshape2::melt(varnames = c("depth", "column", "variety"), value.name = "Reduction") |>
na.omit() |>
subset(select = "Reduction") |>
unlist() -> df$Reduction
f <- function(x, y, ...) {
plot(df[,x], df[,y], pch = 21, las = 1, ...)
polygon(x = c(0,-1000,-1000,0), y = c(-1000,-1000,1000,1000), col = "gray90", border = NA)
polygon(x = c(-1000,1000,1000,-1000), y = c(0,0,-1000,-1000), col = "gray90", border = NA)
polygon(x = c(0,1000,1000,0), y = c(0,0,1000,1000), lwd = 2)
box()
grid(col = 1, lty = 1, lwd = 0.5)
points(df[,x], df[,y], pch = 21, bg = "#fc5d5e")
for (i in 1:length(labellist)) {
text(df[labellist[i],x], df[labellist[i],y], labels = df[labellist[i],"column"], pos = 3, font = 2, cex = 0.8)
}
}
par(mar = c(4,4,0,0)+1, mfrow = c(1,3))
labellist <- "116"
f("Denitrification", "Nitrification",
ylim = c(-10,100),
xlab = expression("Denitrification [g N"[2]*"O-N]"),
ylab = expression("Nitrification [g N"[2]*"O-N]"))
labellist <- rownames(subset(df, subset = Reduction < 0))
f("Reduction", "Nitrification",
ylim = c(-10,100), xlim = c(-30,40),
xlab = expression("Reduction [g N"[2]*"O-N]"),
ylab = expression("Nitrification [g N"[2]*"O-N]"))
f("Denitrification", "Reduction",
ylim = c(-30,45), xlim = c(-10,50),
xlab = expression("Denitrification [g N"[2]*"O-N]"),
ylab = expression("Reduction [g N"[2]*"O-N]"))
```
:::
Although physically impossible, the uncorrected values of mean nitrification, denitrification, and reduction are used for further analyses in the interest of intellectual honesty.
The results presented in [table @tbl-by-depth-and-variety] also show the mean nitrous oxide nitrification ($\ce{N2O}_{\text{nit}}$), denitrification ($\ce{N2O}_{\text{den}}$), consumption ($\ce{N2O}_{\text{consumed}}$), and production ($\ce{N2O}_{\text{produced}}$). The N~2~O production is hereby defined as:
$$\ce{N2O}_{\text{production}} = \ce{N2O}_{\text{nit}} + \ce{N2O}_{\text{den}}$${#eq-N2O-produced}
Meanwhile, the consumption is simply the mean N~2~O reduction.
```{r}
#| echo: false
#| tbl-colwidths: [22,14,16,16,16,16]
#| label: tbl-by-depth-and-variety
#| tbl-cap: "Overall results of the mean estimated process rates (μ ± σ) by depth layer and variety. N~2~O~nit~ is the mean nitrification rate over the entire estimated period, while N~2~O~den~ is the mean denitrification rate. For reference, this table also displays N~2~O~produced~, which is simply N~2~O~nit~ + N~2~O~den~, as well as N~2~O~consumed~, which is the mean reduction rate."
# write function to paste mean ± sd
f <- function(x) paste(signif(mean(x, na.rm = TRUE), 2), "±", signif(sd(x, na.rm = TRUE), 2))
# bind results to data frame
df <- cbind(reshape2::melt(apply(Nitrification, 1:2, f)),
pod = reshape2::melt(apply(Denitrification, 1:2, f))[,3],
red = reshape2::melt(apply(Production, 1:2, f))[,3],
den = reshape2::melt(apply(Reduction, 1:2, f))[,3])
df <- df[order(df[,"Variety"]),]
colnames(df) <- c("Variety", "Depth [cm]", "N~2~O~nit~", "N~2~O~den~", "N~2~O~produced~", "N~2~O~consumed~")
df[,1] <- as.character(df[,1])
df[((1:nrow(df))[-c((0:4)*5+1)]),1] <- ""
knitr::kable(df, row.names = FALSE, align = "llrrrr")
```
## Testing for variety effects
Owing to all processes' significant divergence of the linear model's residuals from a normal distribution, as determined by the Shapiro-Wilk normality test, the Kruskal-Wallis rank sum test was employed to examine group variation among the varieties. These respective tests were performed utilizing the relevant functions -- `shapiro.test` and `kruskal.test`, from the `stats` package, as referred to in [@r2023language].
```{r}
#| echo: false
#| tbl-colwidths: [20,25,30,20,5]
#| label: tbl-kruskal-wallis-variety
#| tbl-cap: Results of the Kruskal-Wallis rank sum test inspecting the effects of variety on the mean estimated process rates.
# read data
data <- read.csv(file.path("scripts","run-process-rate-estimator","output","estimated-process-rates.csv"))
# create empty data frame
results <- data.frame(process = NULL, statistic = NULL, df = NULL, p.value = NULL, stars = NULL)
# repeat this step for each process
for (process in c("Nitrification","Denitrification","Reduction")) {
# compute mean process rate (over all days)
with(data, tapply(get(process), list(depth, column, variety), sum)) |>
reshape2::melt(varnames = c("depth", "column", "variety"), value.name = process) |>
na.omit() -> data2
# conduct Kruskal-Wallis rank sum test
kt <- with(data2, kruskal.test(x = get(process), g = variety))
formatpvalue <- function(p) if (p>0.001) return(signif(p,2)) else return("p < 0.001")
# attach results to results data frame
results <- rbind(results, data.frame(process = process, statistic = kt$statistic, df = kt$parameter, p.value = formatpvalue(kt$p.value), stars = gtools::stars.pval(kt$p.value)))
}
# remove row names
rownames(results) <- NULL
# create table
knitr::kable(results,
col.names = c("Process", "Kruskal-Wallis χ²", "degrees of freedom", "p-value", ""),
align = "lrrrl",
digits = 2)
```
Among the different varieties, there were no significant group differences detected for neither N~2~O~nit~ nor N~2~O~den~, and hence also not for N~2~O~produced~ (@tbl-by-variety). However, significant differences among the varieties for N~2~O~consumed~ were detected (p = 0.01098). The group means and standard deviations for each variety are showcased in [table @tbl-by-variety].
```{r}
#| echo: false
#| tbl-colwidths: [36,16,16,16,16]
#| label: tbl-by-variety
#| tbl-cap: "Values from [table @tbl-by-depth-and-variety] aggregated by variety. The superscript compact letter display indicates significant differences among groups based on Dunn's Kruskal-Wallis multiple comparison (α = 0.05)."
# STATISTICAL TESTS:
# ==================
cld <- function(formula, data, order) {
PT <- FSA::dunnTest(formula, data = data)
CLD <- rcompanion::cldList(comparison = PT$res$Comparison,
p.value = PT$res$P.adj,
threshold = 0.05)
CLD[order,]
}
DEPTH <- VARIETY <- data.frame()
for (j in c("Nitrification", "Denitrification", "Production", "Reduction")) {
df <- na.omit(reshape2::melt(get(j), value.name = "Process"))
for (i in c("Variety", "Depth")) df[,i] <- as.factor(df[,i])
model <- lm(Process ~ Depth, data = df)
PT <- FSA::dunnTest(Process ~ Depth, data = df)
CLD <- rcompanion::cldList(comparison = PT$res$Comparison,
p.value = PT$res$P.adj,
threshold = 0.05)
CLD <- CLD[c(4,2,3,5,1),]
VARIETY <- rbind(VARIETY, data.frame(Process = j, cld(Process ~ Variety, df, 1:4)))
DEPTH <- rbind(DEPTH, data.frame(Process = j, cld(Process ~ Depth, df, c(4,2,3,5,1))))
}
processes <- c("Nitrification", "Denitrification", "Production", "Reduction")
# bind results to data frame
df <- cbind(nit = reshape2::melt(apply(Nitrification, 1, f)),
pod = reshape2::melt(apply(Denitrification, 1, f)),
red = reshape2::melt(apply(Production, 1, f)),
den = reshape2::melt(apply(Reduction, 1, f)))
df <- data.frame(rownames(df), df)
for (i in 1:4) {
CLD <- unlist(subset(VARIETY, subset = Process==processes[i], select = Letter))
if(length(unique(CLD))>1) df[,i+1] <- paste0(df[,i+1], " ^*", CLD,"*^")
}
colnames(df) <- c("Variety", "N~2~O~nit~", "N~2~O~den~", "N~2~O~produced~", "N~2~O~consumed~")
knitr::kable(df, row.names = FALSE, align = "lrrrr")
```
## Testing for depth effects
Conversely, N~2~O~consumed~ did not show any significant group differences with respect to the depth layers. However, all other processes did: N~2~O~nit~ (p = 1.005e-07) nor N~2~O~den~ (p = 0.001029), and hence also for N~2~O~produced~ (p = 3.883e-08).
```{r}
#| echo: false
#| tbl-colwidths: [20,25,30,20,5]
#| label: tbl-kruskal-wallis-depth
#| tbl-cap: Results of the Kruskal-Wallis rank sum test inspecting the effects of depth layer on the mean process rates over all days.
# read data
data <- read.csv(file.path("scripts","run-process-rate-estimator","output","estimated-process-rates.csv"))
# create empty data frame
results <- data.frame(process = NULL, statistic = NULL, df = NULL, p.value = NULL, stars = NULL)
# repeat this step for each process
for (process in c("Nitrification","Denitrification","Reduction")) {
# compute mean process rate (over all days)
with(data, tapply(get(process), list(depth, column, variety), mean)) |>
reshape2::melt(varnames = c("depth", "column", "variety"), value.name = process) |>
na.omit() -> data2
# conduct Kruskal-Wallis rank sum test
kt <- with(data2, kruskal.test(x = get(process), g = depth))
# attach results to results data frame
results <- rbind(results, data.frame(process = process, statistic = kt$statistic, df = kt$parameter, p.value = formatpvalue(kt$p.value), stars = gtools::stars.pval(kt$p.value)))
}
# remove row names
rownames(results) <- NULL
# create table
knitr::kable(results,
col.names = c("Process", "Kruskal-Wallis χ²", "degrees of freedom", "p-value", ""),
align = "lrrrl",
digits = 2)
```
```{r}
#| echo: false
#| tbl-colwidths: [22,14,16,16,16,16]
#| label: tbl-by-depth
#| tbl-cap: "Values in @tbl-by-depth-and-variety aggregated by depth layer. The superscript compact letter display indicates significant differences among groups based on Dunn's Kruskal-Wallis multiple comparison (α = 0.05)."
# bind results to data frame
df <- cbind(nit = reshape2::melt(apply(Nitrification, 2, f)),
pod = reshape2::melt(apply(Denitrification, 2, f)),
red = reshape2::melt(apply(Production, 2, f)),
den = reshape2::melt(apply(Reduction, 2, f)))
df <- data.frame(x = rep("",5), d = rownames(df), df)
colnames(df) <- c("", "Depth [cm]", "N~2~O~nit~", "N~2~O~den~", "N~2~O~produced~", "N~2~O~consumed~")
for (i in 1:4) {
CLD <- unlist(subset(DEPTH, subset = Process==processes[i], select = Letter))
if(length(unique(CLD))>1) df[,i+2] <- paste0(df[,i+2], " ^*", CLD,"*^")
}
knitr::kable(df, row.names = FALSE, align = "llrrrr")
```