-
Notifications
You must be signed in to change notification settings - Fork 3
/
benchmark_forecast_comparison.R
179 lines (151 loc) · 9.7 KB
/
benchmark_forecast_comparison.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# Author: Kate Doubleday
# Last updated: 5/13/2020
# -----------------------------------------------------------------
# Execute this script to generate a Results/ folder containing 200+ plots comparing the benchmarks
# for each of the 7 SURFRAD sites, .csv files containing the forecast quantiles at each
# time-step through the year 2018, and .csv files comparing the CRPS summary statistics
# for each site across the methods at each temporal scale. If the ECMWF data is not available,
# the two ECMWF-based forecasts will be skipped.
# -----------------------------------------------------------------
# -----------------------------------------------------------------
# Load dependencies
library(solarbenchmarks)
# -----------------------------------------------------------------
print("Benchmark beginning, and will run for a while. See Results directory and its sub-directories.")
# -----------------------------------------------------------------
# Input data locations and the Results/ output tree
telemetry_directory <- here::here("GHI_files")
forecast_directory <- here::here("ECMWF_files", "SURFRAD_sites")
output_directory <- here::here("Results")
pit_directory <- here::here("Results", "PIT_histograms")
ts_directory <- here::here("Results", "Quantile_time_series")
qs_directory <- here::here("Results", "Quantile_score_plots")
# Create the Results/ root first, then each sub-directory (quietly if already present)
for (results_dir in c(output_directory, pit_directory, ts_directory, qs_directory)) {
  dir.create(results_dir, showWarnings = FALSE)
}
# The 7 SURFRAD ground stations evaluated in the benchmark; names must match
# the NetCDF telemetry/forecast file names
site_names <- c("Bondville",
                "Boulder",
                "Desert_Rock",
                "Fort_Peck",
                "Goodwin_Creek",
                "Penn_State",
                "Sioux_Falls")
# Forecast quantile levels: the 1st through 99th percentiles
percentiles <- seq(0.01, 0.99, by=0.01)
intervals <- seq(0.1, 0.9, by=0.1) # Central intervals for sharpness evaluation
num_peen_days <- 20 # Number of members in the hourly persistence ensemble
intrahour_training_hours <- 2 # Number of hours of preceding data to use for training intra-hour PeEn and intra-hour Gaussian methods
mcm_training_days <- 20 # Number of days of preceding data to use for training intra-hour MCM method
histogram_bins <- 20 # Number of bins to use in PIT histogram
# Temporal resolutions analyzed; each selects a telemetry sub-directory and a method list
resolution <- c("Hourly", "Intrahour")
# Choose the appropriate subset of forecast methods, based on temporal resolution
# and ECMWF data availability: the two ECMWF-based hourly forecasts are included
# only when a NetCDF forecast file exists for every site.
# vapply (not sapply) guarantees a character vector even if site_names were empty.
expected_nc_files <- vapply(site_names, FUN=function(site) paste0(site, ".nc"), FUN.VALUE=character(1))
if (all(expected_nc_files %in% list.files(forecast_directory))) {
  forecast_names <- list(Hourly=c("Climatology", "Ch-PeEn", "PeEn", "ECMWF Ensemble", "ECMWF Gaussian"))
} else {
  forecast_names <- list(Hourly=c("Climatology", "Ch-PeEn", "PeEn"))
}
# Intra-hour methods do not depend on ECMWF data, so the list is fixed
forecast_names$Intrahour <- c("Climatology", "Ch-PeEn", "PeEn", "Smart persistence Gaussian", "MCM")
# Metrics computed for each method: unweighted CRPS and three quantile-weighted variants
metric_names <- c("CRPS", "Left-tail weighted CRPS", "Center weighted CRPS", "Right-tail weighted CRPS")
# -----------------------------------------------------------------
# Data export options
# Option to export a .csv of the forecast quantiles at every time step
csv_export <- TRUE  # was T: T/F are ordinary (reassignable) variables, not reserved words
# Option to export .R graph objects alongside the rendered plots
R_graph_export <- FALSE
# -----------------------------------------------------------------
# Run every benchmark forecast for one site at one temporal resolution,
# score it, and write the plots/.csv files as side effects.
#
# Arguments:
#   res               -- "Hourly" or "Intrahour"; selects the telemetry
#                        sub-directory and the method list in forecast_names
#   site              -- SURFRAD site name, matching the NetCDF file names
#   metrics_df        -- site x method x metric array to fill in
#   reliability_df    -- site x method x percentile array to fill in
#   interval_width_df -- site x method x interval array to fill in
#
# Returns a named list of the three arrays with this site's slices populated.
#
# NOTE(review): reads many globals defined earlier in this script
# (forecast_directory, telemetry_directory, percentiles, intervals,
# num_peen_days, intrahour_training_hours, mcm_training_days, histogram_bins,
# csv_export, R_graph_export, and the output directory paths).
get_site_data <- function(res, site, metrics_df, reliability_df, interval_width_df) {
  # Load GHI ECMWF forecasts, if this site's NetCDF forecast file is present
  if (file.exists(file.path(forecast_directory, paste(site, ".nc", sep="")))) {
    nc <- ncdf4::nc_open(file.path(forecast_directory, paste(site, ".nc", sep="")))
    nwp <- ncdf4::ncvar_get(nc, varid="irradiance")
    ncdf4::nc_close(nc)
    ndays <- nrow(nwp) # For loading in the same number of days of SURFRAD data, starting Jan. 1st. Useful during testing
  } else{ # If data is unavailable
    ndays <- 365 # Full year of telemetry (2018 is not a leap year)
  }
  # Load GHI telemetry and sun_up indicator for 2018
  # (file located by glob match on site name and year within the resolution sub-directory)
  nc <- ncdf4::nc_open(file.path(telemetry_directory, res, list.files(file.path(telemetry_directory, res), pattern=glob2rx(paste("*", site, "*2018*", sep="")))))
  GHI <- ncdf4::ncvar_get(nc, varid="irradiance", count=c(ndays, -1))
  sun_up <- ncdf4::ncvar_get(nc, varid="sun_up", count=c(ndays, -1))
  clearsky_GHI <- ncdf4::ncvar_get(nc, varid="clearsky_irradiance", count=c(ndays, -1))
  ncdf4::nc_close(nc)
  # Convert the numeric sun_up mask to logical while preserving its day x time-step shape
  sun_up <- apply(sun_up, MARGIN = c(1,2), FUN = as.logical)
  # Load GHI telemetry and sun_up indicator from end of 2017 for PeEn forecast
  nc <- ncdf4::nc_open(file.path(telemetry_directory, res, list.files(file.path(telemetry_directory, res), pattern=glob2rx(paste("*", site, "*2017*", sep="")))))
  GHI_2017 <- ncdf4::ncvar_get(nc, varid="irradiance")
  clearsky_GHI_2017 <- ncdf4::ncvar_get(nc, varid="clearsky_irradiance")
  sun_up_2017 <- ncdf4::ncvar_get(nc, varid="sun_up")
  ncdf4::nc_close(nc)
  sun_up_2017 <- apply(sun_up_2017, MARGIN = c(1,2), FUN = as.logical)
  # -----------------------------------------------------------------
  # Conduct forecasts and get metrics
  ts_per_hour <- ncol(GHI)/24 # Time steps per hour (each row of GHI spans one day)
  for (method in forecast_names[[res]]) {
    # Dispatch to the benchmark implementation for this method name.
    # Day x time-step matrices are flattened row-wise (as.vector(t(.)))
    # into a single chronological series for the intra-hour and scoring functions.
    if (method=="Climatology") {
      fc <- forecast_climatology(GHI, percentiles, sun_up)
    } else if (method=="ECMWF Ensemble") {
      fc <- forecast_NWP(nwp, percentiles, sun_up)
    } else if (method=="Ch-PeEn") {
      fc <- forecast_CH_PeEn(GHI, percentiles, sun_up, clearsky_GHI, ts_per_hour)
    } else if (method=="PeEn") {
      if (res == "Hourly") {
        fc <- forecast_PeEn_hourly(GHI, percentiles, sun_up, num_peen_days, GHI_2017)
      } else fc <- forecast_PeEn_intrahour(as.vector(t(GHI)), percentiles, as.vector(t(sun_up)),
                                           as.vector(t(clearsky_GHI)), ts_per_hour=ts_per_hour, nhours=intrahour_training_hours)
    } else if (method=="ECMWF Gaussian"){
      fc <- forecast_Gaussian_hourly(nwp, GHI, percentiles, sun_up, clearsky_GHI)
    } else if (method=="Smart persistence Gaussian") {
      fc <- forecast_Gaussian_intrahour(as.vector(t(GHI)), percentiles, as.vector(t(sun_up)),
                                        as.vector(t(clearsky_GHI)), ts_per_hour=ts_per_hour, nhours=intrahour_training_hours)
    } else if (method=="MCM") {
      fc <- forecast_mcm(as.vector(t(GHI)), as.vector(t(GHI_2017)),
                         percentiles, as.vector(t(sun_up)), as.vector(t(sun_up_2017)),
                         as.vector(t(clearsky_GHI)), as.vector(t(clearsky_GHI_2017)),
                         ts_per_hour, mcm_training_days)
    } else stop(paste("Forecast method", method, "not recognized"))
    # Score the forecast: unweighted CRPS plus left/center/right quantile-weighted variants
    metrics_df[site, method, "CRPS"] <- qwCRPS(fc, as.vector(t(GHI)), as.vector(t(sun_up)), weighting = "none")
    metrics_df[site, method, "Left-tail weighted CRPS"] <- qwCRPS(fc, as.vector(t(GHI)), as.vector(t(sun_up)), weighting = "left")
    metrics_df[site, method, "Center weighted CRPS"] <- qwCRPS(fc, as.vector(t(GHI)), as.vector(t(sun_up)), weighting = "center")
    metrics_df[site, method, "Right-tail weighted CRPS"] <- qwCRPS(fc, as.vector(t(GHI)), as.vector(t(sun_up)), weighting = "right")
    reliability_df[site, method,] <- reliability(fc, as.vector(t(GHI)), as.vector(t(sun_up)))
    interval_width_df[site, method,] <- interval_width(fc, as.vector(t(sun_up)), intervals = intervals)$widths
    # If desired, export this forecast matrix of quantiles to a .csv file for future use.
    if (csv_export) {
      write.table(fc, file=file.path(ts_directory, paste(site, res, method, "quantiles.csv", sep=" ")), sep=",", row.names=F)
    }
    # Plot PIT histogram
    plot_PIT_histogram(fc, as.vector(t(GHI)), as.vector(t(sun_up)), histogram_bins, site, res, method, pit_directory, R_graph_export)
    # Plot unweighted and weighted quantile score comparisons
    plot_quantile_score(fc, as.vector(t(GHI)), as.vector(t(sun_up)), percentiles, site, res, method, qs_directory, R_graph_export)
    # Export sample forecasts: Days 133-135, May 13-15th
    # (flattened time-step indices, offset 6 h into day 133 through 6 h into day 136)
    window <- c(132*24*ts_per_hour+1 + 6*ts_per_hour, 135*24*ts_per_hour + 6*ts_per_hour)
    g <- plot_fanplot(fc, as.vector(t(GHI)), window, ts_per_hour, output_directory, site, res, method, R_graph_export)
  }
  return(list(metrics_df=metrics_df, reliability_df=reliability_df, interval_width_df=interval_width_df))
}
# Repeat the analysis for each temporal resolution: Hourly forecast or intra-hourly forecast
# Point shapes for the method-comparison graphs (one per forecast method);
# hoisted above the loops because the value is loop-invariant.
shapes <- c(17, 0, 16, 5, 4)
for (res in resolution) {
  # Preallocate 3-D result arrays: site x method x (metric | percentile | interval)
  methods <- forecast_names[[res]]
  metrics_df <- array(dim=c(length(site_names), length(methods), length(metric_names)),
                      dimnames=list(site_names, methods, metric_names))
  reliability_df <- array(dim=c(length(site_names), length(methods), length(percentiles)),
                          dimnames=list(site_names, methods, percentiles))
  interval_width_df <- array(dim=c(length(site_names), length(methods), length(intervals)),
                             dimnames=list(site_names, methods, intervals))
  # Repeat the analysis for each of the 7 SURFRAD sites
  for (site in site_names) {
    # Conduct the forecast validation for this site, saving the results for
    # each method back into the three accumulator arrays
    results <- get_site_data(res, site, metrics_df, reliability_df, interval_width_df)
    metrics_df <- results$metrics_df
    reliability_df <- results$reliability_df
    interval_width_df <- results$interval_width_df
    # Export reliability and interval width graphs comparing the methods for each site
    plot_reliability(reliability_df, site, res, shapes, output_directory, R_graph_export)
    plot_interval_width(interval_width_df, site, res, shapes, output_directory, R_graph_export)
  }
  # -----------------------------------------------------------------
  # Export CRPS metrics table, comparing the methods across the sites:
  # one stacked section per metric, each headed by the metric name.
  # tryCatch(finally=) guarantees the sink is released even if a write fails,
  # so later console output is not silently swallowed into the file.
  sink(file.path(output_directory, paste("CRPS", res, "results.csv", sep="_")))
  tryCatch({
    for (metric in dimnames(metrics_df)[[3]]) {
      cat(paste(metric, "\n"))
      write.csv(metrics_df[,,metric])
      cat("\n")
    }
  }, finally = sink())
}