Commit a668f2c5 authored by fabian.krueger

compute skill scores for evaluation table

parent f0f0f2a6
@@ -20,30 +20,55 @@ for(fl in eval_files){
 }
 }
-# need to trnasform booleans to numeric to avoid weird behaviour:
+# need to transform booleans to numeric to avoid weird behaviour:
 all_evals$interval_coverage_0.5 <- as.numeric(all_evals$interval_coverage_0.5)
 all_evals$interval_coverage_0.95 <- as.numeric(all_evals$interval_coverage_0.95)
 all_evals$scores_imputed <- as.numeric(all_evals$scores_imputed)
 #summary_all_evals <- aggregate(cbind(ae, mean_qscore, interval_coverage_0.5, interval_coverage_0.95) ~ model + target + horizon,
 #                               data = all_evals, FUN = mean, na.rm = TRUE)
 # add number of forecasts counted:
 #all_evals$n <- 1
 #n_forecasts <- aggregate(cbind(n, scores_imputed) ~ model + target + horizon,
 #                         data = all_evals, FUN = sum, na.rm = TRUE)
 #summary_all_evals <- merge(summary_all_evals, n_forecasts, by = c("model", "target", "horizon"))
-# compute summary evaluation
-summary_all_evals <- all_evals %>%
+# step 1: prepare data
+summary_all_evals1 <- all_evals %>%
   # remove weather forecasts from Oct 27 (location changed from KA to B)
   filter(! (forecast_date == "2021-10-27" & target != "DAX") ) %>%
   # remove cases with missing ae (due to missing truth data)
   filter(!is.na(ae)) %>%
-  # compute mean scores, count # of forecasts and # of imputed scores
+  # remove imputed benchmark forecasts for other variables
+  filter(! ( (model == "DAX_benchmark") & (target != "DAX") ),
+         ! ( (model == "t2m_benchmark") & (target != "temperature") ),
+         ! ( (model == "wind_benchmark") & (target != "wind") )) %>%
+  # assign same name to all benchmark models
+  mutate(model = if_else(grepl("benchmark", model), "Benchmark", model))
+# step 2: stats by model, target and horizon
+summary_all_evals2 <- summary_all_evals1 %>%
   group_by(model, target, horizon) %>%
-  summarise(ae = mean(ae),
-            mean_qscore = mean(mean_qscore),
-            interval_coverage_0.5 = mean(interval_coverage_0.5),
+  summarise(ae = mean(ae),
+            mean_qscore = mean(mean_qscore),
+            interval_coverage_0.5 = mean(interval_coverage_0.5),
             interval_coverage_0.95 = mean(interval_coverage_0.95),
-            n = n(), scores_imputed = sum(scores_imputed))
+            n = n(), scores_imputed = sum(scores_imputed),
+            .groups = "keep") %>% ungroup
+# step 3: stats by model and target (aggregated over horizons)
+summary_all_evals3 <- summary_all_evals1 %>%
+  group_by(model, target) %>%
+  summarise(ae = mean(ae),
+            mean_qscore = mean(mean_qscore),
+            interval_coverage_0.5 = mean(interval_coverage_0.5),
+            interval_coverage_0.95 = mean(interval_coverage_0.95),
+            n = n(), scores_imputed = sum(scores_imputed),
+            .groups = "keep") %>% ungroup %>%
+  mutate(horizon = "[All]")
+# Finally: Merge results from steps 2 and 3, compute skill scores
+summary_all_evals <- rbind(summary_all_evals2, summary_all_evals3) %>%
+  group_by(target, horizon) %>%
+  # convert scores to skill scores (1 = perfect, <0 = worse than benchmark)
+  mutate(ae = (ae[model == "Benchmark"] - ae) / ae[model == "Benchmark"],
+         mean_qscore = (mean_qscore[model == "Benchmark"] - mean_qscore) /
+           mean_qscore[model == "Benchmark"]) %>%
+  # drop benchmark
+  filter(model != "Benchmark") %>% ungroup
 write.csv(summary_all_evals, file = "ptsfc_viz/plot_data/summary_eval.csv", row.names = FALSE)
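
For intuition, here is a minimal sketch of the skill-score transformation applied in the final step, run on a made-up toy data frame. The model names and score values below are hypothetical; only the formula mirrors the commit. A skill score of 1 corresponds to a perfect forecast (score of 0), 0 means on par with the benchmark, and negative values mean worse than the benchmark:

library(dplyr)

# hypothetical toy scores: two models plus a pooled "Benchmark" row
toy <- data.frame(model = c("Benchmark", "model_A", "model_B"),
                  target = "DAX", horizon = "1 day",
                  ae = c(2.0, 1.5, 2.5))

toy %>%
  group_by(target, horizon) %>%
  # same transformation as above: (benchmark score - model score) / benchmark score
  mutate(ae = (ae[model == "Benchmark"] - ae) / ae[model == "Benchmark"]) %>%
  # drop the benchmark row, as in the commit
  filter(model != "Benchmark") %>% ungroup
# model_A: (2.0 - 1.5) / 2.0 =  0.25  (better than benchmark)
# model_B: (2.0 - 2.5) / 2.0 = -0.25  (worse than benchmark)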