1
+ # # Data preparation: Run
2
+
3
+ # rm(list=ls())
4
+ #
5
+ # data_dir <- "/media/sebastian/Elements/Postproc_NN/data/"
6
+ # load(paste0(data_dir, "data_all.Rdata"))
7
+ #
8
+ # # remove sm data (missing values)
9
+ # data$sm_mean <- NULL
10
+ # data$sm_var <- NULL
11
+ # head(data)
12
+ #
13
+ # library(scoringRules)
14
+ # library(lubridate)
15
+ # library(crch)
16
+ #
17
+ # train_end <- as.Date("2016-01-01 00:00 UTC") - days(2)
18
+ # train_start <- data$date[1]
19
+ #
20
+ # data_train_all <- subset(data, date >= train_start & date <= train_end)
21
+ #
22
+ # eval_start <- as.Date("2016-01-01 00:00 UTC")
23
+ # eval_end <- as.Date("2016-12-31 00:00 UTC")
24
+ # eval_dates <- seq(eval_start, eval_end, by = "1 day")
25
+ #
26
+ # data_eval_all <- subset(data, date >= eval_start & date <= eval_end)
27
+ #
28
+ # out_loc <- rep(NA, nrow(data_eval_all))
29
+ # out_sc <- rep(NA, nrow(data_eval_all))
30
+ #
31
+ # stations_list <- unique(data$station)
32
+
33
+ # # Logs: run
34
+
35
+ # ncoef_loc <- NULL
36
+ # ncoef_sc <- NULL
37
+ #
38
+ # for(this_station in stations_list){
39
+ # # progress indicator
40
+ # progind <- which(stations_list == this_station)
41
+ # if(progind %% 10 == 0){
42
+ # cat(progind, "of", length(stations_list), "started at", paste(Sys.time()), "\n")
43
+ # }
44
+ #
45
+ # # data_train <- subset(data, date >= train_start & date <= train_end & station == this_station)
46
+ # data_train <- subset(data_train_all, station == this_station)
47
+ #
48
+ # # remove incomplete cases (= NA obs or fc)
49
+ # data_train <- data_train[complete.cases(data_train), ]
50
+ # if(nrow(data_train) < 10){
51
+ # next
52
+ # }
53
+ #
54
+ # ## NOTE: boosting is only implemented for link.scale = "log", otherwise cryptic error message
55
+ # crch_model <- crch(obs ~ .|.,
56
+ # data = data_train[,-which(names(data) %in% c("date", "station"))],
57
+ # dist = "gaussian",
58
+ # link.scale = "log",
59
+ # method = "boosting",
60
+ # maxit = 1000,
61
+ # nu = 0.05,
62
+ # mstop = "aic")
63
+ #
64
+ # ncoef_loc[progind] <- sum(crch_model$coefficients$location > 0)
65
+ # ncoef_sc[progind] <- sum(crch_model$coefficients$scale > 0)
66
+ # }
67
+ #
68
+ # save(ncoef_loc, ncoef_sc, file = "ncoef_LogS.Rdata")
69
+
70
+ # # CRPS: run
71
+
72
+ # ncoef_loc <- NULL
73
+ # ncoef_sc <- NULL
74
+ #
75
+ # for(this_station in stations_list){
76
+ # # progress indicator
77
+ # progind <- which(stations_list == this_station)
78
+ # if(progind %% 10 == 0){
79
+ # cat(progind, "of", length(stations_list), "started at", paste(Sys.time()), "\n")
80
+ # }
81
+ #
82
+ # # data_train <- subset(data, date >= train_start & date <= train_end & station == this_station)
83
+ # data_train <- subset(data_train_all, station == this_station)
84
+ #
85
+ # # remove incomplete cases (= NA obs or fc)
86
+ # data_train <- data_train[complete.cases(data_train), ]
87
+ # if(nrow(data_train) < 10){
88
+ # next
89
+ # }
90
+ #
91
+ # ## NOTE: boosting is only implemented for link.scale = "log", otherwise cryptic error message
92
+ # crch_model <- crch(obs ~ .|.,
93
+ # data = data_train[,-which(names(data) %in% c("date", "station"))],
94
+ # dist = "gaussian",
95
+ # link.scale = "log",
96
+ # method = "boosting",
97
+ # type = "crps",
98
+ # maxit = 1000,
99
+ # nu = 0.05,
100
+ # mstop = "aic")
101
+ #
102
+ # ncoef_loc[progind] <- sum(crch_model$coefficients$location > 0)
103
+ # ncoef_sc[progind] <- sum(crch_model$coefficients$scale > 0)
104
+ # }
105
+ # save(ncoef_loc, ncoef_sc, file = "ncoef_CRPS.Rdata")
106
+
107
+ # # Plot
108
+ rm(list = ls())
109
+
110
+ load(" ncoef_CRPS.Rdata" )
111
+ ncoef_loc_crps <- ncoef_loc
112
+ ncoef_sc_crps <- ncoef_sc
113
+
114
+ load(" ncoef_LogS.Rdata" )
115
+ ncoef_loc_logs <- ncoef_loc
116
+ ncoef_sc_logs <- ncoef_sc
117
+
118
+ pdf(" ncoef_boosting_CRPS-LogS.pdf" , width = 10 , height = 5 , pointsize = 12 )
119
+
120
+ par(mfrow = c(1 ,2 ))
121
+
122
+ # plot for location parameter
123
+ ploc_logs <- hist(ncoef_loc_logs , breaks = seq(0 ,25 ,1 ), plot = FALSE )
124
+ ploc_crps <- hist(ncoef_loc_crps , breaks = seq(0 ,25 ,1 ), plot = FALSE )
125
+ plot(ploc_logs , col = rgb(0 ,0 ,1 ,1 / 4 ), xlim = c(0 ,25 ), ylim = c(0 ,150 ),
126
+ main = " Location" , xlab = " Number of selected predictors" ) # first histogram
127
+ plot(ploc_crps , col = rgb(1 ,0 ,0 ,1 / 4 ), xlim = c(0 ,25 ), add = T ) # second
128
+ legend(" topright" , c(" LogS" , " CRPS" ), fill = c(rgb(0 ,0 ,1 ,1 / 4 ), rgb(1 ,0 ,0 ,1 / 4 )))
129
+
130
+ # plot for scale parameter
131
+ psc_logs <- hist(ncoef_sc_logs , breaks = seq(0 ,25 ,1 ), plot = FALSE )
132
+ psc_crps <- hist(ncoef_sc_crps , breaks = seq(0 ,25 ,1 ), plot = FALSE )
133
+ plot(psc_logs , col = rgb(0 ,0 ,1 ,1 / 4 ), xlim = c(0 ,25 ), ylim = c(0 ,150 ),
134
+ main = " Scale" , xlab = " Number of selected predictors" ) # first histogram
135
+ plot(psc_crps , col = rgb(1 ,0 ,0 ,1 / 4 ), xlim = c(0 ,25 ), add = T ) # second
136
+ legend(" topright" , c(" LogS" , " CRPS" ), fill = c(rgb(0 ,0 ,1 ,1 / 4 ), rgb(1 ,0 ,0 ,1 / 4 )))
137
+
138
+ dev.off()
0 commit comments