@@ -10,7 +10,7 @@ library("tidymodels")
##############
## PARAMETERS
##############
- fname = "PI/data.RData"  ## input data
+ fname = "example/data.RData"  ## input data
ncpus = 4
upsample_ratio = 1.25  ## default = 1
###################
@@ -22,9 +22,11 @@ load(fname)

## data cleaning
writeLines(" - data cleaning")
+ ## using support variable "category" for upsampling
temp <- test %>%
select(-batch) |>
- mutate(across(c(study_bis, sex, country_simply, opioid, canc_diag_simply, chemotherapy), as.factor))
+ mutate(across(c(study, sex, country, treatment, diagnosis, therapy), as.factor),
+ category = cut(score_intensity, breaks = c(-Inf, 2, Inf), labels = c("low", "high")))

# Setup parallel backend to use n. processors
cl <- makeCluster(ncpus)
@@ -33,12 +35,7 @@ registerDoParallel(cl)
############################
## tidymodels with upsampling
############################
- writeLines(" - using tidymodels")
-
- temp <- test %>%
- select(-batch) |>
- mutate(across(c(study_bis, sex, country_simply, opioid, canc_diag_simply, chemotherapy), as.factor),
- category = cut(av_pain_intensity, breaks = c(-Inf, 2, Inf), labels = c("low", "high")))
+ print("Using tidymodels")

## training / test split
writeLines(" - split data")
@@ -59,7 +56,7 @@ up_train <- juice(upsample_prep)
##############################
## CV - FINE TUNING OF LAMBDA
##############################
- mod_rec <- recipe(av_pain_intensity ~ ., data = up_train) %>%
+ mod_rec <- recipe(score_intensity ~ ., data = up_train) %>%
update_role(category, new_role = "dataset split variable") |>
step_zv(all_numeric(), -all_outcomes()) %>%
step_normalize(all_numeric(), -all_outcomes()) |>
@@ -143,7 +140,7 @@ lr_res %>%
collect_metrics()

lr_res %>% collect_predictions() |>
- summarise(r_xv = cor(.pred, av_pain_intensity))
+ summarise(r_xv = cor(.pred, score_intensity))

preds1 = lr_res %>% collect_predictions()

@@ -166,7 +163,7 @@ last_lasso_mod <-

last_lasso_wf <- workflow() |>
add_model(last_lasso_mod) |>
- add_formula(av_pain_intensity ~ .)
+ add_formula(score_intensity ~ .)

final_lasso_fit <- fit(last_lasso_wf, data = mtbsl1_training)  ## fit final model on the training set
print(final_lasso_fit)
@@ -175,12 +172,12 @@ final_lasso_fit$pre
## 3 make predictions
preds = final_lasso_fit %>%
predict(new_data = mtbsl1_testing, type = "numeric") %>%
- bind_cols(mtbsl1_testing$av_pain_intensity) |>
- rename(av_pain_intensity = `...2`)
+ bind_cols(mtbsl1_testing$score_intensity) |>
+ rename(score_intensity = `...2`)

- cor(preds$.pred, preds$av_pain_intensity)
- sqrt(sum((preds$av_pain_intensity - preds$.pred)^2)/nrow(preds))
-
- ggplot(data = preds, aes(.pred, av_pain_intensity)) + geom_point()
+ cor(preds$.pred, preds$score_intensity)
+ sqrt(sum((preds$score_intensity - preds$.pred)^2)/nrow(preds))

+ ggplot(data = preds, aes(.pred, score_intensity)) + geom_point()

+ print("DONE!")