Skip to content

Commit 21af1c8

Browse files
committed
tidying up script and comments
1 parent af27919 commit 21af1c8

File tree

1 file changed

+13
-16
lines changed

1 file changed

+13
-16
lines changed

r_scripts/penalised_regression/lasso_upsampling.R

+13-16
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ library("tidymodels")
1010
##############
1111
## PARAMETERS
1212
##############
13-
fname = "PI/data.RData" ## input data
13+
fname = "example/data.RData" ## input data
1414
ncpus = 4
1515
upsample_ratio = 1.25 ## default = 1
1616
###################
@@ -22,9 +22,11 @@ load(fname)
2222

2323
## data cleaning
2424
writeLines(" - data cleaning")
25+
## using support variable "category" for upsampling
2526
temp <- test %>%
2627
select(-batch) |>
27-
mutate(across(c(study_bis, sex, country_simply, opioid, canc_diag_simply, chemotherapy), as.factor))
28+
mutate(across(c(study, sex, country, treatment, diagnosis, therapy), as.factor),
29+
category=cut(score_intensity, breaks=c(-Inf, 2, Inf), labels=c("low","high")))
2830

2931
# Setup parallel backend to use n. processors
3032
cl <- makeCluster(ncpus)
@@ -33,12 +35,7 @@ registerDoParallel(cl)
3335
############################
3436
## tidymodels with upsampling
3537
############################
36-
writeLines(" - using tidymodels")
37-
38-
temp <- test %>%
39-
select(-batch) |>
40-
mutate(across(c(study_bis, sex, country_simply, opioid, canc_diag_simply, chemotherapy), as.factor),
41-
category=cut(av_pain_intensity, breaks=c(-Inf, 2, Inf), labels=c("low","high")))
38+
print("Using tidymodels")
4239

4340
## training / test split
4441
writeLines(" - split data")
@@ -59,7 +56,7 @@ up_train <- juice(upsample_prep)
5956
##############################
6057
## CV - FINE TUNING OF LAMBDA
6158
##############################
62-
mod_rec <- recipe(av_pain_intensity ~ ., data = up_train) %>%
59+
mod_rec <- recipe(score_intensity ~ ., data = up_train) %>%
6360
update_role(category, new_role = "dataset split variable") |>
6461
step_zv(all_numeric(), -all_outcomes()) %>%
6562
step_normalize(all_numeric(), -all_outcomes()) |>
@@ -143,7 +140,7 @@ lr_res %>%
143140
collect_metrics()
144141

145142
lr_res %>% collect_predictions() |>
146-
summarise(r_xv = cor(.pred,av_pain_intensity))
143+
summarise(r_xv = cor(.pred, score_intensity))
147144

148145
preds1 = lr_res %>% collect_predictions()
149146

@@ -166,7 +163,7 @@ last_lasso_mod <-
166163

167164
last_lasso_wf <- workflow() |>
168165
add_model(last_lasso_mod) |>
169-
add_formula(av_pain_intensity ~ .)
166+
add_formula(score_intensity ~ .)
170167

171168
final_lasso_fit <- fit(last_lasso_wf, data = mtbsl1_training) ## fit final model on the training set
172169
print(final_lasso_fit)
@@ -175,12 +172,12 @@ final_lasso_fit$pre
175172
## 3 make predictions
176173
preds = final_lasso_fit %>%
177174
predict(new_data = mtbsl1_testing, type = "numeric") %>%
178-
bind_cols(mtbsl1_testing$av_pain_intensity) |>
175+
bind_cols(mtbsl1_testing$score_intensity) |>
179176
## bind_cols() auto-names the unnamed truth vector `...2`; give it the outcome's
## current name so the preds$score_intensity / aes(score_intensity) uses resolve
rename(score_intensity = `...2`)
180177

181-
cor(preds$.pred, preds$av_pain_intensity)
182-
sqrt(sum((preds$av_pain_intensity-preds$.pred)^2)/nrow(preds))
183-
184-
ggplot(data = preds, aes(.pred, av_pain_intensity)) + geom_point()
178+
cor(preds$.pred, preds$score_intensity)
179+
sqrt(sum((preds$score_intensity-preds$.pred)^2)/nrow(preds))
185180

181+
ggplot(data = preds, aes(.pred, score_intensity)) + geom_point()
186182

183+
print("DONE!")

0 commit comments

Comments
 (0)