
Commit dbaf3a1

Update: Full rework of project

1) split into more manageable scripts
2) use of a config file
3) add renv lockfile for reproducibility (see the sketch below)
4) adjusted the data simulation process to output pollen grain data
5) adjusted the RoC calculation to use R-Ratepol v0.4.6
6) fix bugs in the detection of successes
7) improve model fitting

1 parent 033a604 commit dbaf3a1
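For items 3 and 5, a minimal sketch of how the pinned environment and package version could be restored. The lockfile path is the one used in R/00_config.R; that the v0.4.6 release exists as a Git tag named "v0.4.6" is an assumption, not something stated in the commit.

# restore package versions recorded in the project lockfile
renv::restore(lockfile = "data/lock/revn.lock")

# install the pinned RRatepol release; the "v0.4.6" tag name is an assumption
devtools::install_github("HOPE-UIB-BIO/R-Ratepol-package", ref = "v0.4.6")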

61 files changed, +4737 −364 lines changed


R/00_config.R

+103
@@ -0,0 +1,103 @@
#----------------------------------------------------------#
#
#
#           Rate-of-change in palaeoecology
#
#                  Project config
#
#                   Ondrej Mottl
#                       2020
#
#----------------------------------------------------------#

#----------------------------------------------------------#
# 1. Load libraries and functions -----
#----------------------------------------------------------#

# delete existing workspace to start clean
rm(list = ls())

# package version control
library(renv)
# renv::init()
# renv::snapshot(lockfile = "data/lock/revn.lock")
renv::restore(lockfile = "data/lock/revn.lock")

# libraries
library(tidyverse)
library(devtools)
library(glmmTMB)
library(parallel)
library(MuMIn)
library(emmeans)
library(performance)
library(RColorBrewer)

# install the RRatepol package (download and attach)
# devtools::install_github("HOPE-UIB-BIO/R-Ratepol-package")
library(RRatepol)

#----------------------------------------------------------#
# 2. Load example data and custom functions -----
#----------------------------------------------------------#

data_example <- RRatepol::example_data

files_sources <- list.files("R/functions/")
sapply(paste0("R/functions/", files_sources), source)

#----------------------------------------------------------#
# 3. Definition of variables -----
#----------------------------------------------------------#

# number of simulated environmental variables
N_env <- 4

# diversity of pollen taxa in simulated data
low_diversity <- 5
high_diversity <- 50

# position of the environmental change in the sequence
breaks_recent <- c(2000, 3000)
breaks_late <- c(5500, 6500)

# number of simulated datasets of pollen data
N_rep <- 100

# template of time sequence with uneven distribution of points
time_seq <- data_example$list_ages[[4]]$ages$age

# number of cores
n_cores <- parallel::detectCores()

# very small value used in beta-family models
very_small_value <- .Machine$double.eps * 100

#----------------------------------------------------------#
# 4. Graphical settings -----
#----------------------------------------------------------#

theme_set(theme_classic())

text_size <- 12

color_legen_segment <- brewer.pal(n = 3, name = "Set2")
names(color_legen_segment) <- c("correct detection", "false positives")

color_legen_dataset_type <- brewer.pal(n = 4, name = "Set1")
names(color_legen_dataset_type) <-
  c("high density level_high richness",
    "high density level_low richness",
    "low_density level_high richness",
    "low_density level_low richness")

color_legen_smooth <- brewer.pal(n = 5, name = "Set3")
names(color_legen_smooth) <- c("None", "M.avg", "Grimm", "Age.w", "Shep")
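The named colour vectors in section 4 are presumably meant to feed ggplot2 manual scales in the plotting scripts; a minimal sketch of that use, assuming the config has been sourced (dummy_data and the plot itself are illustrative, not part of the project):

# toy data using the two segment labels set as names above (hypothetical)
dummy_data <- tibble::tibble(
  x = 1:2,
  y = 1:2,
  segment = c("correct detection", "false positives"))

# legend colours are matched by name via the named vector from the config
ggplot2::ggplot(dummy_data, ggplot2::aes(x, y, colour = segment)) +
  ggplot2::geom_point(size = 3) +
  ggplot2::scale_colour_manual(values = color_legen_segment)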

R/01_data_creation.R

+109
@@ -0,0 +1,109 @@
#----------------------------------------------------------#
#
#
#           Rate-of-change in palaeoecology
#
#                 Data preparation
#
#                   Ondrej Mottl
#                       2020
#
#----------------------------------------------------------#

# load config
source("R/00_config.R")

#----------------------------------------------------------#
# 1. Simulate datasets -----
#----------------------------------------------------------#

# low diversity, recent environmental change
sim_ld_recent <-
  fc_simulate_pollen_data_in_multiple_datasets(
    time = time_seq,
    nforc = N_env,
    nprox = low_diversity,
    manual_edit = TRUE,
    breaks = breaks_recent,
    jitter = TRUE,
    rarity = TRUE,
    N_datasets = N_rep)

# low diversity, late environmental change
sim_ld_late <-
  fc_simulate_pollen_data_in_multiple_datasets(
    time = time_seq,
    nforc = N_env,
    nprox = low_diversity,
    manual_edit = TRUE,
    breaks = breaks_late,
    jitter = TRUE,
    rarity = TRUE,
    N_datasets = N_rep)

# high diversity, recent environmental change
sim_hd_recent <-
  fc_simulate_pollen_data_in_multiple_datasets(
    time = time_seq,
    nforc = N_env,
    nprox = high_diversity,
    manual_edit = TRUE,
    breaks = breaks_recent,
    jitter = TRUE,
    rarity = TRUE,
    N_datasets = N_rep)

# high diversity, late environmental change
sim_hd_late <-
  fc_simulate_pollen_data_in_multiple_datasets(
    time = time_seq,
    nforc = N_env,
    nprox = high_diversity,
    manual_edit = TRUE,
    breaks = breaks_late,
    jitter = TRUE,
    rarity = TRUE,
    N_datasets = N_rep)

#----------------------------------------------------------#
# 2. Merge datasets -----
#----------------------------------------------------------#

simulated_dataset <-
  dplyr::bind_rows(

    tibble::tibble(
      sim_ld_recent,
      diversity = "low_diversity",
      position = "breaks_recent"),

    tibble::tibble(
      sim_ld_late,
      diversity = "low_diversity",
      position = "breaks_late"),

    tibble::tibble(
      sim_hd_recent,
      diversity = "high_diversity",
      position = "breaks_recent"),

    tibble::tibble(
      sim_hd_late,
      diversity = "high_diversity",
      position = "breaks_late")) %>%
  mutate(
    dataset_ID = paste(ID, diversity, position) %>%
      as.factor() %>%
      as.numeric()) %>%
  arrange(dataset_ID) %>%
  dplyr::select(dataset_ID, diversity, position, community_data, list_ages) %>%
  mutate(dataset_ID = as.character(dataset_ID))

simulated_dataset

#----------------------------------------------------------#
# 3. Save datasets -----
#----------------------------------------------------------#
write_rds(simulated_dataset, "data/output/datasets/simulated_dataset.rds")
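A quick sanity check, not part of the committed script, assuming each row of simulated_dataset corresponds to one simulated dataset: every diversity and position combination should contain N_rep (= 100) datasets.

# count simulated datasets per diversity/position combination (expect N_rep each)
simulated_dataset %>%
  dplyr::count(diversity, position)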

R/02_ROC_of_simulated_data.R

+108
@@ -0,0 +1,108 @@
#----------------------------------------------------------#
#
#
#           Rate-of-change in palaeoecology
#
#             RoC in simulated datasets
#             & peak detection success
#
#                   Ondrej Mottl
#                       2020
#
#----------------------------------------------------------#

# load config
source("R/00_config.R")

#----------------------------------------------------------#
# 1. Load data -----
#----------------------------------------------------------#

list_files_output <- list.files("data/output/datasets/")

if (any(list_files_output %in% "simulated_dataset.rds")) {
  simulated_dataset <- read_rds("data/output/datasets/simulated_dataset.rds")
} else {
  source("R/01_data_creation.R")
}

#----------------------------------------------------------#
# 2. Calculate RoC -----
#----------------------------------------------------------#

# RoC estimated between individual levels
sim_ROC_levels <-
  fc_estimate_RoC_by_all_methods(
    simulated_dataset,
    Working_Unit = "levels",
    interest_threshold = 8000)

write_rds(
  sim_ROC_levels,
  "data/output/datasets/sim_ROC_levels_compress.rds",
  compress = "gz")

# RoC estimated for 500-yr bins
sim_ROC_bins <-
  fc_estimate_RoC_by_all_methods(
    simulated_dataset,
    Working_Unit = "bins",
    bin_size = 500,
    Number_of_shifts = 1,
    interest_threshold = 8000)

write_rds(
  sim_ROC_bins,
  "data/output/datasets/sim_ROC_bins_compress.rds",
  compress = "gz")

# RoC estimated for a moving window of 500-yr bins with 5 shifts
sim_ROC_MW <-
  fc_estimate_RoC_by_all_methods(
    simulated_dataset,
    Working_Unit = "MW",
    bin_size = 500,
    Number_of_shifts = 5,
    interest_threshold = 8000)

write_rds(
  sim_ROC_MW,
  "data/output/datasets/sim_ROC_MW_compress.rds",
  compress = "gz")

#----------------------------------------------------------#
# 3. Merge files -----
#----------------------------------------------------------#

sim_ROC_all <-
  bind_rows(
    tibble(sim_ROC_levels, WU = "levels"),
    tibble(sim_ROC_bins, WU = "bins"),
    tibble(sim_ROC_MW, WU = "MW")) %>%
  mutate(
    calculation_ID = paste0(WU, calculation_number) %>%
      as.factor() %>%
      as.numeric()) %>%
  dplyr::select(dataset_ID, calculation_ID, everything()) %>%
  arrange(dataset_ID, calculation_ID)

# sanity checks: number of unique calculations and datasets
sim_ROC_all$calculation_ID %>%
  unique() %>%
  length()

sim_ROC_all$dataset_ID %>%
  unique() %>%
  length()

write_rds(
  sim_ROC_all,
  "data/output/datasets/simulated_roc.rds",
  compress = "gz")

#----------------------------------------------------------#
# 4. Detect success of peak points -----
#----------------------------------------------------------#

perform_sim <- fc_test_success_in_simulated_data(sim_ROC_all)

write_rds(perform_sim, "data/output/datasets/simulated_success.rds")
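The config attaches glmmTMB and defines very_small_value for beta-family models, and the commit message mentions improved model fitting; a self-contained sketch of that idea on made-up data (all object and column names below are hypothetical and not taken from the repository):

library(glmmTMB)

# hypothetical data: proportion of correctly detected peaks per simulated dataset
set.seed(1)
toy_success <- data.frame(
  smooth = rep(c("None", "M.avg", "Grimm", "Age.w", "Shep"), each = 10))
toy_success$success_rate <-
  rbinom(nrow(toy_success), size = 20, prob = 0.6) / 20

# the beta family needs responses strictly inside (0, 1),
# so squeeze exact 0/1 values away from the boundary
very_small_value <- .Machine$double.eps * 100
toy_success$success_rate <-
  pmin(pmax(toy_success$success_rate, very_small_value), 1 - very_small_value)

fit <- glmmTMB(success_rate ~ smooth, data = toy_success, family = beta_family())
summary(fit)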
