-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathanalyse_baSAR.R
2492 lines (2066 loc) · 87.3 KB
/
analyse_baSAR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#' Bayesian models (baSAR) applied on luminescence data
#'
#' This function allows the application of Bayesian models on luminescence data, measured
#' with the single-aliquot regenerative-dose (SAR, Murray and Wintle, 2000) protocol. In particular,
#' it follows the idea proposed by Combès et al., 2015 of using an hierarchical model for estimating
#' a central equivalent dose from a set of luminescence measurements. This function is (I) the adoption
#' of this approach for the R environment and (II) an extension and a technical refinement of the
#' published code.
#'
#' Internally the function consists of two parts: (I) The Bayesian core for the Bayesian calculations
#' and applying the hierarchical model and (II) a data pre-processing part. The Bayesian core can be run
#' independently, if the input data are sufficient (see below). The data pre-processing part was
#' implemented to simplify the analysis for the user as all needed data pre-processing is done
#' by the function, i.e. in theory it is enough to provide a BIN/BINX-file with the SAR measurement
#' data. For the Bayesian analysis for each aliquot the following information are needed from the SAR analysis.
#' `LxTx`, the `LxTx` error and the dose values for all regeneration points.
#'
#' **How is the systematic error contribution calculated?**
#'
#' Standard errors (so far) provided with the source dose rate are considered as systematic uncertainties
#' and added to final central dose by:
#'
#' \deqn{systematic.error = 1/n \sum SE(source.doserate)}
#'
#' \deqn{SE(central.dose.final) = \sqrt{SE(central.dose)^2 + systematic.error^2}}
#'
#' Please note that this approach is rather rough and can only be valid if the source dose rate
#' errors, in case different readers had been used, are similar. In cases where more than
#' one source dose rate is provided a warning is given.
#'
#' **Input / output scenarios**
#'
#' Various inputs are allowed for this function. Unfortunately this makes the function handling rather
#' complex, but at the same time very powerful. Available scenarios:
#'
#' **(1) - `object` is BIN-file or link to a BIN-file**
#'
#' Finally it does not matter how the information of the BIN/BINX file are provided. The function
#' supports **(a)** either a path to a file or directory or a `list` of file names or paths or
#' **(b)** a [Risoe.BINfileData-class] object or a list of these objects. The latter one can
#' be produced by using the function [read_BIN2R], but this function is called automatically
#' if only a file name and/or a path is provided. In both cases it will become the data that can be
#' used for the analysis.
#'
#' `[CSV_file = NULL]`
#'
#' If no CSV file (or data frame with the same format) is provided, the
#' function runs an automatic process that consists of the following steps:
#'
#' 1. Select all valid aliquots using the function [verify_SingleGrainData]
#' 2. Calculate `Lx/Tx` values using the function [calc_OSLLxTxRatio]
#' 3. Calculate De values using the function [plot_GrowthCurve]
#'
#' These proceeded data are subsequently used in for the Bayesian analysis
#'
#' `[CSV_file != NULL]`
#'
#' If a CSV file is provided (or a `data.frame` containing similar information)
#' the pre-processing phase consists of the following steps:
#'
#' 1. Calculate `Lx/Tx` values using the function [calc_OSLLxTxRatio]
#' 2. Calculate De values using the function [plot_GrowthCurve]
#'
#' The CSV file should contain a selection of the BIN-file names and the aliquots selected
#' for the further analysis. This allows a manual selection of input data, as the automatic selection
#' by [verify_SingleGrainData] might be not totally sufficient.
#'
#'
#' **(2) - `object` `RLum.Results object`**
#'
#' If an [RLum.Results-class] object is provided as input and(!) this object was
#' previously created by the function `analyse_baSAR()` itself, the pre-processing part
#' is skipped and the function starts directly with the Bayesian analysis. This option is very powerful
#' as it allows to change parameters for the Bayesian analysis without the need to repeat
#' the data pre-processing. If furthermore the argument `aliquot_range` is set, aliquots
#' can be manually excluded based on previous runs.
#'
#' **`method_control`**
#'
#' These are arguments that can be passed directly to the Bayesian calculation core, supported arguments
#' are:
#'
#' \tabular{lll}{
#' **Parameter** \tab **Type** \tab **Description**\cr
#' `lower_centralD` \tab [numeric] \tab sets the lower bound for the expected De range. Change it only if you know what you are doing!\cr
#' `upper_centralD` \tab [numeric] \tab sets the upper bound for the expected De range. Change it only if you know what you are doing!\cr
#' `n.chains` \tab [integer] \tab sets number of parallel chains for the model (default = 3) (cf. [rjags::jags.model])\cr
#' `inits` \tab [list] \tab option to set initialisation values (cf. [rjags::jags.model]) \cr
#' `thin` \tab [numeric] \tab thinning interval for monitoring the Bayesian process (cf. [rjags::jags.model])\cr
#' `variable.names` \tab [character] \tab set the variables to be monitored during the MCMC run, default:
#' `'central_D'`, `'sigma_D'`, `'D'`, `'Q'`, `'a'`, `'b'`, `'c'`, `'g'`.
#' Note: only variables present in the model can be monitored.
#' }
#'
#' **User defined models**\cr
#'
#' The function provides the option to modify and to define own models that can be used for
#' the Bayesian calculation. In the case the user wants to modify a model, a new model
#' can be piped into the function via the argument `baSAR_model` as `character`.
#' The model has to be provided in the JAGS dialect of the BUGS language (cf. [rjags::jags.model])
#' and parameter names given with the pre-defined names have to be respected, otherwise the function
#' will break.
#'
#' **FAQ**
#'
#' Q: How can I set the seed for the random number generator (RNG)?
#'
#' A: Use the argument `method_control`, e.g., for three MCMC chains
#' (as it is the default):
#'
#' ```
#' method_control = list(
#' inits = list(
#' list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 1),
#' list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 2),
#' list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 3)
#' ))
#' ```
#'
#' This sets a reproducible set for every chain separately.\cr
#'
#' Q: How can I modify the output plots?
#'
#' A: You can't, but you can use the function output to create own, modified plots.
#'
#'
#' Q: Can I change the boundaries for the central_D?
#'
#' A: Yes, we made it possible, but we DO NOT recommend it, except you know what you are doing!\cr
#' Example: `method_control = list(lower_centralD = 10))`
#'
#' Q: The lines in the baSAR-model appear to be in a wrong logical order?\cr
#'
#' A: This is correct and allowed (cf. JAGS manual)
#'
#'
#' **Additional arguments support via the `...` argument**
#'
#' This list summarizes the additional arguments that can be passed to the internally used
#' functions.
#'
#' \tabular{llll}{
#' **Supported argument** \tab **Corresponding function** \tab **Default** \tab **Short description **\cr
#' `threshold` \tab [verify_SingleGrainData] \tab `30` \tab change rejection threshold for curve selection \cr
#' `skip` \tab [data.table::fread] \tab `0` \tab number of rows to be skipped during import\cr
#' `n.records` \tab [read_BIN2R] \tab `NULL` \tab limit records during BIN-file import\cr
#' `duplicated.rm` \tab [read_BIN2R] \tab `TRUE` \tab remove duplicated records in the BIN-file\cr
#' `pattern` \tab [read_BIN2R] \tab `TRUE` \tab select BIN-file by name pattern\cr
#' `position` \tab [read_BIN2R] \tab `NULL` \tab limit import to a specific position\cr
#' `background.count.distribution` \tab [calc_OSLLxTxRatio] \tab `"non-poisson"` \tab set assumed count distribution\cr
#' `fit.weights` \tab [plot_GrowthCurve] \tab `TRUE` \tab enables / disables fit weights\cr
#' `fit.bounds` \tab [plot_GrowthCurve] \tab `TRUE` \tab enables / disables fit bounds\cr
#' `NumberIterations.MC` \tab [plot_GrowthCurve] \tab `100` \tab number of MC runs for error calculation\cr
#' `output.plot` \tab [plot_GrowthCurve] \tab `TRUE` \tab enables / disables dose response curve plot\cr
#' `output.plotExtended` \tab [plot_GrowthCurve] \tab `TRUE` \tab enables / disables extended dose response curve plot\cr
#' }
#'
#'
#' @param object [Risoe.BINfileData-class], [RLum.Results-class], [list] of [RLum.Analysis-class],
#' [character] or [list] (**required**):
#' input object used for the Bayesian analysis. If a `character` is provided the function
#' assumes a file connection and tries to import a BIN/BINX-file using the provided path. If a `list` is
#' provided the list can only contain either `Risoe.BINfileData` objects or `character`s
#' providing a file connection. Mixing of both types is not allowed. If an [RLum.Results-class]
#' is provided the function directly starts with the Bayesian Analysis (see details)
#'
#' @param CSV_file [character] (*optional*):
#' CSV_file with data for the analysis. This file must contain 3 columns:
#' the name of the file, the disc position and the grain position
#' (the last being 0 for multi-grain measurements).\cr
#' Alternatively a `data.frame` of similar structure can be provided.
#'
#' @param aliquot_range [numeric] (*optional*):
#' allows to limit the range of the aliquots used for the analysis.
#' This argument has only an effect if the argument `CSV_file` is used or
#' the input is the previous output (i.e. is [RLum.Results-class]). In this case the
#' new selection will add the aliquots to the removed aliquots table.
#'
#' @param source_doserate [numeric] **(required)**:
#' source dose rate of beta-source used for the measurement and its uncertainty
#' in Gy/s, e.g., `source_doserate = c(0.12, 0.04)`. Parameter can be provided
#' as `list`, for the case that more than one BIN-file is provided, e.g.,
#' `source_doserate = list(c(0.04, 0.004), c(0.05, 0.004))`.
#'
#' @param signal.integral [vector] (**required**):
#' vector with the limits for the signal integral used for the calculation,
#' e.g., `signal.integral = c(1:5)`. Ignored if `object` is an [RLum.Results-class] object.
#' The parameter can be provided as `list`, see `source_doserate`.
#'
#' @param signal.integral.Tx [vector] (*optional*):
#' vector with the limits for the signal integral for the Tx curve. I
#' f nothing is provided the value from `signal.integral` is used and it is ignored
#' if `object` is an [RLum.Results-class] object.
#' The parameter can be provided as `list`, see `source_doserate`.
#'
#' @param background.integral [vector] (**required**):
#' vector with the bounds for the background integral.
#' Ignored if `object` is an [RLum.Results-class] object.
#' The parameter can be provided as `list`, see `source_doserate`.
#'
#' @param background.integral.Tx [vector] (*optional*):
#' vector with the limits for the background integral for the Tx curve.
#' If nothing is provided the value from `background.integral` is used.
#' Ignored if `object` is an [RLum.Results-class] object.
#' The parameter can be provided as `list`, see `source_doserate`.
#'
#' @param irradiation_times [numeric] (*optional*): if set this vector replaces all irradiation
#' times for one aliquot and one cycle (Lx and Tx curves) and recycles it for all others cycles and aliquots.
#' Please note that if this argument is used, for every(!) single curve
#' in the dataset an irradiation time needs to be set.
#'
#' @param sigmab [numeric] (*with default*):
#' option to set a manual value for the overdispersion (for `LnTx` and `TnTx`),
#' used for the `Lx`/`Tx` error calculation. The value should be provided as
#' absolute squared count values, cf. [calc_OSLLxTxRatio].
#' The parameter can be provided as `list`, see `source_doserate`.
#'
#' @param sig0 [numeric] (*with default*):
#' allow adding an extra component of error to the final Lx/Tx error value
#' (e.g., instrumental error, see details is [calc_OSLLxTxRatio]).
#' The parameter can be provided as `list`, see `source_doserate`.
#'
#' @param distribution [character] (*with default*):
#' type of distribution that is used during Bayesian calculations for
#' determining the Central dose and overdispersion values.
#' Allowed inputs are `"cauchy"`, `"normal"` and `"log_normal"`.
#'
#' @param baSAR_model [character] (*optional*):
#' option to provide an own modified or new model for the Bayesian calculation
#' (see details). If an own model is provided the argument `distribution` is
#' ignored and set to `'user_defined'`
#'
#' @param n.MCMC [integer] (*with default*):
#' number of iterations for the Markov chain Monte Carlo (MCMC) simulations
#'
#' @param fit.method [character] (*with default*):
#' equation used for the fitting of the dose-response curve using the function
#' [plot_GrowthCurve] and then for the Bayesian modelling. Here supported methods: `EXP`, `EXP+LIN` and `LIN`
#'
#' @param fit.force_through_origin [logical] (*with default*):
#' force fitting through origin
#'
#' @param fit.includingRepeatedRegPoints [logical] (*with default*):
#' includes the recycling point (assumed to be measured during the last cycle)
#'
#' @param method_control [list] (*optional*):
#' named list of control parameters that can be directly
#' passed to the Bayesian analysis, e.g., `method_control = list(n.chains = 4)`.
#' See details for further information
#'
#' @param digits [integer] (*with default*):
#' round output to the number of given digits
#'
#' @param distribution_plot [character] (*with default*): sets the final distribution plot that
#' shows equivalent doses obtained using the frequentist approach and sets in the central dose
#' as comparison obtained using baSAR. Allowed input is `'abanico'` or `'kde'`. If set to `NULL` nothing is plotted.
#'
#' @param plot [logical] (*with default*):
#' enables or disables plot output
#'
#' @param plot_reduced [logical] (*with default*):
#' enables or disables the advanced plot output
#'
#' @param plot.single [logical] (*with default*):
#' enables or disables single plots or plots arranged by `analyse_baSAR`
#'
#' @param verbose [logical] (*with default*):
#' enables or disables verbose mode
#'
#' @param ... parameters that can be passed to the function [calc_OSLLxTxRatio]
#' (almost full support), [data.table::fread] (`skip`), [read_BIN2R] (`n.records`,
#' `position`, `duplicated.rm`), see details.
#'
#'
#' @return Function returns results numerically and graphically:
#'
#' -----------------------------------\cr
#' `[ NUMERICAL OUTPUT ]`\cr
#' -----------------------------------\cr
#'
#' **`RLum.Results`**-object
#'
#' **slot:** **`@data`**
#'
#' \tabular{lll}{
#' **Element** \tab **Type** \tab **Description**\cr
#' `$summary` \tab `data.frame` \tab statistical summary, including the central dose \cr
#' `$mcmc` \tab `mcmc` \tab [coda::mcmc.list] object including raw output \cr
#' `$models` \tab `character` \tab implemented models used in the baSAR-model core \cr
#' `$input_object` \tab `data.frame` \tab summarising table (same format as the XLS-file) including, e.g., Lx/Tx values\cr
#' `$removed_aliquots` \tab `data.frame` \tab table with removed aliquots (e.g., `NaN`, or `Inf` `Lx`/`Tx` values). If nothing was removed `NULL` is returned
#' }
#'
#'**slot:** **`@info`**
#'
#' The original function call
#'
#' ------------------------\cr
#' `[ PLOT OUTPUT ]`\cr
#' ------------------------\cr
#'
#' - (A) Ln/Tn curves with set integration limits,
#' - (B) trace plots are returned by the baSAR-model, showing the convergence of the parameters (trace)
#' and the resulting kernel density plots. If `plot_reduced = FALSE` for every(!) dose a trace and
#' a density plot is returned (this may take a long time),
#' - (C) dose plots showing the dose for every aliquot as boxplots and the marked
#' HPD in within. If boxes are coloured 'orange' or 'red' the aliquot itself should be checked,
#' - (D) the dose response curve resulting from the monitoring of the Bayesian modelling are
#' provided along with the Lx/Tx values and the HPD. Note: The amount for curves displayed
#' is limited to 1000 (random choice) for performance reasons,
#' - (E) the final plot is the De distribution as calculated using the conventional (frequentist) approach
#' and the central dose with the HPDs marked within. This figure is only provided for a comparison,
#' no further statistical conclusion should be drawn from it.
#'
#'
#' **Please note: If distribution was set to `log_normal` the central dose is given as geometric mean!**
#'
#'
#' @section Function version: 0.1.33
#'
#' @author
#' Norbert Mercier, IRAMAT-CRP2A, Université Bordeaux Montaigne (France) \cr
#' Sebastian Kreutzer, Institute of Geography, Heidelberg University (Germany) \cr
#' The underlying Bayesian model based on a contribution by Combès et al., 2015.
#'
#' @seealso [read_BIN2R], [calc_OSLLxTxRatio], [plot_GrowthCurve],
#' [data.table::fread], [verify_SingleGrainData],
#' [rjags::jags.model], [rjags::coda.samples], [boxplot.default]
#'
#'
#' @references
#'
#' Combès, B., Philippe, A., Lanos, P., Mercier, N., Tribolo, C., Guerin, G., Guibert, P., Lahaye, C., 2015.
#' A Bayesian central equivalent dose model for optically stimulated luminescence dating.
#' Quaternary Geochronology 28, 62-70. doi:10.1016/j.quageo.2015.04.001
#'
#' Mercier, N., Kreutzer, S., Christophe, C., Guerin, G., Guibert, P., Lahaye, C., Lanos, P., Philippe, A.,
#' Tribolo, C., 2016. Bayesian statistics in luminescence dating: The 'baSAR'-model and its implementation
#' in the R package 'Luminescence'. Ancient TL 34, 14-21.
#'
#' **Further reading**
#'
#' Gelman, A., Carlin, J.B., Stern, H.S., Dunson, D.B., Vehtari, A., Rubin, D.B., 2013.
#' Bayesian Data Analysis, Third Edition. CRC Press.
#'
#' Murray, A.S., Wintle, A.G., 2000. Luminescence dating of quartz using an improved single-aliquot
#' regenerative-dose protocol. Radiation Measurements 32, 57-73. doi:10.1016/S1350-4487(99)00253-X
#'
#' Plummer, M., 2017. JAGS Version 4.3.0 user manual. `https://sourceforge.net/projects/mcmc-jags/files/Manuals/4.x/jags_user_manual.pdf/download`
#'
#' @note
#' **If you provide more than one BIN-file**, it is **strongly** recommended to provide
#' a `list` with the same number of elements for the following parameters:
#'
#' `source_doserate`, `signal.integral`, `signal.integral.Tx`, `background.integral`,
#' `background.integral.Tx`, `sigmab`, `sig0`.
#'
#' Example for two BIN-files: `source_doserate = list(c(0.04, 0.006), c(0.05, 0.006))`
#'
#' **The function is currently limited to work with standard Risoe BIN-files only!**
#'
#' @keywords datagen
#'
#' @examples
#'
#' ##(1) load package test data set
#' data(ExampleData.BINfileData, envir = environment())
#'
#' ##(2) selecting relevant curves, and limit dataset
#' CWOSL.SAR.Data <- subset(
#' CWOSL.SAR.Data,
#' subset = POSITION%in%c(1:3) & LTYPE == "OSL")
#'
#' \dontrun{
#' ##(3) run analysis
#' ##please not that the here selected parameters are
#' ##choosen for performance, not for reliability
#' results <- analyse_baSAR(
#' object = CWOSL.SAR.Data,
#' source_doserate = c(0.04, 0.001),
#' signal.integral = c(1:2),
#' background.integral = c(80:100),
#' fit.method = "LIN",
#' plot = FALSE,
#' n.MCMC = 200
#'
#' )
#'
#' print(results)
#'
#'
#' ##CSV_file template
#' ##copy and paste this the code below in the terminal
#' ##you can further use the function write.csv() to export the example
#'
#' CSV_file <-
#' structure(
#' list(
#' BIN_FILE = NA_character_,
#' DISC = NA_real_,
#' GRAIN = NA_real_),
#' .Names = c("BIN_FILE", "DISC", "GRAIN"),
#' class = "data.frame",
#' row.names = 1L
#' )
#'
#' }
#'
#' @md
#' @export
analyse_baSAR <- function(
object,
CSV_file = NULL,
aliquot_range = NULL,
source_doserate = NULL,
signal.integral,
signal.integral.Tx = NULL,
background.integral,
background.integral.Tx = NULL,
irradiation_times = NULL,
sigmab = 0,
sig0 = 0.025,
distribution = "cauchy",
baSAR_model = NULL,
n.MCMC = 100000,
fit.method = "EXP",
fit.force_through_origin = TRUE,
fit.includingRepeatedRegPoints = TRUE,
method_control = list(),
digits = 3L,
distribution_plot = "kde",
plot = TRUE,
plot_reduced = TRUE,
plot.single = FALSE,
verbose = TRUE,
...
) {
.set_function_name("analyse_baSAR")
on.exit(.unset_function_name(), add = TRUE)
##////////////////////////////////////////////////////////////////////////////////////////////////
##FUNCTION TO BE CALLED to RUN the Bayesian Model
##////////////////////////////////////////////////////////////////////////////////////////////////
##START
.baSAR_function <-
function(Nb_aliquots,
distribution,
data.Dose,
data.Lum,
data.sLum,
fit.method,
n.MCMC,
fit.force_through_origin,
fit.includingRepeatedRegPoints,
method_control,
baSAR_model,
verbose)
{
##lower and uppder De, grep from method_control ... for sure we find it here,
##as it was set before the function call
lower_centralD <- method_control[["lower_centralD"]]
upper_centralD <- method_control[["upper_centralD"]]
##number of MCMC
n.chains <- if (is.null(method_control[["n.chains"]])) {
3
} else{
method_control[["n.chains"]]
}
##inits
inits <- if (is.null(method_control[["inits"]])) {
NULL
} else{
method_control[["inits"]]
}
##thin
thin <- if (is.null(method_control[["thin"]])) {
if(n.MCMC >= 1e+05){
thin <- n.MCMC/1e+05 * 250
}else{
thin <- 10
}
} else{
method_control[["thin"]]
}
##variable.names
variable.names <- if (is.null(method_control[["variable.names"]])) {
c('central_D', 'sigma_D', 'D', 'Q', 'a', 'b', 'c', 'g')
} else{
method_control[["variable.names"]]
}
#check whether this makes sense at all, just a direty and quick test
stopifnot(lower_centralD >= 0)
Limited_cycles <- vector()
if (fit.method == "EXP") {ExpoGC <- 1 ; LinGC <- 0 }
if (fit.method == "LIN") {ExpoGC <- 0 ; LinGC <- 1 }
if (fit.method == "EXP+LIN") {ExpoGC <- 1 ; LinGC <- 1 }
if (fit.force_through_origin == TRUE) {GC_Origin <- 1} else {GC_Origin <- 0}
##Include or exclude repeated dose points
if (fit.includingRepeatedRegPoints) {
for (i in 1:Nb_aliquots) {
Limited_cycles[i] <- length(stats::na.exclude(data.Dose[,i]))
}
}else{
for (i in 1:Nb_aliquots) {
temp.logic <- !duplicated(data.Dose[,i], incomparables=c(0)) # logical excluding 0
m <- length(which(!temp.logic))
data.Dose[,i] <- c(data.Dose[,i][temp.logic], rep(NA, m))
data.Lum[,i] <- c(data.Lum[,i][temp.logic], rep(NA, m))
data.sLum[,i] <- c(data.sLum[,i][temp.logic], rep(NA, m))
rm(m, temp.logic)
}
for (i in 1:Nb_aliquots) {
Limited_cycles[i] <- length(data.Dose[, i]) - length(which(is.na(data.Dose[, i])))
}
}
##check and correct for distribution name
if (!is.null(baSAR_model) && distribution != "user_defined") {
distribution <- "user_defined"
message("[analyse_basAR()] 'baSAR_model' provided, setting ",
"distribution to 'user_defined'")
}
# Bayesian Models ----------------------------------------------------------------------------
# INFO: >
# > sometimes lines apear to be in a wrong logical order, however, this is allowed in the
# > model definition since:
# > "The data block is not limited to logical relations, but may also include stochastic relations."
# > (Plummer, 2017. JAGS Version 4.3.0 user manual, p. 9)
baSAR_models <- list(
cauchy = "model {
central_D ~ dunif(lower_centralD,upper_centralD)
precision_D ~ dt(0, pow(0.16*central_D, -2), 1)T(0, )
sigma_D <- 1/sqrt(precision_D)
for (i in 1:Nb_aliquots) {
a[i] ~ dnorm(6.5 , 1/(9.2^2) ) T(0, )
b[i] ~ dnorm(50 , 1/(1000^2) ) T(0, )
c[i] ~ dnorm(1.002 , 1/(0.9^2) ) T(0, )
g[i] ~ dnorm(0.5 , 1/(2.5^2) ) I(-a[i], )
sigma_f[i] ~ dexp (20)
D[i] ~ dt ( central_D , precision_D, 1) # Cauchy distribution
S_y[1,i] <- 1/(sLum[1,i]^2 + sigma_f[i]^2)
Lum[1,i] ~ dnorm ( Q[1,i] , S_y[1,i])
Q[1,i] <- GC_Origin * g[i] + LinGC * (c[i] * D[i] ) + ExpoGC * (a[i] * (1 - exp (-D[i] /b[i])) )
for (m in 2:Limited_cycles[i]) {
S_y[m,i] <- 1/(sLum[m,i]^2 + sigma_f[i]^2)
Lum[m,i] ~ dnorm( Q[m,i] , S_y[m,i] )
Q[m,i] <- GC_Origin * g[i] + LinGC * (c[i] * Dose[m,i]) + ExpoGC * (a[i] * (1 - exp (-Dose[m,i]/b[i])) )
}
}
}",
normal = "model {
central_D ~ dunif(lower_centralD,upper_centralD)
sigma_D ~ dunif(0.01, 1 * central_D)
for (i in 1:Nb_aliquots) {
a[i] ~ dnorm(6.5 , 1/(9.2^2) ) T(0, )
b[i] ~ dnorm(50 , 1/(1000^2) ) T(0, )
c[i] ~ dnorm(1.002 , 1/(0.9^2) ) T(0, )
g[i] ~ dnorm(0.5 , 1/(2.5^2) ) I(-a[i], )
sigma_f[i] ~ dexp (20)
D[i] ~ dnorm ( central_D , 1/(sigma_D^2) ) # Normal distribution
S_y[1,i] <- 1/(sLum[1,i]^2 + sigma_f[i]^2)
Lum[1,i] ~ dnorm ( Q[1,i] , S_y[1,i])
Q[1,i] <- GC_Origin * g[i] + LinGC * (c[i] * D[i] ) + ExpoGC * (a[i] * (1 - exp (-D[i] /b[i])) )
for (m in 2:Limited_cycles[i]) {
S_y[m,i] <- 1/(sLum[m,i]^2 + sigma_f[i]^2)
Lum[m,i] ~ dnorm( Q[m,i] , S_y[m,i] )
Q[m,i] <- GC_Origin * g[i] + LinGC * (c[i] * Dose[m,i]) + ExpoGC * (a[i] * (1 - exp (-Dose[m,i]/b[i])) )
}
}
}",
log_normal = "model {
central_D ~ dunif(lower_centralD,upper_centralD)
log_central_D <- log(central_D) - 0.5 * l_sigma_D^2
l_sigma_D ~ dunif(0.01, 1 * log(central_D))
sigma_D <- sqrt((exp(l_sigma_D^2) -1) * exp( 2*log_central_D + l_sigma_D^2) )
for (i in 1:Nb_aliquots) {
a[i] ~ dnorm(6.5 , 1/(9.2^2) ) T(0, )
b[i] ~ dnorm(50 , 1/(1000^2) ) T(0, )
c[i] ~ dnorm(1.002 , 1/(0.9^2) ) T(0, )
g[i] ~ dnorm(0.5 , 1/(2.5^2) ) I(-a[i], )
sigma_f[i] ~ dexp (20)
log_D[i] ~ dnorm ( log_central_D , 1/(l_sigma_D^2) ) # Log-Normal distribution
D[i] <- exp(log_D[i])
S_y[1,i] <- 1/(sLum[1,i]^2 + sigma_f[i]^2)
Lum[1,i] ~ dnorm ( Q[1,i] , S_y[1,i])
Q[1,i] <- GC_Origin * g[i] + LinGC * (c[i] * D[i] ) + ExpoGC * (a[i] * (1 - exp (-D[i] /b[i])) )
for (m in 2:Limited_cycles[i]) {
S_y[m,i] <- 1/(sLum[m,i]^2 + sigma_f[i]^2)
Lum[m,i] ~ dnorm( Q[m,i] , S_y[m,i] )
Q[m,i] <- GC_Origin * g[i] + LinGC * (c[i] * Dose[m,i]) + ExpoGC * (a[i] * (1 - exp (-Dose[m,i]/b[i])) )
}
}
}",
user_defined = baSAR_model
)
##check whether the input for distribution was sufficient
if (!distribution %in% names(baSAR_models)) {
.throw_error("No pre-defined model for the requested distribution. ",
"Please select one of '",
paste(rev(names(baSAR_models))[-1], collapse = "', '"),
"', or define an own model using argument 'baSAR_model'")
}
if (distribution == "user_defined" && is.null(baSAR_model)) {
.throw_error("You specified a 'user_defined' distribution, ",
"but did not provide a model via 'baSAR_model'")
}
### Bayesian inputs
data_Liste <- list(
'Dose' = data.Dose,
'Lum' = data.Lum,
'sLum' = data.sLum,
'LinGC' = LinGC,
'ExpoGC' = ExpoGC,
'GC_Origin' = GC_Origin,
'Limited_cycles' = Limited_cycles,
'lower_centralD' = lower_centralD,
'upper_centralD' = upper_centralD,
'Nb_aliquots' = Nb_aliquots
)
if(verbose){
cat("\n[analyse_baSAR()] ---- baSAR-model ---- \n")
cat("\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n")
cat("[analyse_baSAR()] Bayesian analysis in progress ...\n")
message(".. >> bounds set to: lower_centralD =", lower_centralD,
"| upper_centralD =", upper_centralD)
}
Nb_Iterations <- n.MCMC
if (verbose) {
message(
".. >> calculation will be done assuming a '",
distribution,
"' distribution\n"
)
}
##set model
jagsfit <- rjags::jags.model(
file = textConnection(baSAR_models[[distribution]]),
data = data_Liste,
inits = inits,
n.chains = n.chains,
n.adapt = Nb_Iterations,
quiet = !verbose
)
##update jags model (it is a S3-method)
update(
object = jagsfit,
n.iter = Nb_Iterations,
progress.bar = if(verbose){"text"}else{NULL}
)
##get data ... full and reduced, the reduced one to limit the plot output
sampling <- rjags::coda.samples(
model = jagsfit,
variable.names = variable.names,
n.iter = Nb_Iterations,
thin = thin
)
##this we need for the output of the terminal
##Why sampling reduced? Because the summary() method produces a considerable overhead while
##running over all the variables
sampling_reduced <- rjags::coda.samples(
model = jagsfit,
variable.names = c('central_D', 'sigma_D'),
n.iter = Nb_Iterations,
thin = thin
)
pt_zero <- 0
nb_decal <- 2
pt_zero <- Nb_aliquots
##standard error and mean
output.mean <-
round(summary(sampling_reduced)[[1]][c("central_D", "sigma_D"), 1:2], digits)
##calculate geometric mean for the case that the distribution is log-normal
if(distribution == "log_normal"){
temp.vector <- unlist(lapply(sampling_reduced, function(x){as.vector(x[,1])}))
gm <- round(exp(sum(log(temp.vector))/length(temp.vector)),digits)
rm(temp.vector)
}else{
gm <- NULL
}
##quantiles
##68% + 95%
output.quantiles <-
round(summary(sampling_reduced, quantiles = c(0.025, 0.16, 0.84, 0.975))[[2]][c("central_D", "sigma_D"), 1:4], digits)
#### output data.frame with results
baSAR.output <- data.frame(
DISTRIBUTION = distribution,
NB_ALIQUOTS = Nb_aliquots,
N.CHAINS = n.chains,
N.MCMC = n.MCMC,
FIT_METHOD = fit.method,
CENTRAL = if(is.null(gm)){output.mean[1,1]}else{gm},
CENTRAL.SD = output.mean[1,2],
SIGMA = output.mean[2,1],
SIGMA.SD = output.mean[2,2],
CENTRAL_Q_.16 = output.quantiles[1,2],
CENTRAL_Q_.84 = output.quantiles[1,3],
SIGMA_Q_.16 = output.quantiles[2,2],
SIGMA_Q_.84 = output.quantiles[2,3],
CENTRAL_Q_.025 = output.quantiles[1,1],
CENTRAL_Q_.975 = output.quantiles[1,4],
SIGMA_Q_.025 = output.quantiles[2,1],
SIGMA_Q_.975 = output.quantiles[2,4]
)
return(
baSAR.output = list(
baSAR.output_summary = baSAR.output,
baSAR.output_mcmc = sampling,
models = list(
cauchy = baSAR_models[["cauchy"]],
normal = baSAR_models[["normal"]],
log_normal = baSAR_models[["log_normal"]],
user_defined = baSAR_models[["user_defined"]]
)
)
)
}
##END
##////////////////////////////////////////////////////////////////////////////////////////////////
# Integrity tests -----------------------------------------------------------------------------
##check whether rjags is available
##code snippet taken from
##http://r-pkgs.had.co.nz/description.html
# nocov start
if (!requireNamespace("rjags", quietly = TRUE)) {
.throw_error("To use this function you have to first install package 'rjags'")
}
if (!requireNamespace("coda", quietly = TRUE)) {
.throw_error("To use this function you have to first install package 'coda'.")
}
# nocov end
## fit.method
fit.method <- .match_args(fit.method, c("EXP", "EXP+LIN", "LIN"))
.validate_positive_scalar(n.MCMC, int = TRUE)
#capture additional piped arguments
additional_arguments <- list(
##verify_SingleGrainData
threshold = 30,
##calc_OSLLxTxRatio()
background.count.distribution = "non-poisson",
## data.table::fread()
skip = 0,
##read_BIN2R()
n.records = NULL,
duplicated.rm = TRUE,
position = NULL,
pattern = NULL,
##plot_GrowthCurve()
fit.weights = TRUE,
fit.bounds = TRUE,
NumberIterations.MC = 100,
output.plot = plot,
output.plotExtended = plot
)
#modify this list on purpose
additional_arguments <- modifyList(x = additional_arguments,
val = list(...))
##set function arguments
function_arguments <- NULL
# Set input -----------------------------------------------------------------------------------
##if the input is alreayd of type RLum.Results, use the input and do not run
##all pre-calculations again
if(is(object, "RLum.Results")){
if(object@originator == "analyse_baSAR"){
##We want to use previous function arguments and recycle them
##(1) get information you need as input from the RLum.Results object
function_arguments <- as.list(object@info$call)
##(2) overwrite by current provided arguments
##by using a new argument we have the choise which argument is allowed for
##changes
function_arguments.new <- modifyList(x = function_arguments, val = as.list(match.call()))
##get maximum cycles
max_cycles <- max(object$input_object[["CYCLES_NB"]])
##set Nb_aliquots
Nb_aliquots <- nrow(object$input_object)
## return NULL if not at least three aliquots are used for the calculation
if(Nb_aliquots < 2){
message("[analyse_baSAR()] Error: number of aliquots < 3, ",
"this makes no sense, NULL returned")
return(NULL)
}
##set variables
##Why is.null() ... it prevents that in case of a function crash is nothing is provided ...
##set changeable function arguments
##distribution
if(!is.null(function_arguments.new$distribution)){
distribution <- function_arguments.new$distribution
}
##n.MCMC
if(!is.null(function_arguments.new$n.MCMC)){
n.MCMC <- function_arguments.new$n.MCMC
}
##fit.method
if(!is.null(function_arguments.new$fit.method)){
fit.method <- function_arguments.new$fit.method
}
## fit.force_through_origin
if(!is.null(function_arguments.new$fit.force_through_origin)){
fit.force_through_origin <- function_arguments.new$fit.force_through_origin
}
##fit.includingRepeatedRegPoints
if(!is.null(function_arguments.new$fit.includingRepeatedRegPoints)){
fit.includingRepeatedRegPoints <- function_arguments.new$fit.includingRepeatedRegPoints
}
##source_doserate
if(length(as.list(match.call())$source_doserate) > 0){
.throw_warning("'source_doserate' is ignored in this mode as ",
"it was already set")
}
##aliquot_range
if(!is.null(function_arguments.new$aliquot_range)){
aliquot_range <- eval(function_arguments.new$aliquot_range)
}
##method_control
if(!is.null(function_arguments.new$method_control)){
method_control <- eval(function_arguments.new$method_control)
}
##baSAR_model
if(!is.null(function_arguments.new$baSAR_model)){
baSAR_model <- eval(function_arguments.new$baSAR_model)
}
##plot
if(!is.null(function_arguments.new$plot)){
plot <- function_arguments.new$plot
}
##verbose
if(!is.null(function_arguments.new$verbose)){
verbose <- function_arguments.new$verbose
}
##limit according to aliquot_range
##TODO Take care of the case that this was provided, otherwise more and more is removed!
if (!is.null(aliquot_range)) {
if (max(aliquot_range) <= nrow(object$input_object)) {
input_object <- object$input_object[aliquot_range, ]
##update list of removed aliquots
removed_aliquots <-rbind(object$removed_aliquots, object$input_object[-aliquot_range,])
##correct Nb_aliquots
Nb_aliquots <- nrow(input_object)
} else{
message("[analyse_basAR()] Error: 'aliquot_range' out of bounds, ",
"input ignored")
##reset aliquot range
aliquot_range <- NULL
##take entire object
input_object <- object$input_object
##set removed aliquots
removed_aliquots <- object$removed_aliquots
}
} else{
##set the normal case
input_object <- object$input_object
##set removed aliquots
removed_aliquots <- object$removed_aliquots
}
##set non function arguments
Doses <- t(input_object[,9:(8 + max_cycles)])
LxTx <- t(input_object[,(9 + max_cycles):(8 + 2 * max_cycles)])
LxTx.error <- t(input_object[,(9 + 2 * max_cycles):(8 + 3 * max_cycles)])
rm(max_cycles)
}else{
.throw_error("'object' is of type 'RLum.Results', ",
"but was not produced by analyse_baSAR()")
}
}else{
if(verbose){
cat("\n[analyse_baSAR()] ---- PRE-PROCESSING ----\n")
}
##Supported input types are:
## (1) BIN-file
## .. list
## .. character
## (2) RisoeBINfileData object
## .. list
## .. S4
## (3) RLum.Analyis objects
## .. list
## .. S4
##In case an RLum.Analysis object is provided we try an ugly conversion only
if(inherits(object, "list") && all(vapply(object, function(x){inherits(x, "RLum.Analysis")}, logical(1)))){
if(verbose)
cat("[analyse_baSAR()] List of RLum.Analysis-objects detected ..\n")
## set number of objects
n_objects <- length(object)