-
Notifications
You must be signed in to change notification settings - Fork 82
/
Copy pathrstan_MixtureModel.R
108 lines (85 loc) · 3.48 KB
/
rstan_MixtureModel.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#---------------------------------------------------------------------------------#
# Mixture model (draft); The Stan manual notes issues with mixture modeling, esp. #
# label switching (as does the bugs book), but the code in the manual chapter on #
# mixture modeling does not appear able to reproduce the old faithful mixture or #
# even a simulated mixture of normals, even with some heavy hand holding on #
# starting points, limits etc. On occasion the means can be recovered, but the #
# variances are all over the place and notably influenced by the prior. The #
# Stan manual chapter on problematic posteriors covers many of the issues. #
# #
# The following stan code however is verbatim from github and should work fine, #
# albeit slowly. #
# github stan/src/models/basic_estimators/normal_mixture_k_prop.stan #
#---------------------------------------------------------------------------------#
data(faithful)
head(faithful)
par(mfrow=c(1,2))
apply(faithful, 2, ggplot2:::qplot, geom='density')
layout(1)
y = rnorm(500, c(50,80), c(5,10))
ggplot2:::qplot(y, geom='density')
psych::describe(data.frame(y[seq(1,500, 2)], y[seq(2,500, 2)]))
# take your pick
standat = list(N=nrow(faithful), K=2, y=faithful$waiting)
standat = list(N=nrow(faithful), K=2, y=faithful$eruptions)
standat = list(N=length(y), K=2, y=y)
stanmodelcode = '
data {
int<lower=1> K; # K components
int<lower=1> N; # N observations
real y[N]; # variable of interest
}
parameters {
simplex[K] theta; # mixing proportions
simplex[K] mu_prop;
real mu_loc;
real<lower=0> mu_scale;
real<lower=0> sigma[K]; # sds of the components
}
transformed parameters {
ordered[K] mu;
mu = mu_loc + mu_scale * cumulative_sum(mu_prop); # means of the components
}
model {
// prior
mu_loc ~ cauchy(0,5);
mu_scale ~ cauchy(0,5);
sigma ~ cauchy(0,5);
// likelihood
{
real ps[K];
vector[K] log_theta;
log_theta = log(theta);
for (n in 1:N) {
for (k in 1:K) {
ps[k] = log_theta[k] + normal_lpdf(y[n] | mu[k], sigma[k]);
}
target += log_sum_exp(ps);
}
}
}
'
################
### Test Run ###
################
library(rstan)
# the following may take several minutes per chain depending on the data
test = stan(model_code = stanmodelcode, model_name = "example",
data = standat, iter = 7000, warmup=2000, thin=5, chains = 2, cores=2,
verbose = F)
# shinystan::launch_shinystan(test)
print(test, digits=3)
### compare to flexmix: coefs in flexmod1 = mu in test
library(flexmix)
flexmod1 = flexmix(standat$y~1, k=2, control=list(tolerance=1e-12, iter.max=1000))
summary(flexmod1)
parameters(flexmod1)
######################
### Production run ###
######################
# This can take a notably long time (i.e. hours) depending on the data.
fit = stan(model_code = stanmodelcode, model_name = "mixture", fit = test,
data = standat, iter = 62000, warmup=12000, thin=50, cores = 4,
verbose = T)
print(fit, digits=3)
shinystan::launch_shinystan(fit)