Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow LMM to init GLMM #588

Merged
merged 7 commits into from
Jan 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MixedModels"
uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
author = ["Phillip Alday <me@phillipalday.com>", "Douglas Bates <dmbates@gmail.com>", "Jose Bayoan Santiago Calderon <jbs3hp@virginia.edu>"]
version = "4.5.0"
version = "4.6.0"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Expand Down
104 changes: 41 additions & 63 deletions src/generalizedlinearmixedmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -167,30 +167,9 @@ function fit(
tbl,
d::Distribution=Normal(),
l::Link=canonicallink(d);
wts=[],
contrasts=Dict{Symbol,Any}(),
offset=[],
verbose::Bool=false,
fast::Bool=false,
nAGQ::Integer=1,
progress::Bool=true,
thin::Int=typemax(Int),
kwargs...,
)
return fit(
GeneralizedLinearMixedModel,
f,
columntable(tbl),
d,
l;
wts,
offset,
contrasts,
verbose,
fast,
nAGQ,
progress,
thin,
)
return fit(GeneralizedLinearMixedModel, f, columntable(tbl), d, l; kwargs...)
end

function fit(
Expand All @@ -202,21 +181,10 @@ function fit(
wts=[],
contrasts=Dict{Symbol,Any}(),
offset=[],
verbose::Bool=false,
fast::Bool=false,
nAGQ::Integer=1,
progress::Bool=true,
thin::Int=typemax(Int),
kwargs...,
)
return fit!(
GeneralizedLinearMixedModel(
f, tbl, d, l; wts=wts, offset=offset, contrasts=contrasts
);
verbose,
fast,
nAGQ,
progress,
thin,
GeneralizedLinearMixedModel(f, tbl, d, l; wts, offset, contrasts); kwargs...
)
end

Expand All @@ -226,37 +194,16 @@ function fit(
tbl,
d::Distribution,
l::Link=canonicallink(d);
wts=[],
contrasts=Dict{Symbol,Any}(),
offset=[],
verbose::Bool=false,
REML::Bool=false,
fast::Bool=false,
nAGQ::Integer=1,
progress::Bool=true,
thin::Int=typemax(Int),
kwargs...,
)
return fit(
GeneralizedLinearMixedModel,
f,
tbl,
d,
l;
wts,
contrasts,
offset,
verbose,
fast,
nAGQ,
progress,
thin,
)
return fit(GeneralizedLinearMixedModel, f, tbl, d, l; kwargs...)
end

"""
fit!(m::GeneralizedLinearMixedModel; fast=false, nAGQ=1,
verbose=false, progress=true,
thin::Int=1)
thin::Int=1,
init_from_lmm=Set())

Optimize the objective function for `m`.

Expand All @@ -271,6 +218,25 @@ and it may not be shown at all for models that are optimized quickly.
If `verbose` is `true`, then both the intermediate results of both the nonlinear optimization and PIRLS are also displayed on standard output.

At every `thin`th iteration is recorded in `fitlog`, optimization progress is saved in `m.optsum.fitlog`.

By default, the starting values for model fitting are taken from a (non mixed,
i.e. marginal ) GLM fit. Experience with larger datasets (many thousands of
observations and/or hundreds of levels of the grouping variables) has suggested
that fittinga (Gaussian) linear mixed model on the untransformed data may
provide better starting values and thus overall faster fits even though an
entire LMM must be fit before the GLMM can be fit. `init_from_lmm` can be used
to specify which starting values from an LMM with. Valid options are any
collection (array, set, etc.) containing one or more of `:β` and `:θ`, the
default is the empty set.

!!! note
Initializing from an LMM requires fitting the entire LMM first, so when
`progress=true`, there will be two progress bars: first for the LMM, then
for the GLMM.

!!! warning
The `init_from_lmm` functionality is experimental and may change or be removed entirely
without being considered a breaking change.
"""
function fit!(
m::GeneralizedLinearMixedModel{T};
Expand All @@ -279,11 +245,16 @@ function fit!(
nAGQ::Integer=1,
progress::Bool=true,
thin::Int=typemax(Int),
init_from_lmm=Set(),
) where {T}
β = m.β
β = copy(m.β)
θ = copy(m.θ)
lm = m.LMM
optsum = lm.optsum

issubset(init_from_lmm, [:θ, :β]) ||
throw(ArgumentError("Invalid parameter selection for init_from_lmm"))

if optsum.feval > 0
throw(ArgumentError("This model has already been fitted. Use refit!() instead."))
end
Expand All @@ -292,9 +263,16 @@ function fit!(
throw(ArgumentError("The response is constant and thus model fitting has failed"))
end

if !isempty(init_from_lmm)
fit!(lm; progress)
:θ in init_from_lmm && copyto!(θ, lm.θ)
:β in init_from_lmm && copyto!(β, lm.β)
unfit!(lm)
end

if !fast
optsum.lowerbd = vcat(fill!(similar(β), T(-Inf)), optsum.lowerbd)
optsum.initial = vcat(β, m.θ)
optsum.initial = vcat(β, lm.optsum.final)
optsum.final = copy(optsum.initial)
end
setpar! = fast ? setθ! : setβθ!
Expand Down
4 changes: 2 additions & 2 deletions test/pirls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ end

@testset "cbpp" begin
cbpp = dataset(:cbpp)
gm2 = fit(MixedModel, first(gfms[:cbpp]), cbpp, Binomial(); wts=float(cbpp.hsz), progress=false)
gm2 = fit(MixedModel, first(gfms[:cbpp]), cbpp, Binomial(); wts=float(cbpp.hsz), progress=false, init_from_lmm=[:β, :θ])
@test weights(gm2) == cbpp.hsz
@test deviance(gm2,true) ≈ 100.09585619892968 atol=0.0001
@test sum(abs2, gm2.u[1]) ≈ 9.723054788538546 atol=0.0001
@test logdet(gm2) ≈ 16.90105378801136 atol=0.0001
@test logdet(gm2) ≈ 16.90105378801136 atol=0.001
@test isapprox(sum(gm2.resp.devresid), 73.47174762237978, atol=0.001)
@test isapprox(loglikelihood(gm2), -92.02628186840045, atol=0.001)
@test !dispersion_parameter(gm2)
Expand Down