Skip to content

Commit

Permalink
support rank deficiency in prediction (#676)
Browse files Browse the repository at this point in the history
* support rank deficiency in prediction

* add NEWS

* oops

* Update NEWS.md
  • Loading branch information
palday authored Apr 12, 2023
1 parent e5d8a6a commit 251ef8a
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 7 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
MixedModels v4.10.0 Release Notes
==============================
* Rank deficiency in prediction is now supported, both when the original model was fit to rank-deficient data and when the new data are rank deficient. The behavior is consistent but may be surprising when both old and new data are rank deficient. See the `predict` docstring for an example. [#676]
* Multithreading in `parametricbootstrap` with `use_threads` is now deprecated and a noop. With improvements in BLAS threading, multithreading at the Julia level did not help performance and sometimes hurt it. [#674]

MixedModels v4.9.0 Release Notes
Expand Down Expand Up @@ -405,3 +406,4 @@ Package dependencies
[#665]: https://github.com/JuliaStats/MixedModels.jl/issues/665
[#667]: https://github.com/JuliaStats/MixedModels.jl/issues/667
[#674]: https://github.com/JuliaStats/MixedModels.jl/issues/674
[#676]: https://github.com/JuliaStats/MixedModels.jl/issues/676
23 changes: 16 additions & 7 deletions src/predict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,20 @@ Predict response for new data.
`missing` is not valid because `missing` data are dropped before
constructing the model matrices.
!!! warning
Models are assumed to be full rank.
!!! warning
These methods construct an entire MixedModel behind the scenes and
as such may use a large amount of memory when `newdata` is large.
!!! warning
Rank-deficiency can lead to surprising but consistent behavior.
For example, if there are two perfectly collinear predictors `A`
and `B` (e.g. constant multiples of each other), then it is possible
that `A` will be pivoted out in the fitted model and thus the
associated coefficient is set to zero. If predictions are then
generated on new data where `B` has been set to zero but `A` has
not, then there will no contribution from neither `A` nor `B`
in the resulting predictions.
The keyword argument `new_re_levels` specifies how previously unobserved
values of the grouping variable are handled. Possible values are:
Expand Down Expand Up @@ -63,7 +70,7 @@ Similarly, offsets are also not supported for `GeneralizedLinearMixedModel`.
function StatsAPI.predict(
m::LinearMixedModel, newdata::Tables.ColumnTable; new_re_levels=:missing
)
return _predict(m, newdata, m.β; new_re_levels)
return _predict(m, newdata, coef(m)[m.feterm.piv]; new_re_levels)
end

function StatsAPI.predict(
Expand All @@ -73,13 +80,14 @@ function StatsAPI.predict(
type=:response,
)
type in (:linpred, :response) || throw(ArgumentError("Invalid value for type: $(type)"))

y = _predict(m.LMM, newdata, m.β; new_re_levels)
# want pivoted but not truncated
y = _predict(m.LMM, newdata, coef(m)[m.feterm.piv]; new_re_levels)

return type == :linpred ? y : broadcast!(Base.Fix1(linkinv, Link(m)), y, y)
end

# β is separated out here because m.β != m.LMM.β depending on how β is estimated for GLMM
# also β should already be pivoted but NOT truncated in the rank deficient case
function _predict(m::MixedModel{T}, newdata, β; new_re_levels) where {T}
new_re_levels in (:population, :missing, :error) ||
throw(ArgumentError("Invalid value for new_re_levels: $(new_re_levels)"))
Expand Down Expand Up @@ -118,8 +126,9 @@ function _predict(m::MixedModel{T}, newdata, β; new_re_levels) where {T}
ytemp, lmm
end

pivotmatch = mnew.feterm.piv[m.feterm.piv]
grps = fnames(m)
mul!(y, mnew.X, β)
mul!(y, view(mnew.X, :, pivotmatch), β)
# mnew.reterms for the correct Z matrices
# ranef(m) for the BLUPs from the original fit

Expand Down
56 changes: 56 additions & 0 deletions test/predict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,61 @@ end
@test_throws ArgumentError predict(m, slpm)
fill!(slpm.reaction, missing)
@test_throws ArgumentError predict(m, slpm)

end

@testset "rank deficiency" begin
@testset "in original fit" begin
refvals = predict(first(models(:sleepstudy)), slp)

slprd = transform(slp, :days => ByRow(x -> 2x) => :days2)
m = fit(MixedModel, @formula(reaction ~ 1 + days + days2 + (1|subj)), slprd; progress=false)
# predict assumes that X is the correct length and stored pivoted
# so these first two tests will fail if we change that storage detail
@test size(m.X) == (180, 3)
@test all(2 .* view(m.X, :, m.feterm.piv[2]) .== view(m.X, :, m.feterm.piv[3]))
@test predict(m, slprd) == refvals

slprd0 = transform(slp, :days => zero => :days0)
m = fit(MixedModel, @formula(reaction ~ 1 + days0 + days + (1|subj)), slprd0; progress=false)
@test predict(m, slprd0) == refvals
# change the formula order slightly so that the original ordering and hence the
# permutation vector for pivoting isn't identical
m = fit(MixedModel, @formula(reaction ~ 1 + days + days0 + (1|subj)), slprd0; progress=false)
@test predict(m, slprd0) == refvals
end

@testset "in newdata" begin
mref = first(models(:sleepstudy))
# remove days
refvals = fitted(mref) .- view(mref.X, :, 2) * mref.β[2]
slp0 = transform(slp, :days => zero => :days)
vals = predict(mref, slp0)
@test all(refvals .≈ vals)
end

@testset "in both" begin
# now what happens when old and new are rank deficient
mref = first(models(:sleepstudy))
# remove days
refvals = fitted(mref) .- view(mref.X, :, 2) * mref.β[2]
# days gets pivoted out
slprd = transform(slp, :days => ByRow(x -> 2x) => :days2)
m = fit(MixedModel, @formula(reaction ~ 1 + days + days2 + (1|subj)), slprd; progress=false)
# days2 gets pivoted out
slp0 = transform(slp, :days => zero => :days2)
vals = predict(m, slp0)
# but in the original fit, days gets pivoted out, so its coef is zero
# and now we have a column of zeros for days2
# leaving us with only the intercept
# this is consistent behavior
@test all(refvals .≈ vals)

slp1 = transform(slp, :days => ByRow(one) => :days2)
vals = predict(m, slp1)
refvals = fitted(mref) .- view(mref.X, :, 2) * mref.β[2] .+ last(fixef(m))
@test all(refvals .≈ vals)
end
end

@testset "transformed response" begin
Expand Down Expand Up @@ -124,4 +179,5 @@ end
@test predict(gm0, contra; type=:response) gm0.resp.mu rtol=0.01
end
end

end

2 comments on commit 251ef8a

@palday
Copy link
Member Author

@palday palday commented on 251ef8a Apr 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/81481

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v4.10.0 -m "<description of version>" 251ef8a4ce43b0be0d05dcc649b4874ad3b6e209
git push origin v4.10.0

Please sign in to comment.