Skip to content

Commit

Permalink
Blocked sparse (#102)
Browse files Browse the repository at this point in the history
* Add the long d3 benchmark

* Add and use BlockedSparse type

* Add positive-definite checks on 2x2 case

* Drop LowerTriangular wrap

* Add methods for BlockedSparse

* Remove LowerTriangular wrap

* Tighten code

* Clean up updateL! [ci skip]

* Avoid d3 test pending more debugging

* Avoid fit of d3 pending more debugging

* Reformulate lambda products

* Add tests and methods

* Move rankupdate! test to UniformBlockDiagona.jl

* Tighten code a tad

* Try (unsuccessfully) to avoid allocations

* Add tests for coverage

* Tighten code

* Correct defn for docs

* Export more types and generics

* BlockedSparse for L only

* Extend test coverage

* Reinstate d3 benchmark

* Add test of U cholfact for coverage

* Invoke the d3 benchmark

* Yet another typo fixed
  • Loading branch information
dmbates authored Nov 17, 2017
1 parent 5dca0d6 commit 6d99ac6
Show file tree
Hide file tree
Showing 18 changed files with 335 additions and 259 deletions.
6 changes: 3 additions & 3 deletions benchmark/benchmarks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ const mods = Dict{Symbol,Vector{Expr}}(
:bs10 => [:(1+U+V+W+((1+U+V+W)|G)+((1+U+V+W)|H))],
:cake => [:(1+A*B+(1|G))],
:cbpp => [:(1+A+(1|G))], # Binomial glmm, create and rename variables
:d3 => [:(1+U+(1|G)+(1|H)+(1|I))],
:d3 => [:(1+U+(1|G)+(1|H)+(1|I)), :(1+U+(1+U|G)+(1+U|H)+(1+U|I))],
:dialectNL => [:(1+A+T+U+V+W+X+(1|G)+(1|H)+(1|I))],
:egsingle => [:(1+A+U+V+(1|G)+(1|H))],
:epilepsy => [], # unknown origin
Expand Down Expand Up @@ -98,8 +98,8 @@ end
end

@benchgroup "crossed" ["multiple", "crossed", "scalar"] begin
for ds in [:Assay, :Demand, :InstEval, :Penicillin, :ScotsSec, :d3, :dialectNL,
:egsingle, :paulsim]
for ds in [:Assay, :Demand, :InstEval, :Penicillin, :ScotsSec, :d3,
:dialectNL, :egsingle, :paulsim]
for rhs in mods[ds]
@bench string(ds, ':', rhs) fitbobyqa($(QuoteNode(rhs)), $(QuoteNode(ds)))
end
Expand Down
6 changes: 4 additions & 2 deletions src/MixedModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ using StatsFuns: log2π
using NamedArrays: NamedArray, setnames!
using Base.LinAlg: BlasFloat, BlasReal, HermOrSym, PosDefException, checksquare, copytri!

import Base: cor, cond, convert, eltype, full, logdet, std
import Base: ReshapedArray, cor, cond, convert, eltype, full, logdet, std
import Base.LinAlg: A_mul_B!, A_mul_Bc!, Ac_mul_B!, A_ldiv_B!, Ac_ldiv_B!, A_rdiv_B!, A_rdiv_Bc!
import NLopt: Opt
import StatsBase: coef, coeftable, dof, deviance, fit!, fitted, loglikelihood,
Expand All @@ -21,6 +21,7 @@ export
Bernoulli,
Binomial,
Block,
BlockedSparse,
Gamma,
LogitLink,
LogLink,
Expand All @@ -42,9 +43,10 @@ export
coef,
coeftable,
cond,
describeblocks,
condVar,
dof,
deviance,
dof,
fit!,
fitted,
fixef, # extract the fixed-effects parameter estimates
Expand Down
48 changes: 21 additions & 27 deletions src/linalg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@ function αβA_mul_Bc!(α::T, A::SparseMatrixCSC{T}, B::SparseMatrixCSC{T},
arv = rowvals(A)
bnz = nonzeros(B)
brv = rowvals(B)
if β one(T)
β zero(T) ? scale!(C, β) : fill!(C, β)
end
β == 1 || scale!(C, β)
for j = 1:A.n
for ib in nzrange(B, j)
αbnz = α * bnz[ib]
Expand All @@ -25,15 +23,16 @@ function αβA_mul_Bc!(α::T, A::SparseMatrixCSC{T}, B::SparseMatrixCSC{T},
C
end

αβA_mul_Bc!::T, A::BlockedSparse{T}, B::BlockedSparse{T}, β::T, C::Matrix{T}) where T =
αβA_mul_Bc!(α, A.cscmat, B.cscmat, β, C)

function αβA_mul_Bc!::T, A::StridedVecOrMat{T}, B::SparseMatrixCSC{T}, β::T,
C::StridedVecOrMat{T}) where T
m, n = size(A)
p, q = size(B)
r, s = size(C)
@argcheck(r == m && s == p && n == q, DimensionMismatch)
if β one(T)
iszero(β) ? fill!(C, β) : scale!(C, β)
end
β == 1 || scale!(C, β)
nz = nonzeros(B)
rv = rowvals(B)
@inbounds for j in 1:q, k in nzrange(B, j)
Expand All @@ -46,6 +45,9 @@ function αβA_mul_Bc!(α::T, A::StridedVecOrMat{T}, B::SparseMatrixCSC{T}, β::
C
end

αβA_mul_Bc!::T, A::StridedVecOrMat{T}, B::BlockedSparse{T}, β::T,
C::StridedVecOrMat{T}) where T = αβA_mul_Bc!(α, A, B.cscmat, β, C)

αβAc_mul_B!::T, A::StridedMatrix{T}, B::StridedVector{T}, β::T,
C::StridedVector{T}) where {T<:BlasFloat} = BLAS.gemv!('C', α, A, B, β, C)

Expand Down Expand Up @@ -74,9 +76,7 @@ if VERSION < v"0.7.0-DEV.586"
A_rdiv_Bc!(A::StridedMatrix{T}, D::Diagonal{T}) where {T} = A_rdiv_B!(A, D)

function A_rdiv_Bc!(A::SparseMatrixCSC{T}, D::Diagonal{T}) where T
if size(D, 2) size(A, 2)
throw(DimensionMismatch("size(A,2)=$(size(A,2)) should be size(D, 1)=$(size(D,1))"))
end
@argcheck(size(D, 2) == size(A, 2), DimensionMismatch)
dd = D.diag
nonz = nonzeros(A)
for j in 1:A.n
Expand All @@ -89,28 +89,22 @@ if VERSION < v"0.7.0-DEV.586"
end
end

function A_rdiv_Bc!(A::Matrix{T}, B::LowerTriangular{T,UniformBlockDiagonal{T}}) where {T}
m, n, k = size(B.data.data)
@argcheck size(A, 2) == size(B, 1) && m == n DimensionMismatch
offset = 0
one2m = 1:m
for f in B.data.facevec
BLAS.trsm!('R', 'L', 'T', 'N', one(T), f, view(A, :, one2m + offset))
offset += m
function A_rdiv_Bc!(A::Matrix{T}, B::LowerTriangular{T,UniformBlockDiagonal{T}}) where T
Bd = B.data
m, n, k = size(Bd.data)
@argcheck(size(A, 2) == size(Bd, 1) && m == n, DimensionMismatch)
inds = 1:m
for f in Bd.facevec
BLAS.trsm!('R', 'L', 'T', 'N', one(T), f, view(A, :, inds))
inds += m
end
A
end

function A_rdiv_Bc!(A::SparseMatrixCSC{T}, B::LowerTriangular{T,UniformBlockDiagonal{T}}) where {T}
nz = nonzeros(A)
offset = 0
m, n, k = size(B.data.data)
for f in B.data.facevec
nzr = nzrange(A, offset + 1).start : nzrange(A, offset + n).stop
q = div(length(nzr), m)
## FIXME Still allocating 1.4 GB. Call BLAS.trsm directly
A_rdiv_Bc!(unsafe_wrap(Array, pointer(nz, nzr[1]), (q, m)), LowerTriangular(f))
offset += n
function A_rdiv_Bc!(A::BlockedSparse{T}, B::LowerTriangular{T,UniformBlockDiagonal{T}}) where T
@argcheck(length(A.colblocks) == length(B.data.facevec), DimensionMismatch)
for (b,f) in zip(A.colblocks, B.data.facevec)
A_rdiv_Bc!(b, LowerTriangular(f))
end
A
end
6 changes: 4 additions & 2 deletions src/linalg/cholUnblocked.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ function cholUnblocked!(A::StridedMatrix{T}, ::Type{Val{:L}}) where T<:BlasFloat
A[1] < zero(T) && throw(PosDefException(1))
A[1] = sqrt(A[1])
elseif n == 2
A[1] < zero(T) && throw(PosDefException(1))
A[1] = sqrt(A[1])
A[2] /= A[1]
A[4] = sqrt(A[4] - abs2(A[2]))
(A[4] -= abs2(A[2])) < zero(T) && throw(PosDefException(2))
A[4] = sqrt(A[4])
else
_, info = LAPACK.potrf!('L', A)
info 0 && throw(PosDefException(info))
iszero(info) || throw(PosDefException(info))
end
A
end
Expand Down
119 changes: 44 additions & 75 deletions src/linalg/lambdaprods.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""
Λc_mul_B!(A::AbstractTerm, B::AbstractArray)
A_mul_Λ!(A::AbstractArray, B::AbstractTerm)
In-place products w.r.t. blocks of Λ or Λ′
In-place product of `A` with a repeated block diagonal expansion of `B.Λ``
An [`AbstractTerm`]{@ref} of size n×k includes a description of a lower triangular
k×k matrix determined by the θ parameter vector. These matrices are, at most, repeated
Expand All @@ -12,94 +11,64 @@ with a [`MatrixTerm`]{@ref} is the identity.
See also [`scaleinflate!`]{@ref} which performs two such multiplications plus inflation of
the diagonal plus a copy! operation in one step.
"""
function Λc_mul_B! end
function A_mul_Λ! end

Λc_mul_B!(A::MatrixTerm, B) = B
A_mul_Λ!(A, B::MatrixTerm) = A

A_mul_Λ!(A, B::ScalarFactorReTerm) = scale!(A, B.Λ)
Λc_mul_B!(A::ScalarFactorReTerm, B) = scale!(A.Λ, B)

function A_mul_Λ!(A::SparseMatrixCSC{T,S}, B::VectorFactorReTerm{T}) where {T,S}
k = vsize(B)
nz = nonzeros(A)
λ = LowerTriangular(B.Λ)
m, n = size(A)
cp = A.colptr
rv = rowvals(A)
blkstart = 1
while blkstart n
i1 = nzrange(A, blkstart)
r = length(i1)
if (cp[blkstart + k] - cp[blkstart]) length(i1) * k
throw(ArgumentError("A is not compatible with B"))
end
## consider using a pointer here to cut down on allocation (~ 1GB for d3 fit)
a = reshape(view(nz, cp[blkstart]:(cp[blkstart + k] - 1)), (r, k))
A_mul_B!(a, a, λ)
blkstart += k
function A_mul_Λ!(A::BlockedSparse{T}, B::VectorFactorReTerm{T}) where T
λ = B.Λ
for blk in A.colblocks
A_mul_B!(blk, λ)
end
A
end

function Λ_mul_B!(A::VectorFactorReTerm{T}, B::StridedVector{T}) where T
@argcheck (k = vsize(A)) > 1
λ = LowerTriangular(A.Λ)
A_mul_B!(λ, reshape(B, (k, div(length(B), k))))
B
end

Λ_mul_B!(A::ScalarFactorReTerm{T}, B::StridedVecOrMat{T}) where T = scale!(B, A.Λ)

function A_mul_Λ!(A::Matrix{T}, B::VectorFactorReTerm{T}) where T<:AbstractFloat
@argcheck (k = vsize(B)) > 1
λ = LowerTriangular(B.Λ)
function A_mul_Λ!(A::Matrix{T}, B::VectorFactorReTerm{T,V,R,S}) where {T,V,R,S}
λ = B.Λ
m, n = size(A)
q, r = divrem(n, k)
if r 0
throw(DimensionMismatch("size(A, 2) = $n is not a multiple of size(B.λ, 1) = $k"))
end
offset = 0
onetok = 1:k
for blk in 1:q
## another place where ~ 1GB is allocated in d3 fit
A_mul_B!(view(A, :, onetok + offset), λ)
offset += k
q, r = divrem(n, S)
iszero(r) || throw(DimensionMismatch("size(A, 2) = $n is not a multiple of S = $S"))
A3 = reshape(A, (m, S, q))
for k in 1:q
A_mul_B!(view(A3, :, :, k), λ)
end
A
end

Λ_mul_B!(C::AbstractArray{T}, A::ScalarFactorReTerm{T}, B::AbstractArray{T}) where T = scale!(C, A.Λ, B)
"""
Λc_mul_B!(A::AbstractTerm, B::AbstractArray)
In-place product of a repeated block diagonal expansion of `A.Λ'` with `B`
function Λ_mul_B!(C::StridedVecOrMat{T}, A::VectorFactorReTerm{T},
B::StridedVecOrMat{T}) where T
@argcheck(size(C) == size(B), DimensionMismatch)
k = vsize(A)
q, r = divrem(size(C, 1), k)
iszero(r) || throw(ArgumentError("size(C, 1) = $(size(C,1)) is not a multiple of $k = vsize(A)"))
A_mul_B!(LowerTriangular(A.Λ), reshape(copy!(C, B), (k, size(C, 2) * q)))
C
See also [`scaleinflate!`]{@ref} which performs two such multiplications plus inflation of
the diagonal plus a copy! operation in one step.
"""
function Λc_mul_B! end
Λc_mul_B!(A::MatrixTerm, B) = B
Λc_mul_B!(A::ScalarFactorReTerm, B) = scale!(A.Λ, B)
function Λc_mul_B!(A::VectorFactorReTerm{T,V,R,S}, B::Matrix{T}) where {T,V,R,S}
m, n = size(B)
Ac_mul_B!(A.Λ, reshape(B, (S, div(m, S) * n)))
B
end

function Λc_mul_B!(A::VectorFactorReTerm{T}, B::StridedVecOrMat{T}) where T
@argcheck (k = vsize(A)) > 1
λ = LowerTriangular(A.Λ)
m, n = size(B, 1), size(B, 2)
Ac_mul_B!(λ, reshape(B, (k, div(m, k) * n)))
function Λc_mul_B!(A::VectorFactorReTerm{T}, B::BlockedSparse{T}) where T
Ac_mul_B!(A.Λ, B.nzsasmat)
B
end

function Λc_mul_B!(A::VectorFactorReTerm{T}, B::SparseMatrixCSC{T}) where {T}
@argcheck (k = vsize(A)) > 1
nz = nonzeros(B)
λ = LowerTriangular(A.Λ)
for j in 1:B.n
## third place with over 1 GB allocation in d3 fit
## probably call BLAS.trmm directly here
bnz = view(nz, nzrange(B, j))
mbj = reshape(bnz, (k, div(length(bnz), k)))
Ac_mul_B!(mbj, λ, mbj)
end
B
"""
Λ_mul_B!(C::Matrix, A::AbstractFactorReTerm, B::Matrix)
Mutating product of the repeated block-diagonal expansion of `A` and `B` into `C`
This multiplication is used to convert "spherical" random effects to the original scale.
"""
function Λ_mul_B!(C::Matrix, A::AbstractFactorReTerm, B::Matrix) end

function Λ_mul_B!(C::Matrix{T}, A::ScalarFactorReTerm{T}, B::Matrix{T}) where T
@argcheck(size(C) == size(B) == (1, size(A, 2)), DimensionMismatch)
scale!(C, A.Λ, B)
end

function Λ_mul_B!(C::Matrix{T}, A::VectorFactorReTerm{T,V,R,S}, B::Matrix{T}) where {T,V,R,S}
@argcheck(size(C) == size(B) == (S, div(size(A, 2), S)), DimensionMismatch)
A_mul_B!(A.Λ, copy!(C, B))
end
2 changes: 1 addition & 1 deletion src/linalg/logdet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ function logdet(m::LinearMixedModel{T}) where {T}
Ldat = m.L.data
for (i, trm) in enumerate(m.trms)
if isa(trm, AbstractFactorReTerm)
s += LD(m.L.data[Block(i, i)])
s += LD(Ldat[Block(i, i)])
end
end
2s
Expand Down
36 changes: 13 additions & 23 deletions src/linalg/rankUpdate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@ function rankUpdate!(α::T, A::SparseMatrixCSC{T},
m, n = size(A)
@argcheck m == size(C, 2) && C.uplo == 'L' DimensionMismatch
Cd = C.data
if β one(T)
scale!(LowerTriangular(Cd), β)
end
β == 1 || scale!(LowerTriangular(Cd), β)
rv = rowvals(A)
nz = nonzeros(A)
@inbounds for jj in 1:n
Expand All @@ -57,10 +55,13 @@ end
rankUpdate!::T, A::SparseMatrixCSC{T}, C::HermOrSym{T}) where {T} =
rankUpdate!(α, A, one(T), C)

rankUpdate!::T, A::BlockedSparse{T}, C::HermOrSym{T}) where {T} =
rankUpdate!(α, A.cscmat, one(T), C)

function rankUpdate!::T, A::SparseMatrixCSC{T}, C::Diagonal{T}) where T <: Number
m, n = size(A)
dd = C.diag
@argcheck length(dd) == m DimensionMismatch
@argcheck(length(dd) == m, DimensionMismatch)
nz = nonzeros(A)
rv = rowvals(A)
for j in 1:n
Expand All @@ -74,26 +75,15 @@ function rankUpdate!(α::T, A::SparseMatrixCSC{T}, C::Diagonal{T}) where T <: Nu
C
end

function rankUpdate!::T, A::SparseMatrixCSC{T},
C::HermOrSym{T,UniformBlockDiagonal{T}}) where T<:Number
m, n, k = size(C.data.data)
@argcheck m == n && size(A, 1) == m * k DimensionMismatch
# Another expensive evaluation in terms of storage allocation
aat = α * (A * A')
nz = nonzeros(aat)
rv = rowvals(aat)
offset = 0
for f in C.data.facevec
for j in 1:m
for i in nzrange(aat, offset + j)
ii = rv[i] - offset
0 < ii k || throw(ArgumentError("A*A' does not conform to B"))
if ii j # update lower triangle only
f[ii, j] += nz[i]
end
end
function rankUpdate!::T, A::BlockedSparse{T}, C::HermOrSym{T,UniformBlockDiagonal{T}}) where T
Arb = A.rowblocks
Cdf = C.data.facevec
(m = length(Arb)) == length(Cdf) ||
throw(DimensionMismatch("length(A.rowblocks) = $m$(length(Cdf)) = length(C.data.facevec)"))
for (b, d) in zip(Arb, Cdf)
for v in b
BLAS.syr!('L', α, v, d)
end
offset += m
end
C
end
Loading

0 comments on commit 6d99ac6

Please sign in to comment.