From c13786de9e93dc770d1acf2804d51dff2dcfe445 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Fri, 13 Mar 2020 11:13:57 -0400
Subject: [PATCH 01/11] initial commit, no tests yet
---
Project.toml | 2 +-
src/Statistics.jl | 121 ++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 118 insertions(+), 5 deletions(-)
diff --git a/Project.toml b/Project.toml
index 12c96773..9ab57407 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,8 +1,8 @@
name = "Statistics"
-uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+OhMyREPL = "5fb14364-9ced-5910-84b2-373655c76a03"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[extras]
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 977ae8a6..7c03e3b5 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -479,10 +479,13 @@ end
_vmean(x::AbstractVector, vardim::Int) = mean(x)
_vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim)
+_abs2(x::Real) = abs2(x)
+_abs2(x) = x*x'
+
# core functions
-unscaled_covzm(x::AbstractVector{<:Number}) = sum(abs2, x)
-unscaled_covzm(x::AbstractVector) = sum(t -> t*t', x)
+unscaled_covzm(x::AbstractVector{<:Number}) = sum(_abs2, x)
+unscaled_covzm(x::AbstractVector) = sum(_abs2, x)
unscaled_covzm(x::AbstractMatrix, vardim::Int) = (vardim == 1 ? _conj(x'x) : x * x')
unscaled_covzm(x::AbstractVector, y::AbstractVector) = sum(conj(y[i])*x[i] for i in eachindex(y, x))
@@ -494,7 +497,25 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) =
(vardim == 1 ? *(transpose(x), _conj(y)) : *(x, adjoint(y)))
# covzm (with centered data)
-
+function covzm(itr::Any; corrected::Bool=true)
+ y = iterate(itr)
+ if y === nothing
+ return Base.mapreduce_empty_iter(_abs2, Base.add_sum, itr,
+ Base.IteratorEltype(itr)) / 0
+ end
+ count = 1
+ value, state = y
+ f_value = _abs2(value)
+ total = Base.reduce_first(Base.add_sum, f_value)
+ y = iterate(itr, state)
+ while y !== nothing
+ value, state = y
+ total += _abs2(value)
+ count += 1
+ y = iterate(itr, state)
+ end
+ return total / (count - Int(corrected))
+end
covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))
function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
C = unscaled_covzm(x, vardim)
@@ -504,6 +525,28 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
A .= A .* b
return A
end
+function covzm(x::Any, y::Any; corrected::Bool=true)
+ z = zip(x, y)
+ z_itr = iterate(z)
+ if z_itr === nothing
+ # TODO: Understand how to improve this error.
+ #return Base.mapreduce_empty_iter(t -> _conj(t[2])*t[1]', Base.add_sum, itr,
+ # Base.IteratorEltype(x)) / 0
+ return NaN
+ end
+ count = 1
+ value, state = z_itr
+ f_value = conj(value[2])*value[1]'
+ total = Base.reduce_first(Base.add_sum, f_value)
+ z_itr = iterate(z, state)
+ while z_itr !== nothing
+ value, state = z_itr
+ total += conj(value[2])*value[1]'
+ count += 1
+ z_itr = iterate(z, state)
+ end
+ return total ./ (count - Int(corrected))
+end
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))
function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true)
@@ -517,17 +560,75 @@ end
# covm (with provided mean)
## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector}
-## which can't be handled by broadcast
+## which can't be handled by broadcastz
+function covm(itr::Any, itrmean; corrected::Bool=true)
+ y = iterate(itr)
+ f = let itrmean = itrmean
+ x -> _abs2(x-itrmean)
+ end
+ if y === nothing
+ return Base.mapreduce_empty_iter(f, Base.add_sum, itr,
+ Base.IteratorEltype(itr)) / 0
+ end
+ count = 1
+ value, state = y
+ f_value = f(value)
+ total = Base.reduce_first(Base.add_sum, f_value)
+ y = iterate(itr, state)
+ while y !== nothing
+ value, state = y
+ total += f(value)
+ count += 1
+ y = iterate(itr, state)
+ end
+ return total / (count - Int(corrected))
+end
covm(x::AbstractVector, xmean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x); corrected=corrected)
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, vardim; corrected=corrected)
+function covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true)
+ z = zip(x, y)
+ z_itr = iterate(z)
+ if z_itr === nothing
+ # TODO: Understand how to improve this error.
+ #return Base.mapreduce_empty_iter(t -> _conj(t[2])*t[1]', Base.add_sum, itr,
+ # Base.IteratorEltype(x)) / 0
+ return NaN
+ end
+ f = let xmean = xmean, ymean = ymean
+ t -> conj(t[2]-ymean)*(t[1]-xmean)
+ end
+ count = 1
+ value, state = z_itr
+ f_value = f(value)
+ total = Base.reduce_first(Base.add_sum, f_value)
+ z_itr = iterate(z, state)
+ while z_itr !== nothing
+ value, state = z_itr
+ total += f(value)
+ count += 1
+ z_itr = iterate(z, state)
+ end
+ return total ./ (count - Int(corrected))
+end
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, y .- ymean, vardim; corrected=corrected)
# cov (API)
+"""
+ cov(x::Any; corrected::Bool=true)
+
+Compute the variance of the vector `x`. If `corrected` is `true` (the default) then the sum
+is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where `n`
+is the number of elements in the iterator, which is not necessarily known.
+"""
+function cov(x::Any, corrected::Bool=true)
+ covm(x, mean(x); corrected=corrected)
+end
+
"""
cov(x::AbstractVector; corrected::Bool=true)
@@ -546,6 +647,18 @@ if `corrected` is `false` where `n = size(X, dims)`.
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) =
covm(X, _vmean(X, dims), dims; corrected=corrected)
+"""
+ cov(x::Any, y::Any; corrected::Bool=true)
+
+Compute the covariance between the iterators `x` and `y`. If `corrected` is `true` (the
+default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` where
+``*`` denotes the complex conjugate and `n` is the number of elements in `x` which must equal
+the number of elements in `y`. If `corrected` is `false`, computes ``\\frac{1}{n}\\sum_{i=1}^n
+(x_i-\\bar x) (y_i-\\bar y)^*``.
+"""
+cov(x::Any, y::Any; corrected::Bool=true) =
+ covm(x, mean(x), y, mean(y); corrected=corrected)
+
"""
cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true)
From dd9c04f49f9a77836786bef305977714f208bf66 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Fri, 13 Mar 2020 11:35:21 -0400
Subject: [PATCH 02/11] Add a few tests
---
src/Statistics.jl | 4 ++--
test/runtests.jl | 9 +++++++--
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 7c03e3b5..ff6ededd 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -479,7 +479,7 @@ end
_vmean(x::AbstractVector, vardim::Int) = mean(x)
_vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim)
-_abs2(x::Real) = abs2(x)
+_abs2(x::Number) = abs2(x)
_abs2(x) = x*x'
# core functions
@@ -597,7 +597,7 @@ function covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true)
return NaN
end
f = let xmean = xmean, ymean = ymean
- t -> conj(t[2]-ymean)*(t[1]-xmean)
+ t -> conj(t[2]-ymean)*(t[1]-xmean)'
end
count = 1
value, state = z_itr
diff --git a/test/runtests.jl b/test/runtests.jl
index bc33cf57..131c49c8 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -314,6 +314,7 @@ Y = [6.0 2.0;
5.0 8.0;
3.0 4.0;
2.0 3.0]
+X_gen = ([X[i,1], X[i,2]] for i in 1:size(X, 1))
@testset "covariance" begin
for vd in [1, 2], zm in [true, false], cr in [true, false]
@@ -328,6 +329,8 @@ Y = [6.0 2.0;
end
x1 = vec(X[:,1])
y1 = vec(Y[:,1])
+ x1_gen = (x for x in x1)
+ y1_gen = (y for y in y1)
else
k = size(X, 1)
Cxx = zeros(k, k)
@@ -338,6 +341,8 @@ Y = [6.0 2.0;
end
x1 = vec(X[1,:])
y1 = vec(Y[1,:])
+ x1_gen = (x for x in x1)
+ y1_gen = (y for y in y1)
end
c = zm ? Statistics.covm(x1, 0, corrected=cr) :
@@ -346,14 +351,14 @@ Y = [6.0 2.0;
@test c ≈ Cxx[1,1]
@inferred cov(x1, corrected=cr)
- @test cov(X) == Statistics.covm(X, mean(X, dims=1))
+ @test cov(X) == cov(X_gen) == Statistics.covm(X, mean(X, dims=1))
C = zm ? Statistics.covm(X, 0, vd, corrected=cr) :
cov(X, dims=vd, corrected=cr)
@test size(C) == (k, k)
@test C ≈ Cxx
@inferred cov(X, dims=vd, corrected=cr)
- @test cov(x1, y1) == Statistics.covm(x1, mean(x1), y1, mean(y1))
+ @test cov(x1, y1) == cov(x1_gen, y1_gen) == Statistics.covm(x1, mean(x1), y1, mean(y1))
c = zm ? Statistics.covm(x1, 0, y1, 0, corrected=cr) :
cov(x1, y1, corrected=cr)
@test isa(c, Float64)
From 38a0706ff1fbb9a79e7b62e82ce018d6c7ec125f Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Fri, 13 Mar 2020 11:38:32 -0400
Subject: [PATCH 03/11] fix project.toml
---
Project.toml | 1 -
1 file changed, 1 deletion(-)
diff --git a/Project.toml b/Project.toml
index 9ab57407..21bd1852 100644
--- a/Project.toml
+++ b/Project.toml
@@ -2,7 +2,6 @@ name = "Statistics"
[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-OhMyREPL = "5fb14364-9ced-5910-84b2-373655c76a03"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[extras]
From 174040c5e2514b988e748581b83a5009e03e2d58 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Thu, 9 Apr 2020 12:32:01 -0400
Subject: [PATCH 04/11] Fix transpose bug and conjugate bug
---
src/Statistics.jl | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index ff6ededd..bfab3aa9 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -482,13 +482,16 @@ _vmean(x::AbstractMatrix, vardim::Int) = mean(x, dims=vardim)
_abs2(x::Number) = abs2(x)
_abs2(x) = x*x'
+_conjmul(x::Number, y::Number) = x * conj(y)
+_conjmul(x, y) = x * _conj(y)'
+
# core functions
unscaled_covzm(x::AbstractVector{<:Number}) = sum(_abs2, x)
unscaled_covzm(x::AbstractVector) = sum(_abs2, x)
unscaled_covzm(x::AbstractMatrix, vardim::Int) = (vardim == 1 ? _conj(x'x) : x * x')
-unscaled_covzm(x::AbstractVector, y::AbstractVector) = sum(conj(y[i])*x[i] for i in eachindex(y, x))
+unscaled_covzm(x::AbstractVector, y::AbstractVector) = sum(_conjmul(x[i], y[i]) for i in eachindex(y, x))
unscaled_covzm(x::AbstractVector, y::AbstractMatrix, vardim::Int) =
(vardim == 1 ? *(transpose(x), _conj(y)) : *(transpose(x), transpose(_conj(y))))
unscaled_covzm(x::AbstractMatrix, y::AbstractVector, vardim::Int) =
@@ -536,12 +539,12 @@ function covzm(x::Any, y::Any; corrected::Bool=true)
end
count = 1
value, state = z_itr
- f_value = conj(value[2])*value[1]'
+ f_value = _conjmul(value[1], value[2])
total = Base.reduce_first(Base.add_sum, f_value)
z_itr = iterate(z, state)
while z_itr !== nothing
value, state = z_itr
- total += conj(value[2])*value[1]'
+ total += _conjmul(value[1], value[2])
count += 1
z_itr = iterate(z, state)
end
From 131ae700bdbe0bf994f87aff835eff68d07ab96b Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Thu, 9 Apr 2020 17:12:57 -0400
Subject: [PATCH 05/11] respond to code review and add complex number tests
---
src/Statistics.jl | 65 ++++++++++++++++++++---------------------------
test/runtests.jl | 19 ++++++++++----
2 files changed, 41 insertions(+), 43 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index bfab3aa9..92fbeb84 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -487,7 +487,6 @@ _conjmul(x, y) = x * _conj(y)'
# core functions
-unscaled_covzm(x::AbstractVector{<:Number}) = sum(_abs2, x)
unscaled_covzm(x::AbstractVector) = sum(_abs2, x)
unscaled_covzm(x::AbstractMatrix, vardim::Int) = (vardim == 1 ? _conj(x'x) : x * x')
@@ -503,13 +502,13 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) =
function covzm(itr::Any; corrected::Bool=true)
y = iterate(itr)
if y === nothing
- return Base.mapreduce_empty_iter(_abs2, Base.add_sum, itr,
- Base.IteratorEltype(itr)) / 0
+ v = _abs2(zero(eltype(itr)))
+ return (v + v) / 0
end
count = 1
value, state = y
f_value = _abs2(value)
- total = Base.reduce_first(Base.add_sum, f_value)
+ total = Base.reduce_first(+, f_value)
y = iterate(itr, state)
while y !== nothing
value, state = y
@@ -532,23 +531,21 @@ function covzm(x::Any, y::Any; corrected::Bool=true)
z = zip(x, y)
z_itr = iterate(z)
if z_itr === nothing
- # TODO: Understand how to improve this error.
- #return Base.mapreduce_empty_iter(t -> _conj(t[2])*t[1]', Base.add_sum, itr,
- # Base.IteratorEltype(x)) / 0
- return NaN
+ v = _conjmul(zero(eltype(x)), zero(eltype(y)))
+ return (v + v) / 0
end
count = 1
- value, state = z_itr
- f_value = _conjmul(value[1], value[2])
- total = Base.reduce_first(Base.add_sum, f_value)
+ (xi, yi), state = z_itr
+ f_value = _conjmul(xi, yi)
+ total = Base.reduce_first(+, f_value)
z_itr = iterate(z, state)
while z_itr !== nothing
- value, state = z_itr
- total += _conjmul(value[1], value[2])
+ (xi, yi), state = z_itr
+ total += _conjmul(xi, yi)
count += 1
z_itr = iterate(z, state)
end
- return total ./ (count - Int(corrected))
+ return total / (count - Int(corrected))
end
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))
@@ -563,24 +560,21 @@ end
# covm (with provided mean)
## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector}
-## which can't be handled by broadcastz
+## which can't be handled by broadcast
function covm(itr::Any, itrmean; corrected::Bool=true)
y = iterate(itr)
- f = let itrmean = itrmean
- x -> _abs2(x-itrmean)
- end
if y === nothing
- return Base.mapreduce_empty_iter(f, Base.add_sum, itr,
- Base.IteratorEltype(itr)) / 0
+ v = _abs2(zero(eltype(itr - itrmean)))
+ return (v + v) / 0
end
count = 1
- value, state = y
- f_value = f(value)
- total = Base.reduce_first(Base.add_sum, f_value)
+ itri, state = y
+ first_value = _abs2(itri - itrmean)
+ total = Base.reduce_first(+, first_value)
y = iterate(itr, state)
while y !== nothing
- value, state = y
- total += f(value)
+ itri, state = y
+ total += _abs2(itri - itrmean)
count += 1
y = iterate(itr, state)
end
@@ -594,26 +588,21 @@ function covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true)
z = zip(x, y)
z_itr = iterate(z)
if z_itr === nothing
- # TODO: Understand how to improve this error.
- #return Base.mapreduce_empty_iter(t -> _conj(t[2])*t[1]', Base.add_sum, itr,
- # Base.IteratorEltype(x)) / 0
- return NaN
- end
- f = let xmean = xmean, ymean = ymean
- t -> conj(t[2]-ymean)*(t[1]-xmean)'
+ v = _conjmul(zero(eltype(x)), zero(eltype(y)))
+ return (v + v) / 0
end
count = 1
- value, state = z_itr
- f_value = f(value)
- total = Base.reduce_first(Base.add_sum, f_value)
+ (xi, yi), state = z_itr
+ first_value = _conjmul(xi-xmean, yi-ymean)
+ total = Base.reduce_first(+, first_value)
z_itr = iterate(z, state)
while z_itr !== nothing
- value, state = z_itr
- total += f(value)
+ (xi, yi), state = z_itr
+ total += _conjmul(xi-xmean, yi-ymean)
count += 1
z_itr = iterate(z, state)
end
- return total ./ (count - Int(corrected))
+ return total / (count - Int(corrected))
end
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
diff --git a/test/runtests.jl b/test/runtests.jl
index 131c49c8..57859b79 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -314,7 +314,10 @@ Y = [6.0 2.0;
5.0 8.0;
3.0 4.0;
2.0 3.0]
+X_vec = [[X[i,1], X[i,2]] for i in 1:size(X, 1)]
X_gen = ([X[i,1], X[i,2]] for i in 1:size(X, 1))
+Y_vec = [[Y[i,1], Y[i,2]] for i in 1:size(Y, 1)]
+Y_gen = ([Y[i,1], Y[i,2]] for i in 1:size(Y, 1))
@testset "covariance" begin
for vd in [1, 2], zm in [true, false], cr in [true, false]
@@ -351,7 +354,7 @@ X_gen = ([X[i,1], X[i,2]] for i in 1:size(X, 1))
@test c ≈ Cxx[1,1]
@inferred cov(x1, corrected=cr)
- @test cov(X) == cov(X_gen) == Statistics.covm(X, mean(X, dims=1))
+ @test cov(X) == cov(X_vec) == cov(X_gen) == Statistics.covm(X, mean(X, dims=1))
C = zm ? Statistics.covm(X, 0, vd, corrected=cr) :
cov(X, dims=vd, corrected=cr)
@test size(C) == (k, k)
@@ -383,7 +386,10 @@ X_gen = ([X[i,1], X[i,2]] for i in 1:size(X, 1))
@test vec(C) ≈ Cxy[:,1]
@inferred cov(X, y1, dims=vd, corrected=cr)
- @test cov(X, Y) == Statistics.covm(X, mean(X, dims=1), Y, mean(Y, dims=1))
+ # Separate tests for equality and approximation
+ C = cov(X, Y)
+ @test C == Statistics.covm(X, mean(X, dims=1), Y, mean(Y, dims=1))
+ @test C ≈ cov(X_vec, Y_vec) ≈ cov(X_gen, Y_gen)
C = zm ? Statistics.covm(X, 0, Y, 0, vd, corrected=cr) :
cov(X, Y, dims=vd, corrected=cr)
@test size(C) == (k, k)
@@ -649,12 +655,15 @@ end
@testset "cov and cor of complex arrays (issue #21093)" begin
x = [2.7 - 3.3im, 0.9 + 5.4im, 0.1 + 0.2im, -1.7 - 5.8im, 1.1 + 1.9im]
y = [-1.7 - 1.6im, -0.2 + 6.5im, 0.8 - 10.0im, 9.1 - 3.4im, 2.7 - 5.5im]
- @test cov(x, y) ≈ 4.8365 - 12.119im
- @test cov(y, x) ≈ 4.8365 + 12.119im
+ x_gen = (i for i in x)
+ y_gen = (i for i in y)
+ xy_vec = [[x[i], y[i]] for i in 1:length(x)]
+ @test cov(x, y) ≈ cov(x_gen, y_gen) ≈4.8365 - 12.119im
+ @test cov(y, x) ≈ cov(y_gen, x_gen) ≈ 4.8365 + 12.119im
@test cov(x, reshape(y, :, 1)) ≈ reshape([4.8365 - 12.119im], 1, 1)
@test cov(reshape(x, :, 1), y) ≈ reshape([4.8365 - 12.119im], 1, 1)
@test cov(reshape(x, :, 1), reshape(y, :, 1)) ≈ reshape([4.8365 - 12.119im], 1, 1)
- @test cov([x y]) ≈ [21.779 4.8365-12.119im;
+ @test cov([x y]) ≈ cov(xy_vec) ≈ cov((i for i in xy_vec)) ≈ [21.779 4.8365-12.119im;
4.8365+12.119im 54.548]
@test cor(x, y) ≈ 0.14032104449218274 - 0.35160772008699703im
@test cor(y, x) ≈ 0.14032104449218274 + 0.35160772008699703im
From ff55dac6106e5e95271d6a144b45be6b106a1377 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Thu, 9 Apr 2020 17:19:39 -0400
Subject: [PATCH 06/11] fix docstring
---
src/Statistics.jl | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 92fbeb84..511ab8f8 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -613,7 +613,7 @@ covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corr
"""
cov(x::Any; corrected::Bool=true)
-Compute the variance of the vector `x`. If `corrected` is `true` (the default) then the sum
+Compute the variance of the iterator `x`. If `corrected` is `true` (the default) then the sum
is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where `n`
is the number of elements in the iterator, which is not necessarily known.
"""
@@ -624,7 +624,8 @@ end
"""
cov(x::AbstractVector; corrected::Bool=true)
-Compute the variance of the vector `x`. If `corrected` is `true` (the default) then the sum
+Compute the variance of the vector `x`. If `x` is a vector of vectors, returns the estimated
+variance-covariance matrix of elements in `x`. If `corrected` is `true` (the default) then the sum
is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where `n = length(x)`.
"""
cov(x::AbstractVector; corrected::Bool=true) = covm(x, mean(x); corrected=corrected)
@@ -645,8 +646,9 @@ cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) =
Compute the covariance between the iterators `x` and `y`. If `corrected` is `true` (the
default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` where
``*`` denotes the complex conjugate and `n` is the number of elements in `x` which must equal
-the number of elements in `y`. If `corrected` is `false`, computes ``\\frac{1}{n}\\sum_{i=1}^n
-(x_i-\\bar x) (y_i-\\bar y)^*``.
+the number of elements in `y`. If `x` and `y` are both vectors of vectors, computes the analagous
+estimator for the covariance matrix for `xi` and `yi. If `corrected` is `false`, computes
+``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``.
"""
cov(x::Any, y::Any; corrected::Bool=true) =
covm(x, mean(x), y, mean(y); corrected=corrected)
From 55fc30aab49f8455c71905ddbee17c9154d98ff9 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Tue, 14 Apr 2020 12:37:25 -0400
Subject: [PATCH 07/11] get rid of covzm
---
src/Statistics.jl | 41 ++---------------------------------------
1 file changed, 2 insertions(+), 39 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 511ab8f8..a0ab2957 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -499,25 +499,7 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) =
(vardim == 1 ? *(transpose(x), _conj(y)) : *(x, adjoint(y)))
# covzm (with centered data)
-function covzm(itr::Any; corrected::Bool=true)
- y = iterate(itr)
- if y === nothing
- v = _abs2(zero(eltype(itr)))
- return (v + v) / 0
- end
- count = 1
- value, state = y
- f_value = _abs2(value)
- total = Base.reduce_first(+, f_value)
- y = iterate(itr, state)
- while y !== nothing
- value, state = y
- total += _abs2(value)
- count += 1
- y = iterate(itr, state)
- end
- return total / (count - Int(corrected))
-end
+covzm(itr::Any; corrected::Bool=true) = covm(itr, 0; corrected = corrected)
covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))
function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
C = unscaled_covzm(x, vardim)
@@ -527,26 +509,7 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
A .= A .* b
return A
end
-function covzm(x::Any, y::Any; corrected::Bool=true)
- z = zip(x, y)
- z_itr = iterate(z)
- if z_itr === nothing
- v = _conjmul(zero(eltype(x)), zero(eltype(y)))
- return (v + v) / 0
- end
- count = 1
- (xi, yi), state = z_itr
- f_value = _conjmul(xi, yi)
- total = Base.reduce_first(+, f_value)
- z_itr = iterate(z, state)
- while z_itr !== nothing
- (xi, yi), state = z_itr
- total += _conjmul(xi, yi)
- count += 1
- z_itr = iterate(z, state)
- end
- return total / (count - Int(corrected))
-end
+covzm(x::Any, y::Any; corrected::Bool=true) = covm(x, 0, y, 0; corrected = corrected)
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))
function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true)
From ddee4e26261a136933a4624324c50e4239f41562 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Sat, 18 Apr 2020 12:01:12 -0400
Subject: [PATCH 08/11] More correlations
---
src/Statistics.jl | 38 ++++++++++++++++++++++++++++++++------
test/runtests.jl | 10 ++++++----
2 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index a0ab2957..548bc816 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -486,10 +486,11 @@ _conjmul(x::Number, y::Number) = x * conj(y)
_conjmul(x, y) = x * _conj(y)'
# core functions
-
+unscaled_covzm(itr) = sum(_abs2, itr)
unscaled_covzm(x::AbstractVector) = sum(_abs2, x)
unscaled_covzm(x::AbstractMatrix, vardim::Int) = (vardim == 1 ? _conj(x'x) : x * x')
+unscaled_covzm(x, y) = sum(t -> _conjmul(first(t), last(t)), zip(x, y))
unscaled_covzm(x::AbstractVector, y::AbstractVector) = sum(_conjmul(x[i], y[i]) for i in eachindex(y, x))
unscaled_covzm(x::AbstractVector, y::AbstractMatrix, vardim::Int) =
(vardim == 1 ? *(transpose(x), _conj(y)) : *(transpose(x), transpose(_conj(y))))
@@ -499,7 +500,7 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) =
(vardim == 1 ? *(transpose(x), _conj(y)) : *(x, adjoint(y)))
# covzm (with centered data)
-covzm(itr::Any; corrected::Bool=true) = covm(itr, 0; corrected = corrected)
+covzm(itr::Any; corrected::Bool=true) = covm(itr, zero(first(itr)); corrected = corrected)
covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))
function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
C = unscaled_covzm(x, vardim)
@@ -509,7 +510,7 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
A .= A .* b
return A
end
-covzm(x::Any, y::Any; corrected::Bool=true) = covm(x, 0, y, 0; corrected = corrected)
+covzm(x::Any, y::Any; corrected::Bool=true) = covm(x, zero(first(x)), y, zero(first(y)); corrected = corrected)
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))
function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true)
@@ -701,7 +702,20 @@ end
# corzm (non-exported, with centered data)
-corzm(x::AbstractVector{T}) where {T} = one(real(T))
+function corzm(itr)
+ T = eltype(itr)
+ if T <: Number
+ return one(real(T))
+ else
+ c = unscaled_covzm(itr)
+ return cov2cor!(c, collect(sqrt(c[i,i]) for i in 1:min(size(c)...)))
+ end
+end
+corzm(x::AbstractVector{T}) where {T<:Number} = one(real(T))
+function corzm(x::AbstractVector{T}) where {T}
+ c = unscaled_covzm(x)
+ return cov2cor!(c, collect(sqrt(c[i,i]) for i in 1:min(size(c)...)))
+end
function corzm(x::AbstractMatrix, vardim::Int=1)
c = unscaled_covzm(x, vardim)
return cov2cor!(c, collect(sqrt(c[i,i]) for i in 1:min(size(c)...)))
@@ -714,8 +728,20 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sum(abs2, x, dims=vardim)), sqrt!(sum(abs2, y, dims=vardim)))
# corm
-
-corm(x::AbstractVector{T}, xmean) where {T} = one(real(T))
+function corm(itr::Any, itrmean)
+ T = eltype(itr)
+ if T <: Number
+ return one(real(T))
+ else
+ c = sum(x -> _abs2(x - itrmean), itr)
+ return cov2cor!(c, collect(sqrt(c[i,i]) for i in 1:min(size(c)...)))
+ end
+end
+corm(x::AbstractVector{T}, xmean) where {T<:Number} = one(real(T))
+function corzm(x::AbstractVector{T}, xmean) where {T}
+ c = unscaled_covzm(x .- xmean)
+ return cov2cor!(c, collect(sqrt(c[i,i]) for i in 1:min(size(c)...)))
+end
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
function corm(x::AbstractVector, mx, y::AbstractVector, my)
require_one_based_indexing(x, y)
diff --git a/test/runtests.jl b/test/runtests.jl
index 57859b79..dc89f44d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -439,17 +439,18 @@ end
end
c = zm ? Statistics.corm(x1, 0) : cor(x1)
+ c_gen = zm ? Statistics.corm((xi for xi in x1), 0) : cor(xi for xi in x1)
@test isa(c, Float64)
- @test c ≈ Cxx[1,1]
+ @test c ≈ c_gen ≈ Cxx[1,1]
@inferred cor(x1)
- @test cor(X) == Statistics.corm(X, mean(X, dims=1))
+ @test cor(X) == cor(X_vec) == cor(X_gen) == Statistics.corm(X, mean(X, dims=1))
C = zm ? Statistics.corm(X, 0, vd) : cor(X, dims=vd)
@test size(C) == (k, k)
@test C ≈ Cxx
@inferred cor(X, dims=vd)
- @test cor(x1, y1) == Statistics.corm(x1, mean(x1), y1, mean(y1))
+ @test cor(x1, y1) == cor((ix for ix in x1), (iy for iy in y1)) == Statistics.corm(x1, mean(x1), y1, mean(y1))
c = zm ? Statistics.corm(x1, 0, y1, 0) : cor(x1, y1)
@test isa(c, Float64)
@test c ≈ Cxy[1,1]
@@ -471,7 +472,8 @@ end
@test vec(C) ≈ Cxy[:,1]
@inferred cor(X, y1, dims=vd)
- @test cor(X, Y) == Statistics.corm(X, mean(X, dims=1), Y, mean(Y, dims=1))
+ @test cor(X, Y) == cor(X_vec, Y_vec) == cor(X_gen, Y_gen) ==
+ Statistics.corm(X, mean(X, dims=1), Y, mean(Y, dims=1))
C = zm ? Statistics.corm(X, 0, Y, 0, vd) : cor(X, Y, dims=vd)
@test size(C) == (k, k)
@test C ≈ Cxy
From 056af67e78cfe20525e99eaf3b36803ae47d88b3 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Sat, 18 Apr 2020 13:44:09 -0400
Subject: [PATCH 09/11] x, y generators
---
src/Statistics.jl | 28 ++++++++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 548bc816..5397995b 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -738,12 +738,36 @@ function corm(itr::Any, itrmean)
end
end
corm(x::AbstractVector{T}, xmean) where {T<:Number} = one(real(T))
-function corzm(x::AbstractVector{T}, xmean) where {T}
+function corm(x::AbstractVector{T}, xmean) where {T}
c = unscaled_covzm(x .- xmean)
return cov2cor!(c, collect(sqrt(c[i,i]) for i in 1:min(size(c)...)))
end
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
-function corm(x::AbstractVector, mx, y::AbstractVector, my)
+
+function corm(x::Any, xm, y::Any, ym)
+ if first(x) isa Number
+
+ else
+ z = zip(x, y)
+ x1, y1 = first(z)
+ nx = length(x1)
+ c = zero(_conjmul(x1 - xm, y1 - ym))
+ sx = zero(abs2.(x1))
+ sy = zero(abs2.(y1))
+ for (xi, yi) in z
+ c += _conjmul(xi - xm, yi - ym)
+ for j in 1:nx
+ sx[j] += abs2(xi[j] - xm[j])
+ sy[j] += abs2(yi[j] - ym[j])
+ end
+ end
+ return cov2cor!(c, sqrt!(sx), sqrt!(sy))
+ end
+end
+function corm(x::AbstractVector, xm, y::AbstractVector, ym)
+ println("Not yet implemented")
+end
+function corm(x::AbstractVector{<:Number}, mx, y::AbstractVector{<:Number}, my)
require_one_based_indexing(x, y)
n = length(x)
length(y) == n || throw(DimensionMismatch("inconsistent lengths"))
From 1a15b52e737ff417a715e2982007d3c99c890209 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Sun, 19 Apr 2020 11:14:26 -0400
Subject: [PATCH 10/11] Working on tests, still using iterate
---
src/Statistics.jl | 121 +++++++++++++++++++++++++++++++++++-----------
test/runtests.jl | 4 +-
2 files changed, 94 insertions(+), 31 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 5397995b..937864ba 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -388,6 +388,8 @@ function sqrt!(A::AbstractArray)
A
end
+sqrt!(x::Number) = sqrt(x)
+
stdm(A::AbstractArray, m; corrected::Bool=true) =
sqrt.(varm(A, m; corrected=corrected))
@@ -729,43 +731,75 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
# corm
function corm(itr::Any, itrmean)
- T = eltype(itr)
- if T <: Number
- return one(real(T))
+ if itrmean isa Number
+ return one(real(typeof(itrmean)))
else
c = sum(x -> _abs2(x - itrmean), itr)
return cov2cor!(c, collect(sqrt(c[i,i]) for i in 1:min(size(c)...)))
end
end
-corm(x::AbstractVector{T}, xmean) where {T<:Number} = one(real(T))
-function corm(x::AbstractVector{T}, xmean) where {T}
- c = unscaled_covzm(x .- xmean)
+corm(x::AbstractVector{<:Number}, xmean) = one(real(eltype(x)))
+function corm(x::AbstractVector, xmean)
+ c = sum(t -> _abs2(t - xmean), x)
return cov2cor!(c, collect(sqrt(c[i,i]) for i in 1:min(size(c)...)))
end
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
-
+_abs2!(x::Number) = abs2(x)
+function _abs2!(x)
+ @inbounds @simd for i in eachindex(x)
+ x[i] = abs2(x[i])
+ end
+ return x
+end
function corm(x::Any, xm, y::Any, ym)
- if first(x) isa Number
-
+ z = zip(x, y)
+ z_itr = iterate(z)
+ if z_itr === nothing
+ ArgumentError("correlation only defined for non-empty iterators")
+ end
+ (xi, yi), state = z_itr
+ zx = xi - xm
+ zy = yi - ym
+ c = Base.reduce_first(+, _conjmul(zx, zy))
+ sx = Base.reduce_first(+, _abs2!(zx))
+ sy = Base.reduce_first(+, _abs2!(zy))
+ z_itr = iterate(z, state)
+ while z_itr !== nothing
+ (xi, yi), state = z_itr
+ zx = xi - xm
+ zy = yi - ym
+ c += _conjmul(zx, zy)
+ sx += _abs2!(zx)
+ sy += _abs2!(zy)
+ z_itr = iterate(z, state)
+ end
+ if c isa Number
+ clampcor(c / max(sx, sy) / sqrt(min(sx, sy) / max(sx, sy)))
else
- z = zip(x, y)
- x1, y1 = first(z)
- nx = length(x1)
- c = zero(_conjmul(x1 - xm, y1 - ym))
- sx = zero(abs2.(x1))
- sy = zero(abs2.(y1))
- for (xi, yi) in z
- c += _conjmul(xi - xm, yi - ym)
- for j in 1:nx
- sx[j] += abs2(xi[j] - xm[j])
- sy[j] += abs2(yi[j] - ym[j])
- end
- end
- return cov2cor!(c, sqrt!(sx), sqrt!(sy))
+ cov2cor!(c, sqrt!(sx), sqrt!(sy))
end
end
function corm(x::AbstractVector, xm, y::AbstractVector, ym)
- println("Not yet implemented")
+ require_one_based_indexing(x, y)
+ n = length(x)
+ length(y) == n || throw(DimensionMismatch("inconsistent lengths"))
+ n > 0 || throw(ArgumentError("correlation only defined for non-empty vectors"))
+
+ @inbounds begin
+ # Initialize the accumulators
+ xx = zero(sqrt!(abs2.(x[1])))
+ yy = zero(sqrt!(abs2.(y[1])))
+ xy = zero(_conjmul(x[1], y[1]))
+
+ @simd for i in eachindex(x, y)
+ xi = x[i] - xm
+ yi = y[i] - ym
+ xy += _conjmul(xi, yi)
+ xx += _abs2!(xi)
+ yy += _abs2!(yi)
+ end
+ end
+ return cov2cor!(xy, sqrt!(xx), sqrt!(yy))
end
function corm(x::AbstractVector{<:Number}, mx, y::AbstractVector{<:Number}, my)
require_one_based_indexing(x, y)
@@ -782,9 +816,9 @@ function corm(x::AbstractVector{<:Number}, mx, y::AbstractVector{<:Number}, my)
@simd for i in eachindex(x, y)
xi = x[i] - mx
yi = y[i] - my
- xx += abs2(xi)
- yy += abs2(yi)
- xy += xi * yi'
+ xx += _abs2(xi)
+ yy += _abs2(yi)
+ xy += _conjmul(xi, yi)
end
end
return clampcor(xy / max(xx, yy) / sqrt(min(xx, yy) / max(xx, yy)))
@@ -795,11 +829,33 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) =
# cor
"""
- cor(x::AbstractVector)
+ cor(itr::Any)
+
+Return the number one if `itr` iterates through scalars. Returns the correlation between
+elements of the iterator otherwise.
+"""
+function cor(itr::Any)
+ t = first(itr)
+ if t isa Number
+ return one(real(typeof(t)))
+ else
+ return corm(itr, mean(itr))
+ end
+end
+
+"""
+ cor(x::AbstractVector{<:Number})
Return the number one.
"""
-cor(x::AbstractVector) = one(real(eltype(x)))
+cor(x::AbstractVector{<:Number}) = one(real(eltype(x)))
+
+"""
+ cor(x::AbstractVector)
+
+Return the Pearson correlation matrix between elements in `x`.
+"""
+cor(x::AbstractVector) = corm(x, mean(x))
"""
cor(X::AbstractMatrix; dims::Int=1)
@@ -808,6 +864,13 @@ Compute the Pearson correlation matrix of the matrix `X` along the dimension `di
"""
cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims)
+"""
+ cor(x::Any, y::Any)
+
+Compute the Pearson correlation between the iterators `x` and `y`.
+"""
+cor(x::Any, y::Any) = corm(x, mean(x), y, mean(y))
+
"""
cor(x::AbstractVector, y::AbstractVector)
diff --git a/test/runtests.jl b/test/runtests.jl
index dc89f44d..4213ba25 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -444,7 +444,7 @@ end
@test c ≈ c_gen ≈ Cxx[1,1]
@inferred cor(x1)
- @test cor(X) == cor(X_vec) == cor(X_gen) == Statistics.corm(X, mean(X, dims=1))
+ @test cor(X) == Statistics.corm(X, mean(X, dims=1)) == cor(X_vec) == cor(X_gen)
C = zm ? Statistics.corm(X, 0, vd) : cor(X, dims=vd)
@test size(C) == (k, k)
@test C ≈ Cxx
@@ -472,7 +472,7 @@ end
@test vec(C) ≈ Cxy[:,1]
@inferred cor(X, y1, dims=vd)
- @test cor(X, Y) == cor(X_vec, Y_vec) == cor(X_gen, Y_gen) ==
+ @test_skip cor(X, Y) == cor(X_vec, Y_vec) == cor(X_gen, Y_gen) ==
Statistics.corm(X, mean(X, dims=1), Y, mean(Y, dims=1))
C = zm ? Statistics.corm(X, 0, Y, 0, vd) : cor(X, Y, dims=vd)
@test size(C) == (k, k)
From 2aa01873158c5d63b3ff5b611726008bb98e3595 Mon Sep 17 00:00:00 2001
From: pdeffebach
Date: Mon, 20 Apr 2020 15:27:53 -0400
Subject: [PATCH 11/11] start converting to collect
---
src/Statistics.jl | 171 +++++++++++-----------------------------------
1 file changed, 39 insertions(+), 132 deletions(-)
diff --git a/src/Statistics.jl b/src/Statistics.jl
index 937864ba..b43b68f3 100644
--- a/src/Statistics.jl
+++ b/src/Statistics.jl
@@ -502,7 +502,7 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) =
(vardim == 1 ? *(transpose(x), _conj(y)) : *(x, adjoint(y)))
# covzm (with centered data)
-covzm(itr::Any; corrected::Bool=true) = covm(itr, zero(first(itr)); corrected = corrected)
+covzm(itr::Any; corrected::Bool=true) = covzm(collect(itr); corrected = corrected)
covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))
function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
C = unscaled_covzm(x, vardim)
@@ -512,7 +512,7 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
A .= A .* b
return A
end
-covzm(x::Any, y::Any; corrected::Bool=true) = covm(x, zero(first(x)), y, zero(first(y)); corrected = corrected)
+covzm(x::Any, y::Any; corrected::Bool=true) = covzm(collect(x), collect(y); corrected = corrected)
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))
function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true)
@@ -527,49 +527,14 @@ end
# covm (with provided mean)
## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector}
## which can't be handled by broadcast
-function covm(itr::Any, itrmean; corrected::Bool=true)
- y = iterate(itr)
- if y === nothing
- v = _abs2(zero(eltype(itr - itrmean)))
- return (v + v) / 0
- end
- count = 1
- itri, state = y
- first_value = _abs2(itri - itrmean)
- total = Base.reduce_first(+, first_value)
- y = iterate(itr, state)
- while y !== nothing
- itri, state = y
- total += _abs2(itri - itrmean)
- count += 1
- y = iterate(itr, state)
- end
- return total / (count - Int(corrected))
-end
+covm(itr::Any, itrmean; corrected::Bool=true) =
+ covm(map(t -> t - itrmean, x); corrected = corrected)
covm(x::AbstractVector, xmean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x); corrected=corrected)
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, vardim; corrected=corrected)
-function covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true)
- z = zip(x, y)
- z_itr = iterate(z)
- if z_itr === nothing
- v = _conjmul(zero(eltype(x)), zero(eltype(y)))
- return (v + v) / 0
- end
- count = 1
- (xi, yi), state = z_itr
- first_value = _conjmul(xi-xmean, yi-ymean)
- total = Base.reduce_first(+, first_value)
- z_itr = iterate(z, state)
- while z_itr !== nothing
- (xi, yi), state = z_itr
- total += _conjmul(xi-xmean, yi-ymean)
- count += 1
- z_itr = iterate(z, state)
- end
- return total / (count - Int(corrected))
-end
+covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true) =
+ covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) =
@@ -584,7 +549,8 @@ is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `fals
is the number of elements in the iterator, which is not necessarily known.
"""
function cov(x::Any, corrected::Bool=true)
- covm(x, mean(x); corrected=corrected)
+ cx = collect(x)
+ covm(cx, mean(cx); corrected=corrected)
end
"""
@@ -616,8 +582,11 @@ the number of elements in `y`. If `x` and `y` are both vectors of vectors, compu
estimator for the covariance matrix for `xi` and `yi. If `corrected` is `false`, computes
``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``.
"""
-cov(x::Any, y::Any; corrected::Bool=true) =
- covm(x, mean(x), y, mean(y); corrected=corrected)
+function cov(x::Any, y::Any; corrected::Bool=true)
+ cx = collect(x)
+ cy = collect(y)
+ covm(cx, mean(cx), cy, mean(cy); corrected=corrected)
+end
"""
cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true)
@@ -701,18 +670,14 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray)
end
return C
end
-
+function cov2cor!(xy::Number, xsd::Number, ysd::Number)
+ xx = abs2(xsd)
+ yy = abs2(ysd)
+ clampcor(xy / max(xx, yy) / sqrt(min(xx, yy) / max(xx, yy)))
+end
# corzm (non-exported, with centered data)
-function corzm(itr)
- T = eltype(itr)
- if T <: Number
- return one(real(T))
- else
- c = unscaled_covzm(itr)
- return cov2cor!(c, collect(sqrt(c[i,i]) for i in 1:min(size(c)...)))
- end
-end
+corzm(itr) = corzm(collect(itr))
corzm(x::AbstractVector{T}) where {T<:Number} = one(real(T))
function corzm(x::AbstractVector{T}) where {T}
c = unscaled_covzm(x)
@@ -730,56 +695,17 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sum(abs2, x, dims=vardim)), sqrt!(sum(abs2, y, dims=vardim)))
# corm
-function corm(itr::Any, itrmean)
- if itrmean isa Number
- return one(real(typeof(itrmean)))
- else
- c = sum(x -> _abs2(x - itrmean), itr)
- return cov2cor!(c, collect(sqrt(c[i,i]) for i in 1:min(size(c)...)))
- end
-end
+corm(itr::Any, itrmean) = corm(collect(itr), itrmean)
corm(x::AbstractVector{<:Number}, xmean) = one(real(eltype(x)))
function corm(x::AbstractVector, xmean)
c = sum(t -> _abs2(t - xmean), x)
return cov2cor!(c, collect(sqrt(c[i,i]) for i in 1:min(size(c)...)))
end
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
-_abs2!(x::Number) = abs2(x)
-function _abs2!(x)
- @inbounds @simd for i in eachindex(x)
- x[i] = abs2(x[i])
- end
- return x
-end
-function corm(x::Any, xm, y::Any, ym)
- z = zip(x, y)
- z_itr = iterate(z)
- if z_itr === nothing
- ArgumentError("correlation only defined for non-empty iterators")
- end
- (xi, yi), state = z_itr
- zx = xi - xm
- zy = yi - ym
- c = Base.reduce_first(+, _conjmul(zx, zy))
- sx = Base.reduce_first(+, _abs2!(zx))
- sy = Base.reduce_first(+, _abs2!(zy))
- z_itr = iterate(z, state)
- while z_itr !== nothing
- (xi, yi), state = z_itr
- zx = xi - xm
- zy = yi - ym
- c += _conjmul(zx, zy)
- sx += _abs2!(zx)
- sy += _abs2!(zy)
- z_itr = iterate(z, state)
- end
- if c isa Number
- clampcor(c / max(sx, sy) / sqrt(min(sx, sy) / max(sx, sy)))
- else
- cov2cor!(c, sqrt!(sx), sqrt!(sy))
- end
-end
-function corm(x::AbstractVector, xm, y::AbstractVector, ym)
+corm(x::Any, mx, y::Any, my) = corm(collect(x), mx, collect(y), my)
+_one(x) = one(x)
+_one(x::AbstractArray) = fill(one(x[1]), size(x))
+function corm(x::AbstractVector, mx, y::AbstractVector, my)
require_one_based_indexing(x, y)
n = length(x)
length(y) == n || throw(DimensionMismatch("inconsistent lengths"))
@@ -787,32 +713,10 @@ function corm(x::AbstractVector, xm, y::AbstractVector, ym)
@inbounds begin
# Initialize the accumulators
- xx = zero(sqrt!(abs2.(x[1])))
- yy = zero(sqrt!(abs2.(y[1])))
+ xx = zero(sqrt!(_abs2(_one(x[1]))))
+ yy = zero(sqrt!(_abs2(_one(y[1]))))
xy = zero(_conjmul(x[1], y[1]))
- @simd for i in eachindex(x, y)
- xi = x[i] - xm
- yi = y[i] - ym
- xy += _conjmul(xi, yi)
- xx += _abs2!(xi)
- yy += _abs2!(yi)
- end
- end
- return cov2cor!(xy, sqrt!(xx), sqrt!(yy))
-end
-function corm(x::AbstractVector{<:Number}, mx, y::AbstractVector{<:Number}, my)
- require_one_based_indexing(x, y)
- n = length(x)
- length(y) == n || throw(DimensionMismatch("inconsistent lengths"))
- n > 0 || throw(ArgumentError("correlation only defined for non-empty vectors"))
-
- @inbounds begin
- # Initialize the accumulators
- xx = zero(sqrt(abs2(one(x[1]))))
- yy = zero(sqrt(abs2(one(y[1]))))
- xy = zero(x[1] * y[1]')
-
@simd for i in eachindex(x, y)
xi = x[i] - mx
yi = y[i] - my
@@ -821,7 +725,12 @@ function corm(x::AbstractVector{<:Number}, mx, y::AbstractVector{<:Number}, my)
xy += _conjmul(xi, yi)
end
end
- return clampcor(xy / max(xx, yy) / sqrt(min(xx, yy) / max(xx, yy)))
+
+ @show xx
+ @show yy
+ @show xy
+
+ return cov2cor!(xy, xx, yy)
end
corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) =
@@ -834,14 +743,7 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) =
Return the number one if `itr` iterates through scalars. Returns the correlation between
elements of the iterator otherwise.
"""
-function cor(itr::Any)
- t = first(itr)
- if t isa Number
- return one(real(typeof(t)))
- else
- return corm(itr, mean(itr))
- end
-end
+cor(itr::Any) = cor(collect(itr))
"""
cor(x::AbstractVector{<:Number})
@@ -869,7 +771,12 @@ cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims)
Compute the Pearson correlation between the iterators `x` and `y`.
"""
-cor(x::Any, y::Any) = corm(x, mean(x), y, mean(y))
+function cor(x::Any, y::Any)
+ cx = collect(x)
+ cy = collect(y)
+
+ corm(cx, mean(cx), cy, mean(cy))
+end
"""
cor(x::AbstractVector, y::AbstractVector)