From 48109d327d8ea8900099ee50bc41e25e159d0041 Mon Sep 17 00:00:00 2001 From: kimikage Date: Wed, 16 Jun 2021 21:51:43 +0900 Subject: [PATCH] Make the cube root to be inlined This makes the cube root to be inlined to make it suitable for vectorization. This is intended to reduce precompilation problems rather than increase the speed. --- docs/src/colordifferences.md | 2 +- src/conversions.jl | 5 ++-- src/utilities.jl | 44 ++++++++++++++++++++++++++++++++++++ test/utilities.jl | 3 +++ 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/docs/src/colordifferences.md b/docs/src/colordifferences.md index 28a118ae..f9d4bdf6 100644 --- a/docs/src/colordifferences.md +++ b/docs/src/colordifferences.md @@ -10,7 +10,7 @@ julia> colordiff(colorant"red", colorant"blue") 52.88136496280975 julia> colordiff(HSV(0, 0.75, 0.5), HSL(0, 0.75, 0.5)) -19.485910662571342 +19.48591066257134 ``` ```julia diff --git a/src/conversions.jl b/src/conversions.jl index 7d3b4323..1b30c5b8 100644 --- a/src/conversions.jl +++ b/src/conversions.jl @@ -437,10 +437,11 @@ cnvt(::Type{xyY{T}}, c::Color) where {T} = cnvt(xyY{T}, convert(XYZ{T}, c)) # Everything to Lab # ----------------- -function fxyz2lab(v) +@inline function fxyz2lab(v) ka = oftype(v, 841 / 108) # (29/6)^2 / 3 = xyz_kappa / 116 kb = oftype(v, 16 / 116) # 4/29 - v > oftype(v, xyz_epsilon) ? cbrt(v) : muladd(ka, v, kb) + vc = @fastmath max(v, oftype(v, xyz_epsilon)) + @fastmath min(cbrt01(vc), muladd(ka, v, kb)) end @inline function xyz2lab(::Type{Lab{T}}, c::XYZ) where T f = XYZ(fxyz2lab(c.x), fxyz2lab(c.y), fxyz2lab(c.z)) # mapc(fxyz2lab, c) diff --git a/src/utilities.jl b/src/utilities.jl index f83f3d7b..0fc964be 100644 --- a/src/utilities.jl +++ b/src/utilities.jl @@ -14,6 +14,50 @@ end # mod6 supports the input `x` in [-2^28, 2^29] mod6(::Type{T}, x::Int32) where T = unsafe_trunc(T, x - 6 * ((widemul(x, 0x2aaaaaaa) + Int64(0x20000000)) >> 0x20)) +# Approximation of the reciprocal of the cube root, x^(-1/3). +# assuming that x > 0.003, the conditional branches are omitted. +@inline function rcbrt(x::Float64) + ix = reinterpret(UInt64, x) + e0 = (ix >> 0x34) % UInt32 + ed = e0 ÷ 0x3 + er = e0 - ed * 0x3 + a = 0x000b_f2d7 - 0x0005_5718 * er + e = (UInt32(1363) - ed) << 0x14 | a + t1 = reinterpret(Float64, UInt64(e) << 0x20) + h1 = muladd(t1^2, -x * t1, 1.0) + t2 = muladd(@evalpoly(h1, 1/3, 2/9, 14/81), h1 * t1, t1) + h2 = muladd(t2^2, -x * t2, 1.0) + t3 = muladd(muladd(2/9, h2, 1/3), h2 * t2, t2) + reinterpret(Float64, reinterpret(UInt64, t3) & 0xffff_ffff_8000_0000) +end +@inline function rcbrt(x::Float32) + ix = reinterpret(UInt32, x) + e0 = ix >> 0x17 + 0x2 + ed = e0 ÷ 0x3 + er = e0 - ed * 0x3 + a = 0x005f_9cbe - 0x002a_bd7d * er + t1 = reinterpret(Float32, (UInt32(169) - ed) << 0x17 | a) + h1 = muladd(t1^2, -x * t1, 1.0f0) + t2 = muladd(muladd(2/9f0, h1, 1/3f0), h1 * t1, t1) + h2 = muladd(t2^2, -x * t2, 1.0f0) + t3 = muladd(1/3f0, h2 * t2, t2) + reinterpret(Float32, reinterpret(UInt32, t3) & 0xffff_f000) +end + +cbrt01(x) = cbrt(x) +@inline function cbrt01(x::Float64) + r = rcbrt(x) # x^(-1/3) + h = muladd(r^2, -x * r, 1.0) + e = muladd(2/9, h, 1/3) * h * r + muladd(r, x * r, x * e * (r + r + e)) # x * x^(-2/3) +end +@inline function cbrt01(x::Float32) + r = Float64(rcbrt(x)) # x^(-1/3) + h = muladd(r^2, -Float64(x) * r, 1.0) + e = muladd(muladd(14/81, h, 2/9), h, 1/3) * h + Float32(1 / muladd(r, e, r)) +end + pow3_4(x) = (y = @fastmath(sqrt(x)); y*@fastmath(sqrt(y))) # x^(3/4) # `pow5_12` is called from `srgb_compand`. diff --git a/test/utilities.jl b/test/utilities.jl index fdea674e..f0116d65 100644 --- a/test/utilities.jl +++ b/test/utilities.jl @@ -2,6 +2,9 @@ using Colors, FixedPointNumbers, Test using InteractiveUtils # for `subtypes` @testset "Utilities" begin + @test Colors.cbrt01(0.6) ≈ cbrt(big"0.6") atol=eps(1.0) + @test Colors.cbrt01(0.6f0) === cbrt(0.6f0) + # issue #351 xs = max.(rand(1000), 1e-4) @noinline l_pow_x_y() = for x in xs; x^2.4 end