Skip to content

Commit beb563c

Browse files
Merge pull request #167 from t-bltg/rounding
fix subnormal `sqrt` and `cbrt`
2 parents 8a31603 + 5b99418 commit beb563c

File tree

5 files changed

+25
-5
lines changed

5 files changed

+25
-5
lines changed

.DS_Store

-6 KB
Binary file not shown.

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
*.jl.*.cov
33
*.jl.mem
44
deps/deps.jl
5+
.DS_Store

src/.DS_Store

-6 KB
Binary file not shown.

src/math/ops/op_dd_dd.jl

+9-5
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,15 @@ end
5757

5858

5959
function sqrt_dd_dd(x::Tuple{T,T}) where {T<:IEEEFloat}
60-
(isnan(HI(x)) | iszero(HI(x))) && return x
61-
signbit(HI(x)) && throw(DomainError("sqrt(x) expects x >= 0"))
60+
hi, lo = x
61+
(isnan(hi) | iszero(hi)) && return x
62+
signbit(hi) && throw(DomainError("sqrt(x) expects x >= 0"))
6263

6364
half = T(0.5)
6465
dhalf = (half, zero(T))
6566

66-
r = inv(sqrt(HI(x)))
67-
h = (HI(x) * half, LO(x) * half)
67+
r = inv(sqrt(hi))
68+
h = (hi * half, lo * half)
6869

6970
r2 = mul_fpfp_dd(r, r)
7071
hr2 = mul_dddd_dd(h, r2)
@@ -79,6 +80,7 @@ function sqrt_dd_dd(x::Tuple{T,T}) where {T<:IEEEFloat}
7980
r = add_dddd_dd(r, radj)
8081

8182
r = mul_dddd_dd(r, x)
83+
isnan(HI(r)) && return DoubleFloat{T}(sqrt(hi), zero(T)) # subnormal numbers
8284

8385
return r
8486
end
@@ -98,7 +100,7 @@ invcuberootsquared(A) is found iteratively using Newton's method with a final ap
98100
=#
99101

100102
function cbrt_dd_dd(a::Tuple{T,T}) where {T<:IEEEFloat}
101-
hi, lo = HILO(a)
103+
isnan(HI(a)) && return a
102104
a2 = mul_dddd_dd(a,a)
103105
one1 = one(T)
104106
onethird = (0.3333333333333333, 1.850371707708594e-17)
@@ -149,5 +151,7 @@ function cbrt_dd_dd(a::Tuple{T,T}) where {T<:IEEEFloat}
149151
amax3 = mul_dddd_dd(amax3, onethird)
150152

151153
ax = add_dddd_dd(ax, amax3)
154+
155+
isnan(HI(ax)) && return DoubleFloat{T}(cbrt(HI(a)), zero(T)) # subnormal numbers
152156
return ax
153157
end

test/concrete_accuracy.jl

+15
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,18 @@
4646
@test typeof(rand(Double32)) == Double32
4747
@test typeof(rand(Double16)) == Double16
4848
end
49+
50+
@testset "Subnormal numbers" begin
51+
for (F, D) in (
52+
(Float64, Double64),
53+
(Float32, Double32),
54+
(Float16, Double16),
55+
)
56+
# NOTE: using `prevfloat(floatmin(F))` doesn't expose `sqrt(D(sn))` returning a `NaN`
57+
sn = D(floatmin(F) / 10)
58+
59+
@test issubnormal(sn)
60+
@test sqrt(F(sn)) == F(sqrt(D(sn)))
61+
@test cbrt(F(sn)) == F(cbrt(D(sn)))
62+
end
63+
end

0 commit comments

Comments
 (0)