Merge pull request #15 from nossleinad/main

murrellb · web-flow · commit 53f75c0eae3e · 2024-03-06T14:39:32.000+01:00
Add Brent's method for minimization
diff --git a/src/MolecularEvolution.jl b/src/MolecularEvolution.jl
@@ -29,6 +29,8 @@ abstract type SimulationModel <: BranchModel end #Simulation models typically ca
 
 abstract type StatePath end
 
+abstract type UnivariateOpt end #Supertype of the optimizer selectors (GoldenSectionOpt, BrentsMethodOpt) that univariate_maximize dispatches on
+
 #include("core/core.jl")
 include("core/nodes/nodes.jl")
 include("core/algorithms/algorithms.jl")
@@ -116,6 +118,10 @@ export
     one_hot_sample,
     scaled_prob_domain,
     golden_section_maximize,
+    GoldenSectionOpt,
+    brents_method_minimize,
+    BrentsMethodOpt,
+    univariate_maximize,
     unit_transform,
     HKY85,
     P_from_diagonalized_Q,
diff --git a/src/core/algorithms/branchlength_optim.jl b/src/core/algorithms/branchlength_optim.jl
@@ -27,7 +27,8 @@ function branchlength_optim!(
     node::FelNode,
     models,
     partition_list,
-    tol,
+    tol;
+    bl_optimizer::UnivariateOpt = GoldenSectionOpt()
 )
 
     #This bit of code should be identical to the regular downward pass...
@@ -60,6 +61,7 @@ function branchlength_optim!(
                 models,
                 partition_list,
                 tol,
+                bl_optimizer=bl_optimizer
             )
         end
         #Then combine node.child_messages into node.message...
@@ -72,7 +74,7 @@ function branchlength_optim!(
     if !isroot(node)
         model_list = models(node)
         fun = x -> branch_LL_up(x, temp_message, node, model_list, partition_list)
-        opt = golden_section_maximize(fun, 0 + tol, 1 - tol, unit_transform, tol)
+        opt = univariate_maximize(fun, 0 + tol, 1 - tol, unit_transform, bl_optimizer, tol)
         if fun(opt) > fun(node.branchlength)
             node.branchlength = opt
         end
@@ -88,24 +90,24 @@ end
 
 #BM: Check if running felsenstein_down! makes a difference.
 """
-    branchlength_optim!(tree::FelNode, models; partition_list = nothing, tol = 1e-5)
+    branchlength_optim!(tree::FelNode, models; partition_list = nothing, tol = 1e-5, bl_optimizer::UnivariateOpt = GoldenSectionOpt())
 
-Uses golden section search to optimize all branches recursively, maintaining the integrity of the messages.
+Uses golden section search, or optionally Brent's method, to optimize all branches recursively, maintaining the integrity of the messages.
 Requires felsenstein!() to have been run first.
 models can either be a single model (if the messages on the tree contain just one Partition) or an array of models, if the messages have >1 Partition, or 
 a function that takes a node, and returns a Vector{<:BranchModel} if you need the models to vary from one branch to another.
 partition_list (eg. 1:3 or [1,3,5]) lets you choose which partitions to run over (but you probably want to optimize branch lengths with all models).
-tol is the tolerance for the golden section search.
+tol is the absolute tolerance for the bl_optimizer which defaults to golden section search, and has Brent's method as an option by setting bl_optimizer=BrentsMethodOpt().
 """
-function branchlength_optim!(tree::FelNode, models; partition_list = nothing, tol = 1e-5)
+function branchlength_optim!(tree::FelNode, models; partition_list = nothing, tol = 1e-5, bl_optimizer::UnivariateOpt = GoldenSectionOpt())
     temp_message = deepcopy(tree.message)
     message_to_set = deepcopy(tree.message)
 
     if partition_list === nothing
         partition_list = 1:length(tree.message)
     end
 
-    branchlength_optim!(temp_message, message_to_set, tree, models, partition_list, tol)
+    branchlength_optim!(temp_message, message_to_set, tree, models, partition_list, tol, bl_optimizer=bl_optimizer)
 end
 
 #Overloading to allow for direct model and model vec inputs
@@ -114,10 +116,12 @@ branchlength_optim!(
     models::Vector{<:BranchModel};
     partition_list = nothing,
     tol = 1e-5,
-) = branchlength_optim!(tree, x -> models, partition_list = partition_list, tol = tol)
+    bl_optimizer::UnivariateOpt = GoldenSectionOpt()
+) = branchlength_optim!(tree, x -> models, partition_list = partition_list, tol = tol, bl_optimizer=bl_optimizer)
 branchlength_optim!(
     tree::FelNode,
     model::BranchModel;
     partition_list = nothing,
     tol = 1e-5,
-) = branchlength_optim!(tree, x -> [model], partition_list = partition_list, tol = tol)
+    bl_optimizer::UnivariateOpt = GoldenSectionOpt()
+) = branchlength_optim!(tree, x -> [model], partition_list = partition_list, tol = tol, bl_optimizer=bl_optimizer)
diff --git a/src/utils/simple_optim.jl b/src/utils/simple_optim.jl
@@ -12,6 +12,9 @@ function unit_inv_transform(x::Real; k = 1.0)
     x / (x + k)
 end
 
+struct GoldenSectionOpt <: UnivariateOpt end #Selects the golden section search method of univariate_maximize
+struct BrentsMethodOpt <: UnivariateOpt end #Selects the Brent's method method of univariate_maximize
+
 """
 Golden section search.
 
@@ -67,6 +70,136 @@ function golden_section_maximize(f, a::Real, b::Real, transform, tol::Real)
     end
 end
 
+"""
+    univariate_maximize(f, a::Real, b::Real, transform, optimizer::GoldenSectionOpt, tol::Real)
+Maximizes `f(x)` using a Golden Section Search. See `?golden_section_maximize`.
+# Examples
+
+```jldoctest
+julia> f(x) = -(x-2)^2
+f (generic function with 1 method)
+
+julia> m = univariate_maximize(f, 1, 5, identity, GoldenSectionOpt(), 1e-10)
+2.0000000000051843
+```
+"""
+function univariate_maximize(f, a::Real, b::Real, transform, optimizer::GoldenSectionOpt, tol::Real)
+    return golden_section_maximize(f, a, b, transform, tol) #optimizer is only used for dispatch; everything else is forwarded unchanged
+end
+
+function brents_pq(x, w, v, fx, fw, fv)
+    #Numerator p and denominator q of the SPI step in Brent's method,
+    #which is taken as x_new = x + p / q
+    dv, dw = x - v, x - w
+    gw, gv = fx - fw, fx - fv
+    p = dv^2 * gw - dw^2 * gv
+    q = 2 * (dv * gw - dw * gv)
+    #Either p or q changes sign, so that the returned q is never negative
+    q > 0 && return (-p, q)
+    return (p, abs(q))
+end
+
+function SPI_is_well_behaved(a, b, x, p, q, prev_prev_e, tol) #Is the SPI step p / q defined, inside (a, b), and small relative to the step before last?
+    return !iszero(q) && (a < x + p / q < b) && (abs(p / q) < abs(prev_prev_e) / 2) && (tol < abs(prev_prev_e))
+end
+
+"""
+    brents_method_minimize(f, a::Real, b::Real, transform, t::Real; ε::Real=sqrt(eps()))
+Brent's method for minimization.
+
+Given a function f with a single local minimum in
+the interval (a,b), Brent's method returns an approximation
+of the x-value that minimizes f to an accuracy between 2tol and 3tol,
+where tol is a combination of a relative and an absolute tolerance,
+tol := ε|x| + t. ε should be no smaller than `2*eps`,
+and preferably not much less than `sqrt(eps)`, which is also the default value.
+eps is defined here as the machine epsilon in double precision.
+t should be positive. f is evaluated at `transform(x)`, and the returned value is `transform` applied to the final x.
+
+The method combines the stability of a Golden Section Search with the superlinear convergence
+that Successive Parabolic Interpolation has under certain conditions. The method never converges much slower
+than a Fibonacci search and for a sufficiently well-behaved f, convergence can be expected to be superlinear,
+with an order that's usually at least 1.3247...
+
+# Examples
+
+```jldoctest
+julia> f(x) = exp(-x) - cos(x)
+f (generic function with 1 method)
+
+julia> m = brents_method_minimize(f, -1, 2, identity, 1e-7)
+0.5885327257940255
+```
+
+From: Richard P. Brent, "Algorithms for Minimization without Derivatives" (1973). Chapter 5.
+"""
+function brents_method_minimize(f, a::Real, b::Real, transform, t::Real; ε::Real=sqrt(eps()))
+    a, b = min(a, b), max(a, b)
+    v = w = x = a + invphi2 * (b - a) #x is our best approximation
+    fv = fw = fx = f(transform(x)) #We must always have that fv >= fw >= fx (1)
+
+    e = prev_e = zero(x) #e denotes the step we take in each cycle; zero(x) keeps the steps the same type as x
+    m = (a + b) / 2
+    tol = ε * abs(x) + t
+
+    while abs(x - m) > 2*tol - (b - a) / 2 #i.e. loop until max(x - a, b - x) <= 2*tol
+        prev_prev_e = prev_e
+        prev_e = e
+        p, q = brents_pq(x, w, v, fx, fw, fv)
+        if SPI_is_well_behaved(a, b, x, p, q, prev_prev_e, tol)
+            #Then we do a "parabolic interpolation" step
+            e = p / q
+            u = x + e
+            if u - a < 2*tol || b - u < 2*tol #f must not be evaluated too close to a or b
+                e = x < m ? tol : -tol
+            end
+        else #We fall back to a "golden section" step
+            prev_e = x < m ? b - x : a - x #We want our prev_prev_e to inherit this value, since two GSS steps two iterations apart differ by a factor of invphi2
+            e = invphi2 * prev_e
+        end
+        if abs(e) < tol #f must not be evaluated too close to x
+            e = e > 0 ? tol : -tol
+        end
+        u = x + e
+        fu = f(transform(u))
+        #Update variables such that we satisfy (1) and discard the non-optimal interval
+        if fu <= fx
+            if u < x
+                b = x
+            else
+                a = x
+            end
+            v, fv = w, fw
+            w, fw = x, fx
+            x, fx = u, fu
+        else
+            if u < x
+                a = u
+            else
+                b = u
+            end
+            if fu <= fw || w == x
+                v, fv = w, fw
+                w, fw = u, fu
+            elseif fu <= fv || v == x || v == w
+                v, fv = u, fu
+            end
+        end
+        m = (a + b) / 2
+        tol = ε * abs(x) + t
+    end
+    return transform(x)
+end
+
+"""
+    univariate_maximize(f, a::Real, b::Real, transform, optimizer::BrentsMethodOpt, t::Real; ε::Real=sqrt(eps))
+Maximizes `f(x)` using Brent's method.
+See `?brents_method_minimize`.
+"""
+function univariate_maximize(f, a::Real, b::Real, transform, optimizer::BrentsMethodOpt, t::Real; ε::Real=sqrt(eps()))
+    return brents_method_minimize(x -> -f(x), a, b, transform, t, ε = ε) #Maximizing f is minimizing -f; optimizer is only used for dispatch
+end
+
 
 #This is SGD on trees, sampling branches (using the stochastic_ll_diffs function).
 #Promising, but need a LOT of testing. See the FUBAR notebook for a use example.
diff --git a/test/partition_selection.jl b/test/partition_selection.jl
@@ -53,9 +53,11 @@ begin
     felsenstein_down!(tree, x -> bm_models, partition_list = [2])
     felsenstein_down!(tree, x -> bm_models)
 
+    #TODO When we use BrentsMethodOpt, check if we gain a speed-up and that we're not catastrophically wrong
     branchlength_optim!(tree, bm_models, partition_list = [1])
     branchlength_optim!(tree, bm_models, partition_list = [2])
     branchlength_optim!(tree, bm_models)
+    branchlength_optim!(tree, bm_models, bl_optimizer=BrentsMethodOpt())
     branchlength_optim!(tree, x -> bm_models, partition_list = [2])
     branchlength_optim!(tree, x -> bm_models)
 
diff --git a/test/test_optim.jl b/test/test_optim.jl
@@ -2,6 +2,8 @@ begin
     f(x) = -(x - 2)^2
     m = golden_section_maximize(f, 1, 5, identity, 1e-20)
     @test m == 2.0
+    m = brents_method_minimize(x -> -f(x), 1, 5, identity, 1e-20)
+    @test m == 2.0
 end
 
 begin