# TransportMaps.Optimization.Optimizers

#
# This file is part of TransportMaps.
#
# TransportMaps is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# TransportMaps is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with TransportMaps.  If not, see <http://www.gnu.org/licenses/>.
#
# Transport Maps Library
# Copyright (C) 2015-2018 Massachusetts Institute of Technology
# Uncertainty Quantification group
# Department of Aeronautics and Astronautics
#
# Author: Transport Map Team
# Website: transportmaps.mit.edu
# Support: transportmaps.mit.edu/qa/
#

import warnings
import scipy.linalg as scialg
from numpy.linalg import LinAlgError
from scipy.optimize import OptimizeResult
# SciPy private helpers; their module paths changed across SciPy versions.
try:
    from scipy.optimize._optimize import approx_fhess_p
except ImportError:  # SciPy < 1.8
    from scipy.optimize.optimize import approx_fhess_p
try:
    from scipy.optimize._linesearch import (
        line_search_wolfe1, line_search_wolfe2, LineSearchWarning)
except ImportError:  # SciPy < 1.8
    from scipy.optimize.linesearch import (
        line_search_wolfe1, line_search_wolfe2, LineSearchWarning)
import numpy as np

__all__ = ['minimize_newtonlu','minimize_newtoncglu']

_epsilon = np.sqrt(np.finfo(float).eps)

# standard status messages of optimizers
_status_message = {'success': 'Optimization terminated successfully.',
                   'maxfev': 'Maximum number of function evaluations has '
                              'been exceeded.',
                   'maxiter': 'Maximum number of iterations has been '
                              'exceeded.',
                   'pr_loss': 'Desired error not necessarily achieved due '
                              'to precision loss.'}

class _LineSearchError(RuntimeError):
    pass

def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval,
                         **kwargs):
    """
    Same as line_search_wolfe1, but falls back to line_search_wolfe2 if a
    suitable step length is not found, and raises an exception if neither
    line search finds a suitable step length.

    Raises
    ------
    _LineSearchError
        If no suitable step size is found
    """
    ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
                             old_fval, old_old_fval,
                             **kwargs)

    if ret[0] is None:
        # line search failed: try different one.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', LineSearchWarning)
            ret = line_search_wolfe2(f, fprime, xk, pk, gfk,
                                     old_fval, old_old_fval)

    if ret[0] is None:
        raise _LineSearchError()

    return ret


def minimize_newtonlu(fun, x0, args=(), jac=None, hess=None, hessp=None,
                      callback=None, xtol=1e-5, eps=_epsilon, maxiter=None,
                      disp=False, return_all=False, **unknown_options):
    """
    Minimization of a scalar function of one or more variables using the
    Newton-LU algorithm.

    .. note:: the `jac` (gradient) and `hess` (Hessian) parameters are required.

    Args:
      disp (bool): set to True to print convergence messages.
      xtol (float): average relative error in the solution `xopt` acceptable
        for convergence.
      maxiter (int): maximum number of iterations to perform.
      eps (float or ndarray): if `jac` is approximated, use this value for
        the step size.
    """
    if jac is None:
        raise ValueError('Jacobian is required for Newton-LU method')
    if hess is None:
        raise ValueError('Hessian is required for Newton-LU method')
    f = fun
    fprime = jac
    fhess_p = hessp
    fhess = hess
    avextol = xtol
    epsilon = eps
    retall = return_all

    x0 = np.asarray(x0).flatten()
    fcalls, f = wrap_function(f, args)
    gcalls, fprime = wrap_function(fprime, args)
    hcalls = 0
    if maxiter is None:
        maxiter = len(x0) * 200

    xtol = len(x0) * avextol
    update = [2 * xtol]
    xk = x0
    if retall:
        allvecs = [xk]
    k = 0
    old_fval = f(x0)
    old_old_fval = None
    float64eps = np.finfo(np.float64).eps
    warnflag = 0

    while (np.add.reduce(np.abs(update)) > xtol) and (k < maxiter):
        # Compute the search direction pk by solving
        # del2 f(xk) pk = - grad f(xk) through a Cholesky factorization.
        b = -fprime(xk)
        if fhess is not None:  # compute the Hessian only once per iteration
            A = fhess(*(xk,) + args)
            hcalls = hcalls + 1

        # Check whether the Hessian is positive definite. If not, first try
        # to shift it a bounded number of times to make it positive definite;
        # after that many trials, revert to the steepest descent direction.
        Lchol, success, counterIt = makeHessianPositive_Cholesky(A)
        if success:
            # Newton direction: solve L L^T pk = b with two triangular solves
            b_temp = scialg.solve_triangular(Lchol, b, trans=0, lower=True)
            pk = scialg.solve_triangular(Lchol, b_temp, trans='T', lower=True)
        else:
            # Steepest descent direction
            pk = b

        gfk = -b  # gradient at xk
        try:
            alphak, fc, gc, old_fval, old_old_fval, gfkp1 = \
                _line_search_wolfe12(f, fprime, xk, pk, gfk,
                                     old_fval, old_old_fval)
        except _LineSearchError:
            # Line search failed to find a better solution.
            warnflag = 2
            break

        update = alphak * pk
        xk = xk + update  # upcast if necessary
        if callback is not None:
            callback(xk)
        if retall:
            allvecs.append(xk)
        k += 1

    fval = old_fval
    if warnflag == 2:
        msg = _status_message['pr_loss']
        if disp:
            print("Warning: " + msg)
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % k)
            print("         Function evaluations: %d" % fcalls[0])
            print("         Gradient evaluations: %d" % gcalls[0])
            print("         Hessian evaluations: %d" % hcalls)
    elif k >= maxiter:
        warnflag = 1
        msg = _status_message['maxiter']
        if disp:
            print("Warning: " + msg)
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % k)
            print("         Function evaluations: %d" % fcalls[0])
            print("         Gradient evaluations: %d" % gcalls[0])
            print("         Hessian evaluations: %d" % hcalls)
    else:
        msg = _status_message['success']
        if disp:
            print(msg)
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % k)
            print("         Function evaluations: %d" % fcalls[0])
            print("         Gradient evaluations: %d" % gcalls[0])
            print("         Hessian evaluations: %d" % hcalls)

    result = OptimizeResult(fun=fval, jac=gfk, nfev=fcalls[0], njev=gcalls[0],
                            nhev=hcalls, status=warnflag,
                            success=(warnflag == 0), message=msg, x=xk, nit=k)
    if retall:
        result['allvecs'] = allvecs
    return result
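

# Illustrative usage sketch (not part of the original module): drive the
# Newton-LU routine on the Rosenbrock test function with an analytic gradient
# and Hessian; the indefinite regions of the Rosenbrock Hessian also exercise
# the modified-Cholesky fallback. The signature matches what
# scipy.optimize.minimize expects of a callable passed via ``method=``, so
# minimize(rosen, x0, jac=rosen_der, hess=rosen_hess, method=minimize_newtonlu)
# should work as well.
def _example_newtonlu():
    from scipy.optimize import rosen, rosen_der, rosen_hess
    x0 = np.array([-1.2, 1.0])
    res = minimize_newtonlu(rosen, x0, jac=rosen_der, hess=rosen_hess)
    return res  # res.x should be close to the minimizer [1, 1]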


def minimize_newtoncglu(fun, x0, args=(), jac=None, hess=None, hessp=None,
                        callback=None, xtol=1e-5, eps=_epsilon, maxiter=None,
                        disp=False, return_all=False, **unknown_options):
    """
    Minimization of a scalar function of one or more variables using the
    Newton-CG-LU algorithm: an inexact Newton-CG phase followed by an exact
    Newton phase based on a Cholesky factorization.

    .. note:: the `jac` (gradient) and `hess` (Hessian) parameters are required.

    Args:
      disp (bool): set to True to print convergence messages.
      xtol (float): average relative error in the solution `xopt` acceptable
        for convergence.
      maxiter (int): maximum number of iterations to perform.
      eps (float or ndarray): if `jac` is approximated, use this value for
        the step size.
    """
    if jac is None:
        raise ValueError('Jacobian is required for Newton-CG-LU method')
    if hess is None:
        raise ValueError('Hessian is required for Newton-CG-LU method')
    f = fun
    fprime = jac
    fhess_p = hessp
    fhess = hess
    avextol = xtol
    epsilon = eps
    retall = return_all

    x0 = np.asarray(x0).flatten()
    fcalls, f = wrap_function(f, args)
    gcalls, fprime = wrap_function(fprime, args)
    hcalls = 0
    if maxiter is None:
        maxiter = len(x0) * 200

    xtol = len(x0) * avextol
    update = [2 * xtol]
    xk = x0
    if retall:
        allvecs = [xk]
    k = 0
    old_fval = f(x0)
    old_old_fval = None
    float64eps = np.finfo(np.float64).eps
    warnflag = 0

    #### CG-ITERATION #####
    while (np.add.reduce(np.abs(update)) > xtol) and (k < maxiter):
        # Compute a search direction pk by applying the CG method to
        # del2 f(xk) p = - grad f(xk), starting from 0.
        b = -fprime(xk)
        # Options iterative solver?
        maggrad = np.add.reduce(np.abs(b))
        eta = np.min([0.5, np.sqrt(maggrad)])
        termcond = eta * maggrad
        xsupi = np.zeros(len(x0), dtype=x0.dtype)
        ri = -b
        psupi = -ri
        i = 0
        dri0 = np.dot(ri, ri)

        if fhess is not None:  # compute the Hessian only once per iteration
            A = fhess(*(xk,) + args)
            hcalls = hcalls + 1

        while np.add.reduce(np.abs(ri)) > termcond:
            if fhess is None:
                if fhess_p is None:
                    Ap = approx_fhess_p(xk, psupi, fprime, epsilon)
                else:
                    Ap = fhess_p(xk, psupi, *args)
                    hcalls = hcalls + 1
            else:
                Ap = np.dot(A, psupi)
            # check curvature
            Ap = np.asarray(Ap).squeeze()  # get rid of matrices...
            curv = np.dot(psupi, Ap)
            if 0 <= curv <= 3 * float64eps:
                break
            elif curv < 0:
                if i > 0:
                    break
                else:
                    # fall back to steepest descent direction
                    xsupi = dri0 / (-curv) * b
                    break
            alphai = dri0 / curv
            xsupi = xsupi + alphai * psupi
            ri = ri + alphai * Ap
            dri1 = np.dot(ri, ri)
            betai = dri1 / dri0
            psupi = -ri + betai * psupi
            i = i + 1
            dri0 = dri1  # update np.dot(ri, ri) for next time

        pk = xsupi  # search direction is solution to system
        gfk = -b  # gradient at xk
        try:
            alphak, fc, gc, old_fval, old_old_fval, gfkp1 = \
                _line_search_wolfe12(f, fprime, xk, pk, gfk,
                                     old_fval, old_old_fval)
        except _LineSearchError:
            # Line search failed to find a better solution.
            warnflag = 2
            break

        update = alphak * pk
        xk = xk + update  # upcast if necessary
        if callback is not None:
            callback(xk)
        if retall:
            allvecs.append(xk)
        k += 1

    #### EXACT LU-ITERATION #####
    update = [2 * xtol]
    while (np.add.reduce(np.abs(update)) > xtol) and (k < maxiter):
        # Compute the search direction pk by solving
        # del2 f(xk) pk = - grad f(xk) through a Cholesky factorization.
        b = -fprime(xk)
        if fhess is not None:  # compute the Hessian only once per iteration
            A = fhess(*(xk,) + args)
            hcalls = hcalls + 1

        # Check whether the Hessian is positive definite. If not, first try
        # to shift it a bounded number of times to make it positive definite;
        # after that many trials, revert to the steepest descent direction.
        Lchol, success, counterIt = makeHessianPositive_Cholesky(A)
        if success:
            # Newton direction: solve L L^T pk = b with two triangular solves
            b_temp = scialg.solve_triangular(Lchol, b, trans=0, lower=True)
            pk = scialg.solve_triangular(Lchol, b_temp, trans='T', lower=True)
        else:
            # Steepest descent direction
            pk = b

        gfk = -b  # gradient at xk
        try:
            alphak, fc, gc, old_fval, old_old_fval, gfkp1 = \
                _line_search_wolfe12(f, fprime, xk, pk, gfk,
                                     old_fval, old_old_fval)
        except _LineSearchError:
            # Line search failed to find a better solution.
            warnflag = 2
            break

        update = alphak * pk
        xk = xk + update  # upcast if necessary
        if callback is not None:
            callback(xk)
        if retall:
            allvecs.append(xk)
        k += 1

    fval = old_fval
    if warnflag == 2:
        msg = _status_message['pr_loss']
        if disp:
            print("Warning: " + msg)
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % k)
            print("         Function evaluations: %d" % fcalls[0])
            print("         Gradient evaluations: %d" % gcalls[0])
            print("         Hessian evaluations: %d" % hcalls)
    elif k >= maxiter:
        warnflag = 1
        msg = _status_message['maxiter']
        if disp:
            print("Warning: " + msg)
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % k)
            print("         Function evaluations: %d" % fcalls[0])
            print("         Gradient evaluations: %d" % gcalls[0])
            print("         Hessian evaluations: %d" % hcalls)
    else:
        msg = _status_message['success']
        if disp:
            print(msg)
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % k)
            print("         Function evaluations: %d" % fcalls[0])
            print("         Gradient evaluations: %d" % gcalls[0])
            print("         Hessian evaluations: %d" % hcalls)

    result = OptimizeResult(fun=fval, jac=gfk, nfev=fcalls[0], njev=gcalls[0],
                            nhev=hcalls, status=warnflag,
                            success=(warnflag == 0), message=msg, x=xk, nit=k)
    if retall:
        result['allvecs'] = allvecs
    return result
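

# Illustrative usage sketch (not part of the original module): the same
# Rosenbrock problem solved with the hybrid driver, i.e. inexact Newton-CG
# iterations followed by exact Newton steps via the Cholesky factorization.
# An analytic Hessian is supplied because the exact LU phase requires it.
def _example_newtoncglu():
    from scipy.optimize import rosen, rosen_der, rosen_hess
    x0 = np.array([-1.2, 1.0])
    res = minimize_newtoncglu(rosen, x0, jac=rosen_der, hess=rosen_hess)
    return res  # res.x should be close to the minimizer [1, 1]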


##################### Auxiliary functions #############################################

def wrap_function(function, args):
    ncalls = [0]
    if function is None:
        return ncalls, None

    def function_wrapper(*wrapper_args):
        ncalls[0] += 1
        return function(*(wrapper_args + args))

    return ncalls, function_wrapper


def makeHessianPositive_Cholesky(hess, beta=1e-3, maxIt=10, prodFactor=2):
    # Cholesky with an added multiple of the identity, from
    # "Numerical Optimization", Nocedal & Wright, pg. 51.
    if not isSymMatrix(hess):
        hess = closestSymMatrix(hess)  # force symmetry
    diagHess = np.diag(hess)
    dim = len(diagHess)
    hess_temp = np.copy(hess)
    # Decide the initial shift
    if np.min(diagHess) > 0:
        tau = 0
    else:
        tau = - np.min(diagHess) + beta
    counterIt = 0
    flag = False
    while not flag and counterIt < maxIt:
        hess_temp[:, :] = hess  # restore the original Hessian
        diag_view = np.diagonal(hess_temp)
        diag_view.setflags(write=True)
        diag_view += tau  # hess + tau*Id
        try:
            Lchol = np.linalg.cholesky(hess_temp)
            flag = True
        except LinAlgError:
            pass
        tau = max(prodFactor * tau, beta)
        counterIt += 1  # update counter
    if flag:
        success = True
    else:
        success = False
        Lchol = 0
    return Lchol, success, counterIt - 1


def closestSymMatrix(A):
    # Replace the square matrix A with the closest symmetric matrix in the
    # Frobenius norm: (A + A^T) / 2
    return (A + np.transpose(A)) / 2


def isSymMatrix(A):
    return (A.transpose() == A).all()
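

if __name__ == "__main__":
    # Illustrative self-check (not part of the original module): shift a small
    # indefinite symmetric matrix until it admits a Cholesky factorization,
    # then run the two example drivers defined above.
    A_indef = np.array([[-1.0, 0.5],
                        [0.5, 2.0]])  # symmetric but not positive definite
    Lchol, ok, nIt = makeHessianPositive_Cholesky(A_indef)
    print("modified Cholesky succeeded:", ok)
    print("Newton-LU    solution:", _example_newtonlu().x)
    print("Newton-CG-LU solution:", _example_newtoncglu().x)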