#
# This file is part of TransportMaps.
#
# TransportMaps is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# TransportMaps is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with TransportMaps. If not, see <http://www.gnu.org/licenses/>.
#
# Transport Maps Library
# Copyright (C) 2015-2018 Massachusetts Institute of Technology
# Uncertainty Quantification group
# Department of Aeronautics and Astronautics
#
# Author: Transport Map Team
# Website: transportmaps.mit.edu
# Support: transportmaps.mit.edu/qa/
#
import warnings
import numpy as np
import scipy.linalg as scialg
from numpy.linalg import LinAlgError
from scipy.optimize import OptimizeResult
try:
    # SciPy >= 1.8 moved these helpers to private modules
    from scipy.optimize._optimize import approx_fhess_p
    from scipy.optimize._linesearch import line_search_wolfe1, line_search_wolfe2, LineSearchWarning
except ImportError:
    # older SciPy
    from scipy.optimize.optimize import approx_fhess_p
    from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2, LineSearchWarning
__all__ = ['minimize_newtonlu','minimize_newtoncglu']
_epsilon = np.sqrt(np.finfo(float).eps)  # default step size for finite differences
# standard status messages of optimizers
_status_message = {'success': 'Optimization terminated successfully.',
'maxfev': 'Maximum number of function evaluations has '
'been exceeded.',
'maxiter': 'Maximum number of iterations has been '
'exceeded.',
'pr_loss': 'Desired error not necessarily achieved due '
'to precision loss.'}
class _LineSearchError(RuntimeError):
pass
def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval,
**kwargs):
"""
Same as line_search_wolfe1, but fall back to line_search_wolfe2 if
suitable step length is not found, and raise an exception if a
suitable step length is not found.
Raises
------
_LineSearchError
If no suitable step size is found
"""
ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
old_fval, old_old_fval,
**kwargs)
if ret[0] is None:
        # the first line search failed: try the other one
with warnings.catch_warnings():
warnings.simplefilter('ignore', LineSearchWarning)
ret = line_search_wolfe2(f, fprime, xk, pk, gfk,
old_fval, old_old_fval)
if ret[0] is None:
raise _LineSearchError()
return ret
def minimize_newtonlu(fun, x0, args=(), jac=None, hess=None, hessp=None,
callback=None, xtol=1e-5, eps=_epsilon, maxiter=None,
disp=False, return_all=False,
**unknown_options):
""" Minimization of scalar function of one or more variables using the Newton-LU algorithm.
.. note:: the `jac` parameter (Jacobian) is required.
Args:
disp (bool): Set to True to print convergence messages.
xtol (float): Average relative error in solution `xopt` acceptable for
convergence.
maxiter (int): Maximum number of iterations to perform.
eps (float or ndarray): If `jac` is approximated, use this value
for the step size.
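    Example (a minimal sketch; uses SciPy's Rosenbrock helpers `rosen`,
    `rosen_der` and `rosen_hess`):
      >>> import numpy as np
      >>> from scipy.optimize import rosen, rosen_der, rosen_hess
      >>> res = minimize_newtonlu(rosen, np.array([1.3, 0.7]),
      ...                         jac=rosen_der, hess=rosen_hess)
      >>> res.success
      True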
"""
    if jac is None:
        raise ValueError('Jacobian is required for Newton-LU method')
    if hess is None:
        raise ValueError('Hessian is required for Newton-LU method')
f = fun
fprime = jac
fhess_p = hessp
fhess = hess
avextol = xtol
epsilon = eps
retall = return_all
x0 = np.asarray(x0).flatten()
fcalls, f = wrap_function(f, args)
gcalls, fprime = wrap_function(fprime, args)
hcalls = 0
if maxiter is None:
maxiter = len(x0)*200
xtol = len(x0) * avextol
update = [2 * xtol]
xk = x0
if retall:
allvecs = [xk]
k = 0
old_fval = f(x0)
old_old_fval = None
float64eps = np.finfo(np.float64).eps
warnflag = 0
while (np.add.reduce(np.abs(update)) > xtol) and (k < maxiter):
        # Compute a search direction pk by solving the Newton system
        # del2 f(xk) p = - grad f(xk) through an exact (Cholesky) factorization.
b = -fprime(xk)
        if fhess is not None:  # compute the Hessian once per outer iteration
            A = fhess(*(xk,) + args)
            hcalls = hcalls + 1
        # Check whether the Hessian is positive definite. If not, try a bounded
        # number of diagonal shifts to make it positive definite; after maxIt
        # failed trials, revert to the steepest descent search direction.
        Lchol, success, counterIt = makeHessianPositive_Cholesky(A)
        if success:  # Newton direction: solve L L^T pk = b with two triangular solves
            b_temp = scialg.solve_triangular(Lchol, b, trans=0, lower=True)
            pk = scialg.solve_triangular(Lchol, b_temp, trans='T', lower=True)
        else:  # steepest descent direction
            pk = b
gfk = -b # gradient at xk
try:
alphak, fc, gc, old_fval, old_old_fval, gfkp1 = \
_line_search_wolfe12(f, fprime, xk, pk, gfk,
old_fval, old_old_fval)
except _LineSearchError:
# Line search failed to find a better solution.
warnflag = 2
break
update = alphak * pk
xk = xk + update # upcast if necessary
if callback is not None:
callback(xk)
if retall:
allvecs.append(xk)
k += 1
fval = old_fval
if warnflag == 2:
msg = _status_message['pr_loss']
if disp:
print("Warning: " + msg)
print(" Current function value: %f" % fval)
print(" Iterations: %d" % k)
print(" Function evaluations: %d" % fcalls[0])
print(" Gradient evaluations: %d" % gcalls[0])
print(" Hessian evaluations: %d" % hcalls)
elif k >= maxiter:
warnflag = 1
msg = _status_message['maxiter']
if disp:
print("Warning: " + msg)
print(" Current function value: %f" % fval)
print(" Iterations: %d" % k)
print(" Function evaluations: %d" % fcalls[0])
print(" Gradient evaluations: %d" % gcalls[0])
print(" Hessian evaluations: %d" % hcalls)
else:
msg = _status_message['success']
if disp:
print(msg)
print(" Current function value: %f" % fval)
print(" Iterations: %d" % k)
print(" Function evaluations: %d" % fcalls[0])
print(" Gradient evaluations: %d" % gcalls[0])
print(" Hessian evaluations: %d" % hcalls)
result = OptimizeResult(fun=fval, jac=gfk, nfev=fcalls[0], njev=gcalls[0],
nhev=hcalls, status=warnflag,
success=(warnflag == 0), message=msg, x=xk,
nit=k)
if retall:
result['allvecs'] = allvecs
return result
def minimize_newtoncglu(fun, x0, args=(), jac=None, hess=None, hessp=None,
callback=None, xtol=1e-5, eps=_epsilon, maxiter=None,
disp=False, return_all=False,
**unknown_options):
""" Minimization of scalar function of one or more variables using the Newton-LU algorithm.
.. note:: the `jac` parameter (Jacobian) is required.
Args:
disp (bool): Set to True to print convergence messages.
xtol (float): Average relative error in solution `xopt` acceptable for
convergence.
maxiter (int): Maximum number of iterations to perform.
eps (float or ndarray): If `jac` is approximated, use this value
for the step size.
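    Example (a minimal sketch; it assumes the signature is compatible with
    the callable-`method` interface of :func:`scipy.optimize.minimize`):
      >>> import numpy as np
      >>> from scipy.optimize import minimize, rosen, rosen_der, rosen_hess
      >>> res = minimize(rosen, np.array([1.3, 0.7]), jac=rosen_der,
      ...                hess=rosen_hess, method=minimize_newtoncglu)
      >>> res.success
      True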
"""
    if jac is None:
        raise ValueError('Jacobian is required for Newton-CG-LU method')
    if hess is None:
        raise ValueError('Hessian is required for Newton-CG-LU method')
f = fun
fprime = jac
fhess_p = hessp
fhess = hess
avextol = xtol
epsilon = eps
retall = return_all
x0 = np.asarray(x0).flatten()
fcalls, f = wrap_function(f, args)
gcalls, fprime = wrap_function(fprime, args)
hcalls = 0
if maxiter is None:
maxiter = len(x0)*200
xtol = len(x0) * avextol
update = [2 * xtol]
xk = x0
if retall:
allvecs = [xk]
k = 0
old_fval = f(x0)
old_old_fval = None
float64eps = np.finfo(np.float64).eps
warnflag = 0
#### CG-ITERATION #####
while (np.add.reduce(np.abs(update)) > xtol) and (k < maxiter):
# Compute a search direction pk by applying the CG method to
# del2 f(xk) p = - grad f(xk) starting from 0.
b = -fprime(xk)
        # Forcing term for the truncated CG solve: the inner iteration stops
        # once ||r||_1 <= eta * ||grad f||_1, with eta = min(0.5, sqrt(||grad f||_1))
        # (a standard choice for superlinear local convergence).
maggrad = np.add.reduce(np.abs(b))
eta = np.min([0.5, np.sqrt(maggrad)])
termcond = eta * maggrad
xsupi = np.zeros(len(x0), dtype=x0.dtype)
ri = -b
psupi = -ri
i = 0
dri0 = np.dot(ri, ri)
        if fhess is not None:  # compute the Hessian once per outer iteration
A = fhess(*(xk,) + args)
hcalls = hcalls + 1
while np.add.reduce(np.abs(ri)) > termcond:
if fhess is None:
if fhess_p is None:
Ap = approx_fhess_p(xk, psupi, fprime, epsilon)
else:
Ap = fhess_p(xk, psupi, *args)
hcalls = hcalls + 1
else:
Ap = np.dot(A, psupi)
# check curvature
Ap = np.asarray(Ap).squeeze() # get rid of matrices...
curv = np.dot(psupi, Ap)
if 0 <= curv <= 3 * float64eps:
break
elif curv < 0:
if (i > 0):
break
else:
# fall back to steepest descent direction
xsupi = dri0 / (-curv) * b
break
alphai = dri0 / curv
xsupi = xsupi + alphai * psupi
ri = ri + alphai * Ap
dri1 = np.dot(ri, ri)
betai = dri1 / dri0
psupi = -ri + betai * psupi
i = i + 1
dri0 = dri1 # update np.dot(ri,ri) for next time.
pk = xsupi # search direction is solution to system.
gfk = -b # gradient at xk
try:
alphak, fc, gc, old_fval, old_old_fval, gfkp1 = \
_line_search_wolfe12(f, fprime, xk, pk, gfk,
old_fval, old_old_fval)
except _LineSearchError:
# Line search failed to find a better solution.
warnflag = 2
break
update = alphak * pk
xk = xk + update # upcast if necessary
if callback is not None:
callback(xk)
if retall:
allvecs.append(xk)
k += 1
    #### EXACT LU-ITERATION (via modified Cholesky factorization) #####
    update = [2 * xtol]  # reset the convergence check for the second phase
while (np.add.reduce(np.abs(update)) > xtol) and (k < maxiter):
        # Compute a search direction pk by solving the Newton system
        # del2 f(xk) p = - grad f(xk) through an exact (Cholesky) factorization.
b = -fprime(xk)
        if fhess is not None:  # compute the Hessian once per outer iteration
            A = fhess(*(xk,) + args)
            hcalls = hcalls + 1
        # Check whether the Hessian is positive definite. If not, try a bounded
        # number of diagonal shifts to make it positive definite; after maxIt
        # failed trials, revert to the steepest descent search direction.
        Lchol, success, counterIt = makeHessianPositive_Cholesky(A)
        if success:  # Newton direction: solve L L^T pk = b with two triangular solves
            b_temp = scialg.solve_triangular(Lchol, b, trans=0, lower=True)
            pk = scialg.solve_triangular(Lchol, b_temp, trans='T', lower=True)
        else:  # steepest descent direction
            pk = b
gfk = -b # gradient at xk
try:
alphak, fc, gc, old_fval, old_old_fval, gfkp1 = \
_line_search_wolfe12(f, fprime, xk, pk, gfk,
old_fval, old_old_fval)
except _LineSearchError:
# Line search failed to find a better solution.
warnflag = 2
break
update = alphak * pk
xk = xk + update # upcast if necessary
if callback is not None:
callback(xk)
if retall:
allvecs.append(xk)
k += 1
fval = old_fval
if warnflag == 2:
msg = _status_message['pr_loss']
if disp:
print("Warning: " + msg)
print(" Current function value: %f" % fval)
print(" Iterations: %d" % k)
print(" Function evaluations: %d" % fcalls[0])
print(" Gradient evaluations: %d" % gcalls[0])
print(" Hessian evaluations: %d" % hcalls)
elif k >= maxiter:
warnflag = 1
msg = _status_message['maxiter']
if disp:
print("Warning: " + msg)
print(" Current function value: %f" % fval)
print(" Iterations: %d" % k)
print(" Function evaluations: %d" % fcalls[0])
print(" Gradient evaluations: %d" % gcalls[0])
print(" Hessian evaluations: %d" % hcalls)
else:
msg = _status_message['success']
if disp:
print(msg)
print(" Current function value: %f" % fval)
print(" Iterations: %d" % k)
print(" Function evaluations: %d" % fcalls[0])
print(" Gradient evaluations: %d" % gcalls[0])
print(" Hessian evaluations: %d" % hcalls)
result = OptimizeResult(fun=fval, jac=gfk, nfev=fcalls[0], njev=gcalls[0],
nhev=hcalls, status=warnflag,
success=(warnflag == 0), message=msg, x=xk,
nit=k)
if retall:
result['allvecs'] = allvecs
return result
##################### Auxiliary functions #############################################
def wrap_function(function, args):
    # Wrap `function` so that `args` are appended to every call; the number
    # of calls is tracked in the returned one-element list.
    ncalls = [0]
if function is None:
return ncalls, None
def function_wrapper(*wrapper_args):
ncalls[0] += 1
return function(*(wrapper_args + args))
return ncalls, function_wrapper
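# Example (illustrative): this is how fcalls/gcalls are tracked above.
#   >>> ncalls, f = wrap_function(np.dot, (np.ones(2),))
#   >>> f(np.arange(2.))
#   1.0
#   >>> ncalls[0]
#   1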
def makeHessianPositive_Cholesky(hess, beta=1e-3, maxIt=10, prodFactor=2):
    # "Cholesky with added multiple of the identity" (Nocedal & Wright,
    # "Numerical Optimization", pg. 51): shift the diagonal of the Hessian
    # until a Cholesky factorization succeeds, up to maxIt trials.
    if not isSymMatrix(hess):
        hess = closestSymMatrix(hess)  # force symmetry
    diagHess = np.diag(hess)
    dim = len(diagHess)
    hess_temp = np.copy(hess)
    # Choose the initial shift
    if np.min(diagHess) > 0:
        tau = 0
    else:
        tau = -np.min(diagHess) + beta
    counterIt = 0
    flag = False
    while not flag and counterIt < maxIt:
        hess_temp[:, :] = hess  # restore the original Hessian
        hess_temp[np.diag_indices(dim)] += tau  # hess + tau*Id
        try:
            Lchol = np.linalg.cholesky(hess_temp)
            flag = True
        except LinAlgError:
            tau = max(prodFactor * tau, beta)  # increase the shift and retry
        counterIt += 1
    success = flag
    if not success:
        Lchol = 0
    return Lchol, success, counterIt - 1
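# Example (illustrative): an indefinite matrix is shifted until the
# factorization succeeds; the function returns the Cholesky factor of
# hess + tau*Id, a success flag, and the number of failed trials before
# the factorization succeeded.
#   >>> A = np.array([[1., 0.],
#   ...               [0., -2.]])
#   >>> L, ok, nfailed = makeHessianPositive_Cholesky(A)
#   >>> ok, nfailed
#   (True, 0)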
def closestSymMatrix( A ):
    # Return the closest symmetric matrix to A in the Frobenius norm: (A + A^T)/2
    return (A + np.transpose(A)) / 2
def isSymMatrix( A ):
    # Exact element-wise symmetry check; matrices symmetric only up to
    # round-off are symmetrized by closestSymMatrix above.
    return (A.transpose() == A).all()