# Source code for TransportMaps.DerivativesChecks

#
# This file is part of TransportMaps.
#
# TransportMaps is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# TransportMaps is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with TransportMaps.  If not, see <http://www.gnu.org/licenses/>.
#
# Transport Maps Library
# Copyright (C) 2015-2018 Massachusetts Institute of Technology
# Uncertainty Quantification group
# Department of Aeronautics and Astronautics
#
# Author: Transport Map Team
# Website: transportmaps.mit.edu
# Support: transportmaps.mit.edu/qa/
#

__all__ = [
    'fd',
    'fd_gradient_check',
    'action_hess_check',
]

import numpy as np
import time


def fd(
        f, x, dx,
        params=None,
        fshape=None,
        newdim=None,
):
    r""" Compute the finite difference of ``f``

    The function ``f`` may be any scalar, vector, matrix, tensor etc. valued
    function
    :math:`f:\mathbb{R}^d\rightarrow \mathbb{R}^{n_1 \times \cdots \times n_k}`.
    If the function :math:`f` has :math:`k` output dimensions, the result of
    :func:`fd` will have :math:`k+1` dimensions, where the extra dimension is
    inserted at the position prescribed by ``newdim``.

    Args:
      f (:class:`function`): the function :math:`f`
      x (:class:`ndarray<numpy.ndarray>` [:math:`m,d`]): evaluation points.
        If the function is not vectorized, then ``x`` should be of
        dimension :math:`d`.
      dx (float): finite difference perturbation
      params (dict): dictionary of parameters
      fshape (tuple): expected shape of the output of :math:`f`. It should
        correspond to :math:`n_1, \ldots, n_k`. If not provided, it will be
        inferred by calling ``f``.
      newdim (int): dimension along which to insert the derivative direction

    Returns:
      :class:`ndarray<numpy.ndarray>` [:math:`m,n_1,\ldots,n_{k+1}`] --
      gradient of :math:`f`, where one of the :math:`n_i` is :math:`d`.
    """
    if params is None:
        params = {}
    # Figure out the shape of the output of f
    if fshape is None:
        tmp = f(x, **params)
        fshape = tmp.shape
    # Check whether the function is vectorized by inspection of x
    vectorized = (x.ndim == 2)
    # Figure out the input dimension d
    d = x.shape[1] if vectorized else x.shape[0]
    # Check that newdim is a valid insertion position
    # (newdim may legitimately be 0, so compare against None explicitly)
    if newdim is None:
        newdim = len(fshape)
    elif newdim < 0 or newdim > len(fshape):
        raise ValueError(
            "The condition 0 <= newdim <= k must hold.")
    # Prepare the output array
    out = np.zeros( fshape[:newdim] + (d,) + fshape[newdim:] )
    # Prepare the output index
    idxbase = tuple( [slice(None)] * len(fshape) )
    # Compute central finite differences along each input direction
    for i in range(d):
        xc_minus = x.copy()
        xc_plus = x.copy()
        if vectorized:
            xc_minus[:,i] -= dx/2.
            xc_plus[:,i] += dx/2.
        else:
            xc_minus[i] -= dx/2.
            xc_plus[i] += dx/2.
        fm = f(xc_minus, **params)
        fp = f(xc_plus, **params)
        idx = idxbase[:newdim] + (i,) + idxbase[newdim:]
        out[idx] = (fp - fm) / dx
    return out
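

# A minimal usage sketch (illustrative addition, not part of the original
# module): finite-difference gradient of the vectorized quadratic
# f(x) = \sum_j x_j^2, whose analytic gradient is 2x. The helper name
# _example_fd and the tolerance are assumptions made for this example.
def _example_fd():
    x = np.random.randn(5, 3)
    # fd appends the derivative direction after the output dimensions of f,
    # so the result has shape (5, 3)
    g_fd = fd(lambda x: np.sum(x**2, axis=1), x, dx=1e-6)
    assert np.allclose(g_fd, 2. * x, atol=1e-7)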


def fd_gradient_check(
        f, gf, x, dx,
        rtol=None,
        atol=None,
        params=None,
        fshape=None,
        newdim=None,
        verbose=True,
        title='',
):
    r""" Check the gradient ``gf`` against the finite difference approximation of ``f``.

    Args:
      f (:class:`function`): the function :math:`f`, or a function returning
        the tuple :math:`(f, \nabla f)`
      gf (:class:`function`): the function :math:`\nabla f`. If ``None``,
        then it is assumed that ``f`` returns the tuple
        :math:`(f, \nabla f)`.
      x (:class:`ndarray<numpy.ndarray>` [:math:`m,d`]): evaluation points.
        If the function is not vectorized, then ``x`` should be of
        dimension :math:`d`.
      dx (float): finite difference perturbation
      rtol (float): relative tolerance (default is ``dx``)
      atol (float): absolute tolerance (default is ``dx``)
      params (dict): dictionary of parameters
      fshape (tuple): expected shape of the output of :math:`f`. It should
        correspond to :math:`n_1, \ldots, n_k`. If not provided, it will be
        inferred by calling ``f``.
      newdim (int): dimension along which to insert the derivative direction
      verbose (bool): whether to print the result of the test
      title (str): if ``verbose==True``, it is used to identify the gradient
        in the output

    Returns:
      :class:`bool` -- whether the gradient and its finite difference
      approximation agree within the specified error tolerances.
    """
    if rtol is None:
        rtol = dx
    if atol is None:
        atol = dx
    if params is None:
        params = {}
    if verbose:
        print("Checking %s ... " % title, end='')
    # Figure out whether the function f returns the tuple (f, \nabla f)
    tmp = f(x, **params)
    if isinstance(tmp, tuple):
        if gf is None:
            # Testing the gradient: split the tuple into f and \nabla f
            ftuple = f
            def f(x, **params):
                return ftuple(x, **params)[0]
            def gf(x, **params):
                return ftuple(x, **params)[1]
        else:
            # Testing the Hessian: finite-difference the gradient part
            ftuple = f
            def f(x, **params):
                return ftuple(x, **params)[1]
    # Compute the analytic gradient
    exa_start = time.time()
    exa = gf(x, **params)
    exa_stop = time.time()
    exa_time = exa_stop - exa_start
    # Compute the finite difference approximation
    app_start = time.time()
    app = fd(f, x, dx, params=params, fshape=fshape, newdim=newdim)
    app_stop = time.time()
    app_time = app_stop - app_start
    # Compute and evaluate the error
    err = np.abs(app - exa).flatten()
    maxerr = np.max(err)
    success = np.allclose(exa, app, rtol=rtol, atol=atol)
    if verbose:
        if success:
            print("ok (Approximation time: %.4f - Analytic time: %.4f)" % (
                app_time, exa_time))
        else:
            print("FAIL (max err: %e)" % maxerr)
    return success
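

# A minimal usage sketch (illustrative addition, not part of the original
# module): verify the analytic gradient 2x of f(x) = \sum_j x_j^2 against
# its finite difference approximation. The helper name
# _example_fd_gradient_check is an assumption made for this example.
def _example_fd_gradient_check():
    x = np.random.randn(5, 3)
    ok = fd_gradient_check(
        lambda x: np.sum(x**2, axis=1),  # f
        lambda x: 2. * x,                # \nabla f
        x, dx=1e-6, verbose=False)
    assert ok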


def action_hess_check(
        ghf, ahf, x, v,
        fd_dx=None,
        rtol=None,
        atol=None,
        params=None,
        fshape=None,
        newdim=None,
        verbose=True,
        title='',
):
    r""" Check the correctness of the action :math:`\langle \nabla^2 f, v \rangle`.

    If the perturbation ``fd_dx`` is provided, then ``ghf`` is assumed to be
    the gradient :math:`\nabla f`, and the Hessian :math:`\nabla^2 f` is
    approximated using finite differences. Otherwise ``ghf`` is taken to be
    the Hessian :math:`\nabla^2 f` itself.

    Args:
      ghf (:class:`function`): the function :math:`\nabla^2 f`, or
        :math:`\nabla f`, or :math:`(f, \nabla f)`. In the latter two cases
        the Hessian is approximated with finite differences and ``fd_dx``
        must be provided.
      ahf (:class:`function`): the function
        :math:`\langle \nabla^2 f, v \rangle`
      x (:class:`ndarray<numpy.ndarray>` [:math:`m,d`]): evaluation points.
        If the function is not vectorized, then ``x`` should be of
        dimension :math:`d`.
      v (:class:`ndarray<numpy.ndarray>` [:math:`m,d`]): vector :math:`v`.
        If the action of the Hessian is not vectorized with respect to the
        perturbation, then ``v`` should be of dimension :math:`d`.
      fd_dx (float): finite difference perturbation
      rtol (float): relative tolerance (default is ``fd_dx``, or ``1e-13``
        if no finite differences are used)
      atol (float): absolute tolerance (default is ``fd_dx``, or ``1e-13``
        if no finite differences are used)
      params (dict): dictionary of parameters
      fshape (tuple): expected shape of the output of :math:`f`. It should
        correspond to :math:`n_1, \ldots, n_k`. If not provided, it will be
        inferred by calling ``f``.
      newdim (int): dimension along which to insert the derivative direction
      verbose (bool): whether to print the result of the test
      title (str): if ``verbose==True``, it is used to identify the Hessian
        action in the output

    Returns:
      :class:`bool` -- whether the action of the Hessian agrees with its
      approximation within the specified error tolerances.
    """
    if rtol is None:
        rtol = fd_dx if fd_dx else 1e-13
    if atol is None:
        atol = fd_dx if fd_dx else 1e-13
    if params is None:
        params = {}
    if verbose:
        print("Checking %s ... " % title, end='')
    # Compute the analytic action of the Hessian
    exa_start = time.time()
    exa = ahf(x, v, **params)
    exa_stop = time.time()
    exa_time = exa_stop - exa_start
    # Compute the approximate action of the Hessian
    app_start = time.time()
    if fd_dx is None:
        # ghf is the Hessian itself
        h = ghf(x, **params)
    else:
        # Figure out whether ghf returns \nabla f or the tuple (f, \nabla f)
        tmp = ghf(x, **params)
        if isinstance(tmp, tuple):
            ghftuple = ghf
            def ghf(x, **params):
                return ghftuple(x, **params)[1]
        # Approximate the Hessian by finite differences of the gradient
        h = fd(ghf, x, fd_dx, params=params, fshape=fshape, newdim=newdim)
    # Contract the last (perturbation) axis of the Hessian with v,
    # keeping the leading batch dimension aligned
    if v.ndim == 2:
        app = np.einsum('m...i,mi->m...', h, v)
    else:
        app = np.einsum('...i,i->...', h, v)
    app_stop = time.time()
    app_time = app_stop - app_start
    # Compute and evaluate the error
    err = np.abs(app - exa).flatten()
    maxerr = np.max(err)
    success = np.allclose(exa, app, rtol=rtol, atol=atol)
    if verbose:
        if success:
            print("ok (Approximation time: %.4f - Analytic time: %.4f)" % (
                app_time, exa_time))
        else:
            print("FAIL (max err: %e)" % maxerr)
    return success
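

# A minimal usage sketch (illustrative addition, not part of the original
# module): for f(x) = \sum_j x_j^2 the Hessian is 2I at every point, so its
# action on v is 2v. Since fd_dx is omitted, ghf must return the full
# Hessian. The helper name _example_action_hess_check is an assumption made
# for this example.
def _example_action_hess_check():
    x = np.random.randn(3)  # a single, non-vectorized point
    v = np.random.randn(3)  # perturbation direction
    hess = lambda x: 2. * np.eye(x.shape[0])  # \nabla^2 f
    act_hess = lambda x, v: 2. * v            # <\nabla^2 f, v>
    assert action_hess_check(hess, act_hess, x, v, verbose=False)


if __name__ == '__main__':
    # Run the illustrative self-checks defined above
    _example_fd()
    _example_fd_gradient_check()
    _example_action_hess_check()
    print("All derivative checks passed.")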