Source code for TransportMaps.L2.minimize_L2_distance

#
# This file is part of TransportMaps.
#
# TransportMaps is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# TransportMaps is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with TransportMaps.  If not, see <http://www.gnu.org/licenses/>.
#
# Transport Maps Library
# Copyright (C) 2015-2018 Massachusetts Institute of Technology
# Uncertainty Quantification group
# Department of Aeronautics and Astronautics
#
# Authors: Transport Map Team
# Website: transportmaps.mit.edu
# Support: transportmaps.mit.edu/qa/
#

import logging

import numpy as np
import numpy.linalg as npla
import scipy.optimize as sciopt
import scipy.linalg as scila

from ..L2 import L2squared_misfit, grad_a_L2squared_misfit, hess_a_L2squared_misfit, \
    storage_hess_a_L2squared_misfit, action_stored_hess_a_L2squared_misfit
from ..MPI import mpi_map, SumChunkReduce, mpi_map_alloc_dmem, mpi_bcast_dmem
from ..Maps.Functionals import ParametricFunctional
from ..Maps import ParametricComponentwiseMap, AffineTriangularMap


__all__ = [
    'map_regression',
    'functional_regression',
    'functional_regression_objective',
    'functional_regression_grad_a_objective',
    'functional_regression_hess_a_objective',
    'functional_regression_action_storage_hess_a_objective',
    'affine_triangular_map_regression'
]


def map_regression(
        tm: ParametricComponentwiseMap,
        t,
        tparams=None,
        d=None,
        qtype=None,
        qparams=None,
        x=None,
        w=None,
        x0=None,
        regularization=None,
        tol=1e-4,
        maxit=100,
        batch_size_list=None,
        mpi_pool_list=None):
    r""" Compute :math:`{\bf a}^* = \arg\min_{\bf a} \Vert T - T[{\bf a}] \Vert_{\pi}`.

    This regression problem can be completely decoupled if the measure is a
    product measure, obtaining

    .. math::

       a^{(i)*} = \arg\min_{\bf a^{(i)}} \Vert T_i - T_i[{\bf a}^{(i)}] \Vert_{\pi_i}

    Args:
      tm (ParametricComponentwiseMap): transport map :math:`T`
      t (function or :class:`ndarray<numpy.ndarray>` [:math:`m`]): function
        :math:`t` with signature ``t(x)`` or its function values
      tparams (dict): parameters for function :math:`t`
      d (Distribution): distribution :math:`\pi`
      qtype (int): quadrature type to be used for the approximation of
        :math:`\mathbb{E}_{\pi}`
      qparams (object): parameters necessary for the construction of the
        quadrature
      x (:class:`ndarray<numpy.ndarray>` [:math:`m,d`]): quadrature points
        used for the approximation of :math:`\mathbb{E}_{\pi}`
      w (:class:`ndarray<numpy.ndarray>` [:math:`m`]): quadrature weights
        used for the approximation of :math:`\mathbb{E}_{\pi}`
      x0 (:class:`ndarray<numpy.ndarray>` [:math:`N`]): coefficients to be
        used as initial values for the optimization
      regularization (dict): defines the regularization to be used.
        If ``None``, no regularization is applied.
        If key ``type=='L2'``, Tikhonov regularization is applied with the
        coefficient in key ``alpha``.
      tol (float): tolerance to be used to solve the regression problem.
      maxit (int): maximum number of iterations
      batch_size_list (:class:`list<list>` [d] :class:`tuple<tuple>` [3] :class:`int<int>`):
        each tuple in the list corresponds to one component of the map.
        The entries of the tuple define whether to evaluate the regression in
        batches of a certain size or not. A size ``1`` corresponds to a
        completely non-vectorized evaluation, a size ``None`` to a completely
        vectorized one. (Note: if ``nprocs > 1``, the batch size is the batch
        size for each process.)
      mpi_pool_list (:class:`list<list>` [d] :class:`mpi_map.MPI_Pool` or ``None``):
        pool of processes to be used for function, gradient and Hessian
        evaluation for each component of the approximation.
        Value ``None`` will use serial evaluation.

    Returns:
      (:class:`list<list>` [:math:`d`]) containing log information from the
      optimizer.

    .. seealso:: :mod:`MonotonicApproximation`

    .. note:: the resulting coefficients :math:`{\bf a}` are automatically set
       at the end of the optimization. Use :func:`get_coeffs` in order to
       retrieve them.

    .. note:: The parameters ``(qtype,qparams)`` and ``(x,w)`` are mutually
       exclusive, but one pair of them is necessary.
    """
    if isinstance(tm, AffineTriangularMap):
        return affine_triangular_map_regression(
            tm, t, tparams,
            d=d, qtype=qtype, qparams=qparams,
            x=x, w=w,
            regularization=regularization
        )
    if batch_size_list is None:
        batch_size_list = [(None, None, None)] * tm.dim_out
    if len(batch_size_list) != tm.dim_out:
        raise ValueError("len(batch_size_list) must be equal to tm.dim_out")
    if mpi_pool_list is None:
        mpi_pool_list = [None] * tm.dim_out
    if len(mpi_pool_list) != tm.dim_out:
        raise ValueError("len(mpi_pool_list) must be equal to tm.dim_out")
    if (x is None) and (w is None):
        (x, w) = d.quadrature(qtype, qparams)
    if isinstance(t, np.ndarray):
        T = t
    else:
        T = t(x)
    log_entry_list = []
    start_x0 = 0
    reg = None
    tm.logger.info("Starting decoupled regression of map.")
    for i, (a, avar, batch_size_tuple, mpi_pool) in enumerate(zip(
            tm.approx_list, tm.active_vars, batch_size_list, mpi_pool_list)):
        tm.logger.debug(
            "Regression on component %i" % i +
            " (dim: %d - ncoeffs: %d" % (a.dim_in, a.n_coeffs) +
            " - npoints: %d)" % x.shape[0])
        if regularization is not None and regularization['type'] == 'L2':
            reg = {'type': 'L2',
                   'alpha': a.n_coeffs * regularization['alpha'] / tm.n_coeffs}
        x0a = None
        if x0 is not None:
            x0a = x0[start_x0:start_x0 + a.n_coeffs]
            start_x0 += a.n_coeffs
        (coeffs, log_entry) = functional_regression(
            a, T[:, i], x=x[:, avar], w=w,
            x0=x0a,
            regularization=reg,
            tol=tol, maxit=maxit,
            batch_size=batch_size_tuple,
            mpi_pool=mpi_pool
        )
        log_entry_list.append(log_entry)
        if not log_entry['success']:
            tm.logger.warning("Regression on component %i failed." % i)
    tm.logger.info("Finished decoupled regression of map.")
    return log_entry_list
def functional_regression(
        fn: ParametricFunctional,
        f,
        fparams=None,
        d=None,
        qtype=None,
        qparams=None,
        x=None,
        w=None,
        x0=None,
        regularization=None,
        tol=1e-4,
        maxit=100,
        batch_size=(None, None, None),
        mpi_pool=None,
        import_set=set()
):
    r""" Compute :math:`{\bf a}^* = \arg\min_{\bf a} \Vert f - f_{\bf a} \Vert_{\pi}`.

    Args:
      fn (ParametricFunctional): the function :math:`f_{\bf a}`
      f (:class:`Function` or :class:`ndarray<numpy.ndarray>` [:math:`m`]):
        function :math:`f` or its function values
      fparams (dict): parameters for function :math:`f`
      d (Distribution): distribution :math:`\pi`
      qtype (int): quadrature type to be used for the approximation of
        :math:`\mathbb{E}_{\pi}`
      qparams (object): parameters necessary for the construction of the
        quadrature
      x (:class:`ndarray<numpy.ndarray>` [:math:`m,d`]): quadrature points
        used for the approximation of :math:`\mathbb{E}_{\pi}`
      w (:class:`ndarray<numpy.ndarray>` [:math:`m`]): quadrature weights
        used for the approximation of :math:`\mathbb{E}_{\pi}`
      x0 (:class:`ndarray<numpy.ndarray>` [:math:`N`]): coefficients to be
        used as initial values for the optimization
      regularization (dict): defines the regularization to be used.
        If ``None``, no regularization is applied.
        If key ``type=='L2'``, Tikhonov regularization is applied with the
        coefficient in key ``alpha``.
      tol (float): tolerance to be used to solve the regression problem.
      maxit (int): maximum number of iterations
      batch_size (:class:`list<list>` [3] of :class:`int<int>`): the list
        contains the size of the batch to be used for each iteration.
        A size ``1`` corresponds to a completely non-vectorized evaluation,
        a size ``None`` to a completely vectorized one.
      mpi_pool (:class:`mpi_map.MPI_Pool`): pool of processes to be used
      import_set (set): set of pairs ``(module_name, as_field)`` to be
        imported as ``import module_name as as_field`` (for MPI purposes)

    Returns:
      (:class:`tuple<tuple>` (:class:`ndarray<numpy.ndarray>` [:math:`N`],
      :class:`list<list>`)) -- containing the :math:`N` coefficients and log
      information from the optimizer.

    .. seealso:: :func:`TransportMaps.TriangularTransportMap.regression`

    .. note:: the resulting coefficients :math:`{\bf a}` are automatically set
       at the end of the optimization. Use :func:`coeffs` in order to retrieve
       them.

    .. note:: The parameters ``(qtype,qparams)`` and ``(x,w)`` are mutually
       exclusive, but one pair of them is necessary.
    """
    if (x is None) and (w is None):
        (x, w) = d.quadrature(qtype, qparams)
    params = {}
    params['fn'] = fn
    params['x'] = x
    params['w'] = w
    params['regularization'] = regularization
    params['batch_size'] = batch_size
    params['nobj'] = 0
    params['nda_obj'] = 0
    params['nda2_obj'] = 0
    params['nda2_obj_dot'] = 0
    params['hess_assembled'] = False
    params['mpi_pool'] = mpi_pool
    options = {'maxiter': maxit,
               'disp': False}
    if x0 is None:
        x0 = fn.get_default_init_values_regression()
    if fn.logger.getEffectiveLevel() <= logging.DEBUG:
        fn.logger.debug("regression(): Precomputation started")
    if isinstance(f, np.ndarray):
        params['fvals'] = f
    else:
        scatter_tuple = (['x'], [x])
        bcast_tuple = (['precomp'], [fparams])
        params['fvals'] = mpi_map("evaluate",
                                  scatter_tuple=scatter_tuple,
                                  bcast_tuple=bcast_tuple,
                                  obj=f, mpi_pool=mpi_pool)
    # Init precomputation memory
    params['params1'] = {}
    mpi_bcast_dmem(params1=params['params1'], f1=fn, mpi_pool=mpi_pool)
    # Precompute
    scatter_tuple = (['x'], [x])
    mpi_map("precomp_regression",
            scatter_tuple=scatter_tuple,
            dmem_key_in_list=['params1'],
            dmem_arg_in_list=['precomp'],
            dmem_val_in_list=[params['params1']],
            obj='f1', obj_val=fn,
            mpi_pool=mpi_pool,
            concatenate=False)
    if fn.logger.getEffectiveLevel() <= logging.DEBUG:
        fn.logger.debug("regression(): Precomputation ended")
    # Callback variables
    fn.params_callback = params
    # Minimize
    res = sciopt.minimize(
        functional_regression_objective,
        args=params,
        x0=x0,
        jac=functional_regression_grad_a_objective,
        # hessp=functional_regression_action_storage_hess_a_objective,
        # method='Newton-CG',
        method='BFGS',
        tol=tol,
        options=options,
        callback=fn.regression_callback)
    if not res['success']:
        fn.logger.warning("Regression failure: " + res['message'])
    # Clean up callback stuff
    del fn.params_callback
    coeffs = res['x']
    fn.coeffs = coeffs
    return (coeffs, res)
def functional_regression_objective(a, params):
    r""" Objective function :math:`\Vert f - f_{\bf a} \Vert^2_{\pi}`

    Args:
      a (:class:`ndarray<numpy.ndarray>` [:math:`N`]): coefficients
      params (dict): dictionary of parameters
    """
    fn = params['fn']
    params['nobj'] += 1
    x = params['x']
    w = params['w']
    fvals = params['fvals']
    batch_size = params['batch_size'][0]
    mpi_pool = params['mpi_pool']
    # Update coefficients
    bcast_tuple = (['coeffs'], [a])
    mpi_map("_set_coeffs", bcast_tuple=bcast_tuple,
            obj='f1', obj_val=fn,
            mpi_pool=mpi_pool, concatenate=False)
    # Evaluate L2 misfit
    scatter_tuple = (['x', 'w', 'f2'], [x, w, fvals])
    bcast_tuple = (['batch_size'], [batch_size])
    dmem_key_in_list = ['f1', 'params1']
    dmem_arg_in_list = ['f1', 'params1']
    dmem_val_in_list = [fn, params['params1']]
    reduce_obj = SumChunkReduce(axis=0)
    out = mpi_map(L2squared_misfit,
                  scatter_tuple=scatter_tuple,
                  bcast_tuple=bcast_tuple,
                  dmem_key_in_list=dmem_key_in_list,
                  dmem_arg_in_list=dmem_arg_in_list,
                  dmem_val_in_list=dmem_val_in_list,
                  reduce_obj=reduce_obj,
                  mpi_pool=mpi_pool)
    if params['regularization'] is None:
        pass
    elif params['regularization']['type'] == 'L2':
        out += params['regularization']['alpha'] * \
            npla.norm(a - fn.regression_nominal_coeffs(), 2) ** 2.
    fn.logger.debug("Regression Obj. Eval. %d - L2-misfit = %.10e" % (
        params['nobj'], out))
    return out
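

# Illustrative sketch (not part of the library API): up to implementation
# details, the quantity accumulated across MPI chunks by `L2squared_misfit`
# is the quadrature approximation sum_i w_i (f(x_i) - f_a(x_i))^2 of the
# squared L2 misfit. `f1_vals` and `f2_vals` below are hypothetical per-point
# evaluations of the two functions.
def _demo_L2squared_misfit(f1_vals, f2_vals, w):
    r = f1_vals - f2_vals
    return np.dot(w, r**2)
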
def functional_regression_grad_a_objective(a, params):
    r""" Objective function :math:`\nabla_{\bf a} \Vert f - f_{\bf a} \Vert^2_{\pi}`

    Args:
      a (:class:`ndarray<numpy.ndarray>` [:math:`N`]): coefficients
      params (dict): dictionary of parameters
    """
    fn = params['fn']
    params['nda_obj'] += 1
    x = params['x']
    w = params['w']
    fvals = params['fvals']
    batch_size = params['batch_size'][1]
    mpi_pool = params['mpi_pool']
    # Update coefficients
    bcast_tuple = (['coeffs'], [a])
    mpi_map("_set_coeffs", bcast_tuple=bcast_tuple,
            obj='f1', obj_val=fn,
            mpi_pool=mpi_pool, concatenate=False)
    # Evaluate grad_a L2 misfit
    scatter_tuple = (['x', 'w', 'f2'], [x, w, fvals])
    bcast_tuple = (['batch_size'], [batch_size])
    dmem_key_in_list = ['f1', 'params1']
    dmem_arg_in_list = ['f1', 'params1']
    dmem_val_in_list = [fn, params['params1']]
    reduce_obj = SumChunkReduce(axis=0)
    out = mpi_map(grad_a_L2squared_misfit,
                  scatter_tuple=scatter_tuple,
                  bcast_tuple=bcast_tuple,
                  dmem_key_in_list=dmem_key_in_list,
                  dmem_arg_in_list=dmem_arg_in_list,
                  dmem_val_in_list=dmem_val_in_list,
                  reduce_obj=reduce_obj,
                  mpi_pool=mpi_pool)
    if params['regularization'] is None:
        pass
    elif params['regularization']['type'] == 'L2':
        out += params['regularization']['alpha'] * \
            2. * (a - fn.regression_nominal_coeffs())
    fn.logger.debug("Regression Grad_a Obj. Eval. %d - ||grad_a L2-misfit|| = %.10e" % (
        params['nda_obj'], npla.norm(out)))
    return out
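

# Illustrative sketch (not part of the library API): for a model that is
# linear in its coefficients, f_a(x) = Phi(x) a, the gradient accumulated by
# `grad_a_L2squared_misfit` takes the form -2 Phi^T W (f - Phi a), with W the
# diagonal matrix of quadrature weights. `Phi` below is a hypothetical basis
# matrix of shape (m, N).
def _demo_grad_a_L2squared_misfit(Phi, a, fvals, w):
    r = fvals - np.dot(Phi, a)
    return -2. * np.dot(Phi.T, w * r)
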
def functional_regression_hess_a_objective(a, params):
    r""" Objective function :math:`\nabla_{\bf a}^2 \Vert f - f_{\bf a} \Vert^2_{\pi}`

    Args:
      a (:class:`ndarray<numpy.ndarray>` [:math:`N`]): coefficients
      params (dict): dictionary of parameters
    """
    fn = params['fn']
    params['nda2_obj'] += 1
    x = params['x']
    w = params['w']
    fvals = params['fvals']
    batch_size = params['batch_size'][2]
    mpi_pool = params['mpi_pool']
    # Update coefficients
    bcast_tuple = (['coeffs'], [a])
    mpi_map("_set_coeffs", bcast_tuple=bcast_tuple,
            obj='f1', obj_val=fn,
            mpi_pool=mpi_pool, concatenate=False)
    # Evaluate hess_a L2 misfit
    scatter_tuple = (['x', 'w', 'f2'], [x, w, fvals])
    bcast_tuple = (['batch_size'], [batch_size])
    dmem_key_in_list = ['f1', 'params1']
    dmem_arg_in_list = ['f1', 'params1']
    dmem_val_in_list = [fn, params['params1']]
    reduce_obj = SumChunkReduce(axis=0)
    out = mpi_map(hess_a_L2squared_misfit,
                  scatter_tuple=scatter_tuple,
                  bcast_tuple=bcast_tuple,
                  dmem_key_in_list=dmem_key_in_list,
                  dmem_arg_in_list=dmem_arg_in_list,
                  dmem_val_in_list=dmem_val_in_list,
                  reduce_obj=reduce_obj,
                  mpi_pool=mpi_pool)
    if params['regularization'] is None:
        pass
    elif params['regularization']['type'] == 'L2':
        out += np.diag(
            np.ones(len(fn.coeffs)) * 2. * params['regularization']['alpha'])
    fn.logger.debug("Regression Hess_a Obj. Eval. %d " % params['nda2_obj'])
    return out
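

# Illustrative sketch (not part of the library API): in the same
# linear-in-coefficients setting as above, the Hessian accumulated by
# `hess_a_L2squared_misfit` is the constant matrix 2 Phi^T W Phi, independent
# of the current coefficients. `Phi` below is a hypothetical basis matrix.
def _demo_hess_a_L2squared_misfit(Phi, w):
    PhiW = Phi * w[:, np.newaxis]
    return 2. * np.dot(PhiW.T, Phi)
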
def functional_regression_action_storage_hess_a_objective(a, v, params):
    r""" Assemble/fetch Hessian :math:`\nabla_{\bf a}^2 \Vert f - f_{\bf a} \Vert^2_{\pi}` and evaluate its action on :math:`v`

    Args:
      a (:class:`ndarray<numpy.ndarray>` [:math:`N`]): coefficients
      v (:class:`ndarray<numpy.ndarray>` [:math:`N`]): vector on which to
        apply the Hessian
      params (dict): dictionary of parameters
    """
    fn = params['fn']
    x = params['x']
    w = params['w']
    fvals = params['fvals']
    batch_size = params['batch_size'][2]
    mpi_pool = params['mpi_pool']
    # Update coefficients
    bcast_tuple = (['coeffs'], [a])
    mpi_map("_set_coeffs", bcast_tuple=bcast_tuple,
            obj='f1', obj_val=fn,
            mpi_pool=mpi_pool, concatenate=False)
    # Assemble Hessian
    if not params['hess_assembled']:
        params['nda2_obj'] += 1
        scatter_tuple = (['x', 'w', 'f2'], [x, w, fvals])
        bcast_tuple = (['batch_size'], [batch_size])
        dmem_key_in_list = ['f1', 'params1']
        dmem_arg_in_list = ['f1', 'params1']
        dmem_val_in_list = [fn, params['params1']]
        dmem_key_out_list = ['hess_a_L2_misfit']
        (params['hess_a_L2_misfit'], ) = mpi_map_alloc_dmem(
            storage_hess_a_L2squared_misfit,
            scatter_tuple=scatter_tuple,
            bcast_tuple=bcast_tuple,
            dmem_key_in_list=dmem_key_in_list,
            dmem_arg_in_list=dmem_arg_in_list,
            dmem_val_in_list=dmem_val_in_list,
            dmem_key_out_list=dmem_key_out_list,
            mpi_pool=mpi_pool, concatenate=False)
        params['hess_assembled'] = True
        fn.logger.debug("Regression Storage Hess_a Obj. Eval. %d " % params['nda2_obj'])
    # Evaluate the action of hess_a L2 misfit
    params['nda2_obj_dot'] += 1
    bcast_tuple = (['v'], [v])
    dmem_key_in_list = ['hess_a_L2_misfit']
    dmem_arg_in_list = ['H']
    dmem_val_in_list = [params['hess_a_L2_misfit']]
    reduce_obj = SumChunkReduce(axis=0)
    out = mpi_map(action_stored_hess_a_L2squared_misfit,
                  bcast_tuple=bcast_tuple,
                  dmem_key_in_list=dmem_key_in_list,
                  dmem_arg_in_list=dmem_arg_in_list,
                  dmem_val_in_list=dmem_val_in_list,
                  reduce_obj=reduce_obj,
                  mpi_pool=mpi_pool)
    if params['regularization'] is None:
        pass
    elif params['regularization']['type'] == 'L2':
        regmat = np.diag(
            np.ones(len(fn.coeffs)) * 2. * params['regularization']['alpha'])
        out += np.dot(regmat, v)
    fn.logger.debug("Regression Action Hess_a Obj. Eval. %d " % params['nda2_obj_dot'])
    return out
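

# Illustrative sketch (not part of the library API): the assemble-once /
# apply-many pattern used above. The Hessian is stored on the first call and
# only matrix-vector products are computed afterwards, which is what a
# Newton-CG style ``hessp`` callback needs. `store` plays the role of the
# ``params`` dictionary and `Phi` is a hypothetical basis matrix.
def _demo_action_stored_hess(Phi, w, v, store):
    if 'H' not in store:                       # assemble the Hessian once
        PhiW = Phi * w[:, np.newaxis]
        store['H'] = 2. * np.dot(PhiW.T, Phi)
    return np.dot(store['H'], v)               # cheap action on v thereafter
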
def affine_triangular_map_regression(
        tm: ParametricComponentwiseMap,
        t,
        tparams=None,
        d=None,
        qtype=None,
        qparams=None,
        x=None,
        w=None,
        regularization=None,
        **kwargs):
    r""" Compute :math:`{\bf a}^* = \arg\min_{\bf a} \Vert T - T({\bf a}) \Vert_{\pi}`.

    This regression problem can be completely decoupled if the measure is a
    product measure, obtaining

    .. math::

       a^{(i)*} = \arg\min_{\bf a^{(i)}} \Vert T_i - T_i({\bf a}^{(i)}) \Vert_{\pi_i}

    Args:
      tm (ParametricComponentwiseMap): map :math:`T({\bf a})`
      t (function or :class:`ndarray<numpy.ndarray>` [:math:`m`]): function
        :math:`t` with signature ``t(x)`` or its function values
      tparams (dict): parameters for function :math:`t`
      d (Distribution): distribution :math:`\pi`
      qtype (int): quadrature type to be used for the approximation of
        :math:`\mathbb{E}_{\pi}`
      qparams (object): parameters necessary for the construction of the
        quadrature
      x (:class:`ndarray<numpy.ndarray>` [:math:`m,d`]): quadrature points
        used for the approximation of :math:`\mathbb{E}_{\pi}`
      w (:class:`ndarray<numpy.ndarray>` [:math:`m`]): quadrature weights
        used for the approximation of :math:`\mathbb{E}_{\pi}`
      regularization (dict): defines the regularization to be used.
        If ``None``, no regularization is applied.
        If key ``type=='L2'``, Tikhonov regularization is applied with the
        coefficient in key ``alpha``.

    Returns:
      (:class:`tuple<tuple>` (:class:`ndarray<numpy.ndarray>` [:math:`N`],
      :class:`list<list>`)) -- containing the :math:`N` coefficients and log
      information.

    .. note:: the resulting coefficients :math:`{\bf a}` are automatically set
       at the end of the optimization. Use :func:`get_coeffs` in order to
       retrieve them.

    .. note:: The parameters ``(qtype,qparams)`` and ``(x,w)`` are mutually
       exclusive, but one pair of them is necessary.
    """
    if not isinstance(tm, AffineTriangularMap):
        raise ValueError(
            'This routine for regression is designed only for AffineTriangularMap')
    if (x is None) and (w is None):
        (x, w) = d.quadrature(qtype, qparams)
    if isinstance(t, np.ndarray):
        T = t
    else:
        T = t(x)
    # Build weighted inversion matrix
    X = np.hstack((np.ones((x.shape[0], 1)), x))
    XW = X * w[:, np.newaxis]
    P = np.dot(XW.T, X)
    # Regularization
    if regularization is not None:
        if regularization['type'] == 'L2':
            P += regularization['alpha'] * np.eye(P.shape[0])
        else:
            raise NotImplementedError(
                "Regularization type %s not implemented." % regularization['type'])
    # Compute Cholesky decomposition of the inversion matrix
    U = scila.cholesky(P)
    # Iterate over the dimensions and solve for each component
    # (enforcing lower triangular structure)
    cnst = np.zeros(tm.dim)
    lin = np.zeros((tm.dim, tm.dim))
    for i in range(tm.dim):
        XWi = XW[:, :i+2]
        Ui = U[:i+2, :i+2]
        Ti = T[:, i]
        # Compute right hand side
        rhs = np.dot(XWi.T, Ti)
        # Solve system
        beta = scila.solve_triangular(Ui, rhs, lower=False, trans='T')
        beta = scila.solve_triangular(Ui, beta, lower=False)
        # Write coefficients
        cnst[i] = beta[0]
        lin[i, :i+1] = beta[1:]
    tm.c = cnst
    tm.L = lin
    log = {'success': True}
    return (tm.coeffs, log)
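

# Illustrative sketch (not part of the library API): the linear algebra used
# in `affine_triangular_map_regression` for a single component, written out
# with plain numpy/scipy. It solves the weighted normal equations
# (X^T W X) beta = X^T W t, restricted to the constant term and the first
# k+1 input variables, via the Cholesky factor of the full normal matrix.
# All inputs below are hypothetical test data for this example only.
def _demo_affine_component_fit(k=1, m=200, seed=0):
    rng = np.random.RandomState(seed)
    x = rng.randn(m, 3)                           # 3-D quadrature points
    w = np.full(m, 1. / m)                        # uniform quadrature weights
    t = 0.5 + 2. * x[:, 0] - 1. * x[:, 1]         # affine target for component k=1
    X = np.hstack((np.ones((m, 1)), x))           # [1, x_1, x_2, x_3]
    XW = X * w[:, np.newaxis]
    P = np.dot(XW.T, X)
    U = scila.cholesky(P)                         # upper Cholesky factor of P
    XWk = XW[:, :k+2]                             # constant + first k+1 variables
    Uk = U[:k+2, :k+2]
    rhs = np.dot(XWk.T, t)
    beta = scila.solve_triangular(Uk, rhs, lower=False, trans='T')
    beta = scila.solve_triangular(Uk, beta, lower=False)
    return beta                                   # approximately [0.5, 2., -1.]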