Source code for src.manfred.minimize_manfred

import itertools

import numpy as np
from estimagic.batch_evaluators import joblib_batch_evaluator

from src.manfred.direct_search import do_manfred_direct_search
from src.manfred.linesearch import do_manfred_linesearch
from src.manfred.search_direction import calculate_manfred_direction
from src.manfred.shared import aggregate_evaluations
from src.manfred.shared import do_evaluations
from src.manfred.shared import hash_array
from src.manfred.shared import is_in_bounds


[docs]def minimize_manfred(
    func,
    x,
    step_sizes,
    lower_bounds=None,
    upper_bounds=None,
    max_fun=100_000,
    convergence_direct_search_mode="fast",
    direction_window=3,
    xtol=0.01,
    linesearch_active=True,
    linesearch_frequency=3,
    linesearch_n_points=5,
    max_step_sizes=None,
    n_evaluations_per_x=1,
    seed=0,
    gradient_weight=0.5,
    momentum=0.05,
    batch_evaluator=joblib_batch_evaluator,
    batch_evaluator_options=None,
):
    """MANFRED algorithm.

    MANFRED stands for Monotone Approximate Noise resistent algorithm For Robust
    optimization without Exact Derivatives

    It combines a very robust direct search step with an efficient line search step
    based on a search direction that is a byproduct of the direct search.

    It is meant for optimization problems that fulfill the following conditions:

    - A low number of parameters (10 is already a lot)
    - Presence of substantial true noise, i.e. the criterion function is stochastic and
      the noise is large enough to introduce local minima.
    - Bounds on all parameters are known
    - Nonlinear least square problems where the residuals are available. Moreover,
      we assume that all parameters influence the residuals positively over the whole
      parameter space.

    Despite being able to handle small local minima introduced by noise, MANFRED is a
    local optimization algorithm. If you need a global solution in the presence of
    multiple minima you need to run it from several starting points.

    MANFRED has the following features:

    - Highly parallelizable: You can scale MANFRED to up to 2 ** n_params *
      n_evaluations_per_x cores for a non parallelized criterion function.
    - Monotone: Only function values that actually lead to an improvement are used.

    Args:
        xtol (float): Maximal change in parameter
            vectors between two iterations to declare convergence.
        convergence_direct_search_mode (str): One of "fast", "thorough". If thorough,
            convergence is only declared if a two sided search for all parameters
            does not yield any improvement.
        max_fun (int): Maximal number of criterion evaluations. This
            The actual number of evaluations might be higher, because we only check
            at the start of each iteration if the maximum is reached.
        step_sizes (float or list): Step size or list of step sizes for the direct
            search step of the optimization. This step size refers to a rescaled
            parameter vector where all lower bounds are 0 and all upper bounds are 1. It
            is thus a relative step size.
            This is also the step size used to calculate an approximated gradient via
            finite differences because the approximated gradient is a free by product
            of the direct search. If a list of step sizes is provided, the algorithm is
            run with each step size in the list until convergence. Especially for noisy
            problems it is good to use a list of decreasing step sizes.
        max_step_sizes (float or list): Maximum step size that can be taken in any
            direction during the line search step. This step size also refers to the
            rescaled parameter vector. It needs to be a float or a list of
            the same length as step_sizes. A large max_step_size can lead to a fast
            convergence if the search direction is good. This is especially helpful at
            the beginning. Later, a small max_step limits the search space for the line
            search and can thus increase precision.
        direction_window (int): How many accepted parameters are used to
            determine if a parameter has momentum and we can thus switch
            to one-sided search for that parameter.
        gradient_weight (float): The search direction for the line search step is a
            weighted average of the negative gradient and direction taken in the last
            successful direct search step (both normalized to unit length).
            gradient_weight determines the weight of the gradient in this combination.
            Since the gradient contains quantitative information on the steepness in
            each direction it can lead to very fast convergence but is more sensitive
            to noise. Moreover, it only contains first order information.
            The direction from the direct search contains some second order information
            and is more robust to noise because it only uses the ordering of function
            values and not their size.
        momentum (float): The search direction is momentum * past search direction +
            (1 - momentum) * momentum free current search direction. More momentum can
            help to break out of local minima and to average out noise in the
            search direction calculation.
        linesearch_active (bool): Whether line search is used.
        linesearch_frequency (int or list): If linesearch_active is True this number
            specifies every how many iterations we do a line search step after
            the direct search step.Line search steps can lead to fast progress
            and/or refined solutions and the number of required function
            evaluations does not depend on the dimensionality. The disadvantage
            is that they make caching more inefficient by leaving the
            grid and that they make it harder to check convergence of the
            direct search with a given step size. 3 seems to be a sweet spot. Can be a
            list with the same length as step_sizes.
        linesearch_n_points (int): At how many points the function is evaluated during
            the line search. More points mean higher precision but also more sensitivity
            to noise.
        noise_seed (int): Starting point of a seed sequence.
        noise_n_evaluations_per_x (int): How often the criterion function is evaluated
            at each parameter vector in order to average out noise.
        batch_evaluator (callable): An estimagic batch evaluator.
        batch_evaluator_options (dict): Keyword arguments for the batch evaluator.

    """
    if batch_evaluator_options is None:
        batch_evaluator_options = {}

    bounds = _process_bounds(x, lower_bounds, upper_bounds)
    step_sizes, max_step_sizes = _process_step_sizes(step_sizes, max_step_sizes)
    n_evaluations_per_x = _process_n_evaluations_per_x(
        n_evaluations_per_x, len(step_sizes)
    )
    linesearch_active = _process_scalar_or_list_arg(linesearch_active, len(step_sizes))
    linesearch_frequency = _process_scalar_or_list_arg(
        linesearch_frequency, len(step_sizes)
    )

    assert 0 <= gradient_weight <= 1

    state = {
        "func_counter": 0,
        "iter_counter": 0,
        "inner_iter_counter": 0,
        "cache": {},
        "x_history": [hash_array(x)],
        "direction_history": [],
        "seed": itertools.count(seed),
    }

    do_evaluations(
        func,
        [x],
        state,
        n_evaluations_per_x[0],
        return_type="aggregated",
        batch_evaluator=batch_evaluator,
        batch_evaluator_options=batch_evaluator_options,
    )

    current_x = x
    last_iteration_x = x + np.inf
    for step_size, max_step_size, n_evals, use_ls, ls_freq in zip(
        step_sizes,
        max_step_sizes,
        n_evaluations_per_x,
        linesearch_active,
        linesearch_frequency,
    ):
        if state["func_counter"] >= max_fun:
            break

        state["inner_iter_counter"] = 0
        while True:
            if state["func_counter"] >= max_fun:
                break

            current_x, state = do_manfred_direct_search(
                func=func,
                current_x=current_x,
                step_size=step_size,
                state=state,
                direction_window=direction_window,
                bounds=bounds,
                mode="fast",
                n_evaluations_per_x=n_evals,
                batch_evaluator=batch_evaluator,
                batch_evaluator_options=batch_evaluator_options,
            )

            if (current_x != last_iteration_x).any():
                state["x_history"].append(hash_array(current_x))
                direction = calculate_manfred_direction(
                    current_x=current_x,
                    step_size=step_size,
                    state=state,
                    gradient_weight=gradient_weight,
                    momentum=momentum,
                )
                state["direction_history"].append(direction)
                after_direct_search_x = current_x

            if state["func_counter"] >= max_fun:
                break

            if use_ls and (state["iter_counter"] % ls_freq) == 0:
                current_x, state = do_manfred_linesearch(
                    func=func,
                    current_x=current_x,
                    direction=direction,
                    state=state,
                    n_points=linesearch_n_points,
                    bounds=bounds,
                    max_step_size=max_step_size,
                    n_evaluations_per_x=n_evals,
                    batch_evaluator=batch_evaluator,
                    batch_evaluator_options=batch_evaluator_options,
                )
                if (current_x != after_direct_search_x).any():
                    state["x_history"].append(hash_array(current_x))

            if state["func_counter"] >= max_fun:
                break

            # if neither the line search nor the first direct search brought any changes
            # try the more extensive line search mode
            if convergence_direct_search_mode == "thorough":
                if (np.abs(current_x - last_iteration_x) <= xtol).all():
                    current_x, state = do_manfred_direct_search(
                        func=func,
                        current_x=current_x,
                        step_size=step_size,
                        state=state,
                        direction_window=direction_window,
                        bounds=bounds,
                        mode="thorough",
                        n_evaluations_per_x=n_evals,
                        batch_evaluator=batch_evaluator,
                        batch_evaluator_options=batch_evaluator_options,
                    )

                    if (current_x != last_iteration_x).any():
                        state["x_history"].append(hash_array(current_x))
                        direction = calculate_manfred_direction(
                            current_x=current_x,
                            step_size=step_size,
                            state=state,
                            gradient_weight=gradient_weight,
                            momentum=momentum,
                        )
                        state["direction_history"].append(direction)

            state["iter_counter"] = state["iter_counter"] + 1
            state["inner_iter_counter"] = state["inner_iter_counter"] + 1
            # cause convergence
            if (np.abs(current_x - last_iteration_x) <= xtol).all():
                break
            last_iteration_x = current_x

    out_history = {"criterion": [], "x": []}
    for x_hash in state["x_history"]:
        cache_entry = state["cache"][x_hash]
        out_history["criterion"].append(aggregate_evaluations(cache_entry["evals"]))
        out_history["x"].append(cache_entry["x"])

    res = {
        "solution_x": current_x,
        "n_criterion_evaluations": state["func_counter"],
        "n_iterations": state["iter_counter"],
        "history": out_history,
    }

    return res


[docs]def _process_bounds(x, lower_bounds, upper_bounds):
    for bounds in [lower_bounds, upper_bounds]:
        if not np.isfinite(bounds).all():
            raise ValueError("All bounds need to be finite.")

    if not (lower_bounds < upper_bounds).all():
        raise ValueError("Lower bounds must be strictly smaller than upper bounds.")

    bounds = {"lower": lower_bounds, "upper": upper_bounds}

    if not is_in_bounds(x, bounds):
        raise ValueError("x must be inside the bounds.")

    return bounds


[docs]def _process_step_sizes(step_sizes, max_step_sizes):
    if step_sizes is None:
        step_sizes = 0.1
    step_sizes = _process_scalar_or_list_arg(step_sizes)
    if max_step_sizes is None:
        max_step_sizes = [ss * 3 for ss in step_sizes]
    else:
        max_step_sizes = _process_scalar_or_list_arg(max_step_sizes, len(step_sizes))

    for ss, mss in zip(step_sizes, max_step_sizes):
        assert ss <= mss

    return step_sizes, max_step_sizes


[docs]def _process_n_evaluations_per_x(n_evaluations_per_x, target_len):
    processed = _process_scalar_or_list_arg(n_evaluations_per_x, target_len)
    for n_evals in processed:
        assert n_evals >= 1
    return processed


[docs]def _process_scalar_or_list_arg(arg, target_len=None):
    if isinstance(arg, (list, tuple, np.ndarray)):
        processed = list(arg)
        if target_len is not None:
            assert len(processed) == target_len
    elif target_len is None:
        processed = [arg]
    else:
        processed = [arg] * target_len
    return processed