"""Synthetic test problems for benchmarking robust estimation.

All synthetic functions take the number of points n as their only argument and
return x, f, and y, where y = f + e and e ~ N(0, sigma). Either f or y will be
corrupted by outliers (with processes from benchmark/contamination_processes.py),
and the goal will be to estimate f.

These problems are adapted from Andrade and Takeda (2023), "Robust Gaussian
Process Regression with the Trimmed Marginal Likelihood" and their code at
https://github.com/andrade-stats/TrimmedMarginalLikelihoodGP.
"""

from typing import Tuple

import numpy
import numpy as np


def get_bow_data(n: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Sample the noise-free "bow" (simple sine) problem of Andrade & Takeda.

    Inputs are drawn uniformly from [0, 1) with NumPy's global random state,
    rescaled, and pushed through a sine-shaped response. The scaling and the
    response formula follow the synthetic "simple sin" helpers in
    https://github.com/andrade-stats/TrimmedMarginalLikelihoodGP/blob/f810d4384454682a758fb79fd7352c4b3d4db7f4/simDataGeneration.py#L226

    Args:
        n: Number of points to sample.

    Returns:
        A tuple ``(x, f, y)`` where ``x`` holds the scaled inputs with shape
        ``(n, 1)`` and ``f`` holds the response values with shape ``(n,)``.
        No observation noise is added here, so ``y`` is the same array as ``f``.
    """
    sqrt_12 = np.sqrt(12.0)

    uniform_draws = np.random.rand(n)
    # Centre and stretch the U[0, 1) draws; since Var(U) = 1/12 this gives the
    # scaled inputs zero mean and unit variance.
    scaled_inputs = (uniform_draws - 0.5) * sqrt_12
    # The response undoes the scaling inside the sine, i.e. it is a shifted and
    # scaled sin(pi * u) of the underlying uniform draw.
    response = 3.235 * np.sin(((scaled_inputs / sqrt_12) + 0.5) * np.pi) - 2.058

    return scaled_inputs[:, None], response, response


def get_friedman_data(n: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Generate the Friedman regression problem with a fixed random seed.

    Adapted from
    https://github.com/andrade-stats/TrimmedMarginalLikelihoodGP/blob/f810d4384454682a758fb79fd7352c4b3d4db7f4/simDataGeneration.py#L270C5-L270C20

    The data are drawn from a private, fixed-seed generator, so repeated calls
    with the same ``n`` return identical arrays and NumPy's global random state
    is left untouched. The values match what seeding the global generator with
    the same seed would produce.

    Args:
        n: Number of points to generate.

    Returns:
        A tuple ``(X, f, y)`` where ``X`` has shape ``(n, 10)`` with entries
        drawn uniformly from [0, 1), ``f`` is the noise-free response with
        shape ``(n,)``, and ``y = f + e`` with ``e`` standard normal noise.
    """
    num_variables = 10
    seed = 9899832

    # Use a local legacy generator instead of `numpy.random.seed`: it yields
    # the same stream for this seed without reseeding the global RNG that
    # callers may be using for their own sampling.
    rng = np.random.RandomState(seed)

    X: np.ndarray = rng.rand(n, num_variables)
    # Only the first five columns enter the response; the remaining five are
    # irrelevant inputs.
    f = (
        np.sin(np.pi * X[:, 0] * X[:, 1]) * 10.0
        + 20.0 * np.square(X[:, 2] - 0.5)
        + 10.0 * X[:, 3]
        + 5.0 * X[:, 4]
    )

    # Noise is drawn after X from the same stream, preserving the draw order.
    noise = rng.normal(size=n)
    y = f + noise

    return X, f, y
