"""
Functions for contaminating data with outliers, to be used in conjunction with
problems in relevance_pursuit/benchmark/problems.

These processes are adapted from Andrade and Takeda (2023), "Robust Gaussian
Process Regression with the Trimmed Marginal Likelihood" and their code at
https://github.com/andrade-stats/TrimmedMarginalLikelihoodGP.
"""

import numpy as np


def add_uniform_noise_from_indices(
    y: np.ndarray, rndOutlierIds: np.ndarray, symmetric: bool
) -> np.ndarray:
    """
    Helper method.

    Returns a copy of y in which the entries at rndOutlierIds are shifted by
    uniformly distributed outlier noise. Every disturbance has a magnitude in
    [3 * sd(y), 9 * sd(y)]; only the signs depend on `symmetric`:

    - symmetric = True: each disturbance is independently negative or positive
      with equal probability. The negative disturbances are assigned to the
      leading entries of rndOutlierIds and the positive ones to the rest.
    - symmetric = False: all disturbances share a single sign, which is
      picked at random (negative or positive, equal probability) per call.

    Randomness is drawn from the global `np.random` state; seed it beforehand
    for reproducible results.

    Copied from
    https://github.com/andrade-stats/TrimmedMarginalLikelihoodGP/blob/master/generateSyntheticData.py#L140C1-L168C13
    with slight modifications.

    Args:
        y: The function values to add noise to. Never modified in place.
        rndOutlierIds: The indices of the entries of y to turn into outliers.
        symmetric: If True, outliers are placed on both sides of the data
            (~half negative, ~half positive). If False, all outliers are placed
            on the same, randomly chosen, side.

    Returns:
        A new array holding the contaminated values. Non-floating input (e.g.
        integer data) is promoted to float64 when noise is added, since the
        noise is real-valued.
    """
    if len(rndOutlierIds) == 0:
        # Return a copy here too, so the result never aliases the input.
        return y.copy()

    y_std = np.std(y)
    cut_offset = 3.0 * y_std  # minimum distance of an outlier from its value
    outlier_length = 12.0 * y_std  # total width of the two outlier bands
    half_length = outlier_length / 2

    raw_samples = np.random.uniform(
        low=0.0, high=outlier_length, size=(len(rndOutlierIds),)
    )

    if symmetric:
        # Samples in the lower half of the range become negative outliers,
        # samples in the upper half become positive outliers.
        lower_outliers = -raw_samples[raw_samples < half_length] - cut_offset
        higher_outliers = (
            raw_samples[raw_samples >= half_length] - half_length + cut_offset
        )
        noise = np.hstack((lower_outliers, higher_outliers))
    else:
        # scale by 0.5 in order to make symmetric and unsymmetric equally difficult
        noise = -raw_samples * 0.5 - cut_offset
        if np.random.uniform() > 0.5:
            # flip all outliers to the positive side
            noise = -1.0 * noise

    new_y = y.copy()
    if not np.issubdtype(new_y.dtype, np.inexact):
        # In-place addition of float noise to an integer array raises a
        # casting error, so promote to float first.
        new_y = new_y.astype(np.float64)
    new_y[rndOutlierIds] += noise
    return new_y


def add_uniform_noise(
    y: np.ndarray, seed: int, outlier_frac: float = 0.1
) -> np.ndarray:
    """The "uniform" noise from Andrade and Takeda.

    Seeds the global NumPy random state with `seed`, picks
    round(outlier_frac * len(y)) distinct positions of y, and contaminates
    them via add_uniform_noise_from_indices with symmetric=True.

    Args:
        y: The function values to contaminate.
        seed: Seed passed to np.random.seed; note that this resets the global
            NumPy random state.
        outlier_frac: Fraction of the entries of y that are turned into
            outliers.

    Returns:
        The result of add_uniform_noise_from_indices for the chosen positions.
    """
    np.random.seed(seed)
    num_points = len(y)
    num_outliers = int(round(outlier_frac * num_points))
    # Sample distinct outlier positions (no index is picked twice).
    outlier_ids = np.random.choice(
        np.arange(num_points), num_outliers, replace=False
    )
    return add_uniform_noise_from_indices(y, outlier_ids, symmetric=True)


def add_focused_noise(
    y: np.ndarray, seed: int, outlier_frac: float = 0.1
) -> np.ndarray:
    """TODO: not implemented yet.

    Placeholder that shares its signature with the other noise functions in
    this module. It currently ignores all of its arguments and returns None,
    despite the declared ndarray return type.
    """
    # NOTE(review): presumably meant to implement a "focused" outlier setting
    # analogous to the other noise processes here -- confirm before relying
    # on this function.
    return None


def add_asymmetric_noise(
    y: np.ndarray, seed: int, outlier_frac: float = 0.1
) -> np.ndarray:
    """One-sided counterpart of add_uniform_noise.

    Seeds the global NumPy random state with `seed`, picks
    round(outlier_frac * len(y)) distinct positions of y, and contaminates
    them via add_uniform_noise_from_indices with symmetric=False.

    Args:
        y: The function values to contaminate.
        seed: Seed passed to np.random.seed; note that this resets the global
            NumPy random state.
        outlier_frac: Fraction of the entries of y that are turned into
            outliers.

    Returns:
        The result of add_uniform_noise_from_indices for the chosen positions.
    """
    # Seeding and position sampling mirror add_uniform_noise, so a given seed
    # selects the same outlier positions as in the symmetric case.
    np.random.seed(seed)
    candidates = np.arange(len(y))
    n_outliers = int(round(outlier_frac * len(y)))
    chosen = np.random.choice(candidates, n_outliers, replace=False)
    return add_uniform_noise_from_indices(y, chosen, symmetric=False)
