import numpy as np
from numpy.random import default_rng
from dataset import read_dataset, save_dataset
import os

# Dataset directory names to process (one sub-directory each under
# exponential_loss_csvs/datasets/).
#
# Alternative selections, left commented out:
# datasets = ['column', 'digits4', 'iris', 'monks1', 'wine']
# datasets = ['penguin']
# datasets = ['australian_credit', 'give_credit']
datasets = [
    'banknote',
    'bcc',
    'penguin',
    'iran_customer_churn',
    'column',
    'digits4',
    'seeds',
    'occupancy_detection',
    'fico',
    'australian_credit',
    'give_credit',
]

# Noise levels 0.05, 0.10, ..., 0.50. Each value is used both as the standard
# deviation of the Gaussian noise and in the output directory name, so the
# floats must match the decimal literals exactly; step / 20 does (division of
# exact integers is correctly rounded).
noises = [step / 20 for step in range(1, 11)]

# Number of independently seeded noisy copies generated per noise level.
num_trials = 100

# For every dataset and every noise level, write `num_trials` perturbed copies
# of the features (one per trial directory) and then one combined dataset that
# stacks all trials along axis 0. Labels are never perturbed.
for d in datasets:
    dataset_dir = f'exponential_loss_csvs/datasets/{d}'
    X, y0, y1 = read_dataset(dataset_dir)

    # Multiplicative mask applied to the noise. The added leading length-1
    # axis lets it broadcast over the first axis of X (presumably one entry
    # per feature column, with X laid out as (samples, features) -- TODO
    # confirm). Named `noise_mask` so the builtin `filter` is not shadowed.
    noise_mask = np.load(f'{dataset_dir}/filter.npy')[np.newaxis, :]

    for p in noises:
        noise_dir = f'{dataset_dir}/noise_{p}'
        noisy_trials = []

        for t in range(num_trials):
            # Deterministic seed so reruns reproduce the same files.
            # NOTE: it depends only on the noise level and trial index, not
            # on the dataset, so datasets share the same seed sequence.
            seed = int((p * 9182) * (t + 4.8)**2)
            rng = default_rng(seed)

            # Zero-mean Gaussian noise with standard deviation p, scaled
            # element-wise by the mask before being added to the features.
            x = X + rng.normal(loc=0, scale=p, size=X.shape) * noise_mask
            save_dataset(f'{noise_dir}/trial_{t}', x, y0, y1)
            noisy_trials.append(x)

        # The labels are identical in every trial, so repeat them directly
        # instead of collecting one reference per loop iteration.
        save_dataset(
            noise_dir,
            np.concatenate(noisy_trials, axis=0),
            np.concatenate([y0] * num_trials, axis=0),
            np.concatenate([y1] * num_trials, axis=0),
        )
