import pytest
import os
import random
import math
import numpy as np
from sklearn.linear_model import LinearRegression
import networkx as nx

from benchmark.data_generators import VanillaGenerator, ConfundedGenerator, MeasureErrorGenerator, TiminoGenerator, UnfaithfulGenerator
from utils._data import DataSimulator
from utils._random_graphs import GaussianRandomPartition

# Seed both Python's `random` module and NumPy's legacy global RNG with the
# same fixed value at import time, so code in this module that draws from
# either global generator starts from a known state.
seed = 42
random.seed(seed)
np.random.seed(seed)


##################### FIXTURES #####################

@pytest.fixture
def dag_sample():
    """Provide a fixed 6x6 integer 0/1 matrix with six nonzero entries.

    All nonzero entries lie strictly above the diagonal, so when the matrix
    is read as a directed adjacency matrix the graph has no cycles.
    (Presumably entry ``[i, j] == 1`` means an edge ``i -> j`` -- confirm
    against the consumers of this fixture.)
    """
    # (row, column) positions of the entries equal to 1.
    nonzero_cells = [(0, 2), (0, 5), (1, 2), (2, 3), (2, 4), (3, 4)]

    adjacency = np.zeros((6, 6), dtype=int)
    for row, col in nonzero_cells:
        adjacency[row, col] = 1
    return adjacency


###################### Add development tests here! ######################
def test_correct_sample_size():
    """Check that every stored dataset has the sample count its config declares.

    For each entry of ``configs`` (folder name -> expected sample count), the
    files ``data0.csv`` .. ``data19.csv`` in that folder are loaded and the
    size of the first axis of the loaded array is compared with the expected
    count.

    NOTE: the datasets are read from a hard-coded absolute location
    (``<root>/efs/data/ER/gauss/vanilla/vanilla``); the test cannot pass on a
    machine where that folder is not populated.
    """
    base_folder = os.path.join(os.sep, "efs", "data", "ER", "gauss", "vanilla", "vanilla")
    # Maps dataset folder name -> number of samples each file must contain.
    configs = {
        "100_large20_sparse": 100,
        # "1000_large20_sparse" : 1000,
    }
    # Number of dataset files (data0.csv .. data19.csv) checked per config.
    datasets_per_config = 20

    for graph, num_samples in configs.items():
        for dataset_idx in range(datasets_per_config):
            csv_path = os.path.join(base_folder, graph, f"data{dataset_idx}.csv")
            data = np.genfromtxt(csv_path, delimiter=",")
            n = data.shape[0]
            # Name the offending file so a failure is actionable.
            assert n == num_samples, (
                f"{csv_path}: expected {num_samples} samples, found {n}"
            )