import os
import pandas as pd

def normalize_group(group):
    '''
    Normalize the metrics of one group relative to the best value in that group.

    This is an auxiliary function of smart_normalize.

    Separation metrics ('sep_min', 'sep_avg') are divided by the group maximum.
    Cohesion metrics ('max_diam', 'max_avg', 'cs_ratio_DM', 'cs_ratio_AV') are
    replaced by the group minimum divided by the value.

    Parameters:
        group: DataFrame holding the rows of one group; it must contain the six
            metric columns listed above.

    Returns:
        A normalized copy of group. The input frame is left untouched.
    '''
    metrics_sep = ['sep_min', 'sep_avg']
    metrics_coesion = ['max_diam', 'max_avg', 'cs_ratio_DM', 'cs_ratio_AV']

    # Work on a copy: pandas does not support functions that mutate the object
    # handed to them by groupby.apply, and a direct caller should not see its
    # own frame modified either.
    normalized = group.copy()

    for metric in metrics_sep:
        max_value = normalized[metric].max()
        normalized[metric] = normalized[metric] / max_value

    for metric in metrics_coesion:
        min_value = normalized[metric].min()
        normalized[metric] = min_value / normalized[metric]

    return normalized

def smart_normalize(df, output_file):
    '''
    Normalize every metric by the best value inside each (data_set, k) group.

    Each group is passed to normalize_group:
    metrics whose best value is the maximum are divided by that maximum, and
    metrics whose best value is the minimum become minimum / value.

    The normalized frame gets a fresh 0..n-1 index, is written to output_file
    as a '|'-separated CSV without the index column, and is returned.
    '''
    per_group = df.groupby(['data_set', 'k'])
    normalized = per_group.apply(normalize_group)
    normalized = normalized.reset_index(drop=True)
    normalized.to_csv(output_file, sep='|', index=False)
    return normalized

def get_ratios_group(group):
    '''
    Add the ratios between the 'average' method and the 'single' / 'complete'
    methods to one group.

    This is an auxiliary function of get_ratios.

    For every metric a column named '<metric>_avg_<baseline>' is added, holding
    average[metric] / baseline[metric]. The value is stored only on the rows
    whose method is 'average'; the other rows get NaN in the new columns.
    When a method appears more than once, its first row is used.

    Parameters:
        group: DataFrame with a 'method' column and the six metric columns; it
            must contain at least one row for each of 'average', 'single' and
            'complete'.

    Returns:
        A copy of group with the twelve ratio columns appended. The input frame
        is left untouched.

    Raises:
        IndexError: if one of the three methods is missing from group.
    '''
    # (baseline method, metrics), listed in the order the columns are appended.
    ratio_specs = [
        ('single', ['sep_min', 'sep_avg']),
        ('complete', ['sep_min', 'sep_avg']),
        ('complete', ['max_diam', 'max_avg']),
        ('single', ['max_diam', 'max_avg']),
        ('complete', ['cs_ratio_DM', 'cs_ratio_AV']),
        ('single', ['cs_ratio_DM', 'cs_ratio_AV']),
    ]

    # Work on a copy: pandas does not support functions that mutate the object
    # handed to them by groupby.apply.
    result = group.copy()

    is_average = result['method'] == 'average'
    avg = result[is_average].iloc[0]
    baselines = {
        'single': result[result['method'] == 'single'].iloc[0],
        'complete': result[result['method'] == 'complete'].iloc[0],
    }

    for baseline, metrics in ratio_specs:
        reference = baselines[baseline]
        for metric in metrics:
            column = metric + '_avg_' + baseline
            result.loc[is_average, column] = avg[metric] / reference[metric]

    return result

def get_ratios(average_df, output_file):
    '''
    Calculate, for each (data_set, k_size) group, the ratios between the
    'average' method and the 'single' and 'complete' methods for every metric.

    Each group is passed to get_ratios_group. Only the rows of the 'average'
    method are kept, and the 'method' column and the raw metric columns are
    dropped so that only the ratio columns (plus any remaining columns of
    average_df) are left.

    The result is written to output_file as a '|'-separated CSV without the
    index column and is also returned, like the other writer functions in this
    file.
    '''
    with_ratios = average_df.groupby(['data_set', 'k_size']).apply(get_ratios_group)
    ratios = with_ratios[with_ratios['method'] == 'average'].reset_index(drop=True)
    ratios = ratios.drop(
        columns=['method', 'sep_min', 'sep_avg', 'max_diam', 'max_avg', 'cs_ratio_DM', 'cs_ratio_AV']
    )
    ratios.to_csv(output_file, sep='|', index=False)
    return ratios

def min_max_ratio(all_results_df, output_file):
    '''
    Calculate, for each (data_set, method, k_size) group and each metric, the
    relative spread (max_value - min_value) / mean_value over the rows of the
    group (one row per k value, presumably - confirm against the input files).

    Rows of the 'ward' method are ignored and the 'k' and 'time' columns are
    dropped before grouping. Any other remaining column keeps its group mean.

    Parameters:
        all_results_df: DataFrame with the columns 'data_set', 'method',
            'k_size', 'k', 'time' and the six metric columns.
        output_file: path of the '|'-separated CSV to write.

    Returns:
        The summary DataFrame, with 'data_set', 'method' and 'k_size' as
        regular columns.
    '''
    group_keys = ['data_set', 'method', 'k_size']
    metrics = ['sep_min', 'sep_avg', 'max_diam', 'max_avg', 'cs_ratio_DM', 'cs_ratio_AV']

    filtered = all_results_df.drop(columns=['k', 'time'])
    filtered = filtered.loc[filtered['method'] != 'ward']

    grouped = filtered.groupby(group_keys)
    summary = grouped.mean()
    spread = grouped[metrics].max() - grouped[metrics].min()
    summary[metrics] = spread / summary[metrics]

    # The group keys live in the index after groupby; move them back to columns
    # so that writing with index=False does not lose them.
    summary = summary.reset_index()
    summary.to_csv(output_file, sep='|', index=False)
    return summary

def concatenate_results(folder, output_file):
    '''
    Concatenate all CSV files in a folder (results_l?) into a single CSV file
    (all_results_l?.csv).

    Every file in folder whose name ends with '.csv' is read as a
    '|'-separated CSV. The rows are concatenated, sorted by 'data_set',
    'method', 'k_size' and 'k', written to output_file ('|'-separated, without
    the index column) and returned.

    Parameters:
        folder: directory containing the per-run CSV files.
        output_file: path of the combined CSV to write.

    Returns:
        The combined, sorted DataFrame.

    Raises:
        ValueError: if folder contains no '.csv' file.
    '''
    # os.listdir returns names in arbitrary order; sorting them makes the read
    # order (and with it the concatenated frame) reproducible across runs.
    csv_names = sorted(name for name in os.listdir(folder) if name.endswith('.csv'))
    if not csv_names:
        raise ValueError('No CSV files found in folder: ' + repr(folder))

    dataframes = [pd.read_csv(os.path.join(folder, name), sep='|') for name in csv_names]

    combined = pd.concat(dataframes, ignore_index=True)
    combined = combined.sort_values(by=['data_set', 'method', 'k_size', 'k'])
    combined.to_csv(output_file, index=False, sep='|')
    return combined

def get_average(smart_normalized_df, output_file):
    '''
    Average the six metrics over the rows sharing (data_set, method, k_size).

    The 'time' and 'k' columns are discarded before averaging. The result keeps
    only the three key columns followed by the six metric columns, is written
    to output_file as a '|'-separated CSV without the index column, and is
    returned. The input frame is not modified.
    '''
    group_keys = ['data_set', 'method', 'k_size']
    metric_columns = ['sep_min', 'sep_avg', 'max_diam', 'max_avg', 'cs_ratio_DM', 'cs_ratio_AV']

    # drop() without inplace returns a new frame, so the caller's data is safe.
    trimmed = smart_normalized_df.drop(columns=['time', 'k'])
    means = trimmed.groupby(group_keys).mean()
    means = means.reset_index()

    average_df = means[group_keys + metric_columns]
    average_df.to_csv(output_file, sep='|', index=False)
    return average_df

def get_average_smart_normalized(smart_normalized_df, output_file):
    '''
    Average every remaining column over the rows sharing (method, k_size).

    The 'time', 'k' and 'data_set' columns are discarded first, so the mean is
    taken across all data sets. The result is written to output_file as a
    '|'-separated CSV without the index column and is returned. The input frame
    is not modified.
    '''
    # drop() without inplace returns a new frame, so the caller's data is safe.
    trimmed = smart_normalized_df.drop(columns=['time', 'k', 'data_set'])
    per_method = trimmed.groupby(['method', 'k_size'])
    average_df = per_method.mean()
    average_df = average_df.reset_index()
    average_df.to_csv(output_file, sep='|', index=False)
    return average_df

def main():
    '''
    Run the post-processing pipeline for the three result folders.

    For each variant (l1, l2, linf - presumably the distance norm used to
    produce the results; confirm) the steps are:
      1. concatenate the per-run CSV files into all_results_<variant>.csv;
      2. discard the rows of the 'ward' method;
      3. normalize the metrics into smart_normalized_<variant>.csv;
      4. average the normalized metrics into smart_average_<variant>.csv.

    All output files are written to the 'extra_results' directory, which is
    created if it does not exist yet.
    '''
    output_dir = 'extra_results'
    # Variant suffix -> folder holding its raw result files.
    result_folders = {
        'l1': 'results_l1',
        'l2': 'results',
        'linf': 'results_linf',
    }

    def output_path(prefix, variant):
        # os.path.join keeps the paths valid on every platform; a hard-coded
        # backslash is only a separator on Windows.
        return os.path.join(output_dir, prefix + '_' + variant + '.csv')

    os.makedirs(output_dir, exist_ok=True)

    all_results = {
        variant: concatenate_results(folder, output_path('all_results', variant))
        for variant, folder in result_folders.items()
    }

    without_ward = {
        variant: results[results['method'] != 'ward']
        for variant, results in all_results.items()
    }

    smart_normalized = {
        variant: smart_normalize(results, output_path('smart_normalized', variant))
        for variant, results in without_ward.items()
    }

    for variant, normalized in smart_normalized.items():
        get_average_smart_normalized(normalized, output_path('smart_average', variant))
    
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()