# import matplotlib.pyplot as plt
# import pandas as pd
# import numpy as np
# import re
# import matplotlib.colors as mcolors
# import matplotlib.cm as cm

# def parse_text(text):
#     # Use regex to extract the interto value, percentage, and mse
#     pattern = r"interto(\d+).*?_p(\d+).*?mse:(\d+\.\d+)"
#     matches = re.findall(pattern, text, re.DOTALL)
#     final_list = [(int(m[0]), int(m[1]), float(m[2])) for m in matches]
#     new_final_list = []
#     for answer_tuple in final_list:
#         if answer_tuple[0] >= 128 and answer_tuple[0] <= 1600 and answer_tuple[1] != 80 and answer_tuple[1] != 50 and answer_tuple[0] != 336  and answer_tuple[1] >= 11 and (answer_tuple[1] >= 20 or answer_tuple[0] <= 2000) and (answer_tuple[1] > 11 or answer_tuple[0] <= 1000):
#             new_final_list.append(answer_tuple)
#     return new_final_list

# def read_data(file_path):
#     with open(file_path, 'r') as file:
#         text = file.read()
#     results = parse_text(text)
#     return pd.DataFrame(results, columns=['Interto', 'Percentage', 'MSE'])

# def plot_data(df):
#     # Setup the colormap
#     norm = mcolors.LogNorm(vmin=df['Percentage'].min(), vmax=df['Percentage'].max())
#     scalar_map = cm.ScalarMappable(norm=norm, cmap=cm.viridis)

#     plt.figure(figsize=(10, 6))
#     grouped = df.groupby('Percentage')

#     for percentage, group in grouped:
#         sorted_group = group.sort_values('Interto')
#         mean_mse = sorted_group.groupby('Interto')['MSE'].mean()
#         std_mse = sorted_group.groupby('Interto')['MSE'].std()
#         intertos = mean_mse.index
#         color = scalar_map.to_rgba(percentage)
#         plt.errorbar(intertos, mean_mse, yerr=std_mse, fmt='o-', label=f'{percentage}%', color=color)

#     plt.title('MSE by Horizon. Model: Linear. Dataset: ETTh1. Pred len: 192.')
#     plt.xlabel('Horizon')
#     plt.xscale('log')
#     plt.ylabel('MSE')
#     plt.ylim(0.42, 0.58)
#     plt.xticks([128, 256, 512, 1024, 2048], ['128', '256', '512', '1024', '2048'])

#     # Colorbar with custom ticks
#     cbar = plt.colorbar(scalar_map, label='Percent of Training Data Used')
#     tick_locs = np.unique(df['Percentage'])  # Unique percentage values
#     cbar.set_ticks(tick_locs)
#     cbar.set_ticklabels(tick_locs)
    
#     plt.grid(True)
#     plt.legend(title='', loc='upper right')
#     plt.savefig("Mean_MSE_vs_Horizon_ETTh1.png")
    
    

# if __name__ == "__main__":
#     file_path = 'newresult_ETTh1_horizon.txt'
#     df = read_data(file_path)
#     plot_data(df)



import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import re
import matplotlib.colors as mcolors
import matplotlib.cm as cm

def parse_text(text):
    """Extract ``(interto, percentage, mse)`` records from raw result text.

    A record is recognised wherever the text contains, in this order, the
    token ``interto<digits>``, the token ``_p<digits>`` and the token
    ``mse:<digits>.<digits>``. Arbitrary text (including newlines) may sit
    between the three tokens.

    Args:
        text: Contents of a results log as a single string.

    Returns:
        A list of ``(int, int, float)`` tuples in order of appearance.
        The list is empty when nothing in ``text`` matches.
    """
    # re.DOTALL lets the lazy ``.*?`` gaps cross line breaks, so the three
    # tokens of one record may be spread over several lines.
    # NOTE(review): the mse group only accepts plain decimals such as
    # ``0.4321``; values in another notation would not be captured here.
    pattern = r"interto(\d+).*?_p(\d+).*?mse:(\d+\.\d+)"
    matches = re.findall(pattern, text, re.DOTALL)

    # Every match is kept. Any per-experiment filtering (e.g. restricting the
    # interto range or dropping specific percentages) belongs in a condition
    # on this comprehension.
    return [
        (int(interto), int(percentage), float(mse))
        for interto, percentage, mse in matches
    ]

def read_data(file_path):
    """Load a results log and return its parsed records as a DataFrame.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A DataFrame with the columns 'Interto', 'Percentage' and 'MSE',
        one row per record returned by ``parse_text``.
    """
    with open(file_path, 'r') as handle:
        records = parse_text(handle.read())
    return pd.DataFrame(records, columns=['Interto', 'Percentage', 'MSE'])

def plot_data(df):
    """Plot MSE against horizon, one line per percentage, and save a PNG.

    Each distinct value of the 'Percentage' column becomes one line whose
    colour is taken from a log-normalised viridis scale; a colorbar with one
    tick per distinct percentage is drawn next to the axes. The x-axis
    ('Interto', labelled "horizon") uses a log scale. The figure is written
    to ``newresult_Mean_MSE_vs_horizon_Traffic.png`` in the working directory.

    Args:
        df: DataFrame with the columns 'Interto', 'Percentage' and 'MSE'.

    Raises:
        ValueError: If ``df`` has no rows, since there is nothing to plot and
            the colour scale limits would be undefined.
    """
    if df.empty:
        raise ValueError("plot_data received no rows; nothing to plot")

    # Colour scale shared by the lines and the colorbar.
    norm = mcolors.LogNorm(vmin=df['Percentage'].min(), vmax=df['Percentage'].max())
    scalar_map = cm.ScalarMappable(norm=norm, cmap=cm.viridis)

    fig, ax = plt.subplots(figsize=(10, 6))

    for percentage, group in df.groupby('Percentage'):
        # Sort into a new frame: sorting a groupby slice in place mutates a
        # view of ``df`` and can raise SettingWithCopyWarning.
        ordered = group.sort_values('Interto')
        ax.plot(
            ordered['Interto'],
            ordered['MSE'],
            label=f'{percentage}%',
            marker='o',
            markersize=4,
            color=scalar_map.to_rgba(percentage),
        )

    ax.set_title('MSE by Horizon. Model: 4L-512Dim MLP. Dataset: Traffic. Pred len: 192.')
    ax.set_xlabel('horizon')
    ax.set_xscale('log')
    ax.set_ylabel('MSE')

    # The ScalarMappable is not attached to any Axes, so the Axes to take
    # space from must be passed explicitly; recent matplotlib versions raise
    # an error when it is omitted.
    cbar = fig.colorbar(scalar_map, ax=ax, label='Percent of Training Data Used')
    tick_locs = np.unique(df['Percentage'])  # one tick per distinct percentage
    cbar.set_ticks(tick_locs)
    cbar.set_ticklabels(tick_locs)

    ax.grid(True)
    ax.legend(title='', loc='upper right')

    # Show only these horizons on the x-axis. The log scale adds its own
    # minor ticks, which are cleared so that no other ticks remain.
    horizon_ticks = [128, 256, 512, 1024]
    ax.set_xticks(horizon_ticks)
    ax.set_xticklabels([str(tick) for tick in horizon_ticks])
    ax.set_xticks([], minor=True)

    fig.savefig("newresult_Mean_MSE_vs_horizon_Traffic.png")

if __name__ == "__main__":
    # Script entry point: parse the Traffic horizon results log and plot it.
    plot_data(read_data('newresult_traffic_horizon.txt'))


