import argparse
import gensim.downloader as api
import json
import os
import numpy as np
from embedding_similarity_utils import load_embedding_model, normalize_vector, phrase_vector, cosine_similarity, find_highest_similarity
from error_utils import categorize_errors_total, categorize_errors_bbox, save_to_csv_total, save_to_csv_bbox
from constants import TOTAL_CLASS_TRAIN, TOTAL_CLASS_VALIDATION


def load_json(filename):
    """Read a JSON file and return its parsed content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8: without an encoding argument open() falls
    # back to the locale default, which breaks non-ASCII JSON on some systems.
    with open(filename, 'r', encoding='utf-8') as file:
        return json.load(file)

def _stats_filename(json_path, suffix):
    """Return the base name of ``json_path`` with its extension replaced by ``suffix``."""
    stem, _ = os.path.splitext(os.path.basename(json_path))
    return stem + suffix


def main():
    """Command-line entry point: categorize errors and write CSV statistics.

    Reads one positional argument, ``flag`` (``'train'`` or ``'validation'``),
    which selects ``TOTAL_CLASS_TRAIN`` or ``TOTAL_CLASS_VALIDATION``. It then
    loads the embedding model, prints the highest-similarity pair reported by
    ``find_highest_similarity`` for the selected classes, and, for every path
    in ``json_files``, writes two CSV files named after the input file:
    ``<name>_error_statistics_total.csv`` and
    ``<name>_error_statistics_bbox.csv``.
    """
    parser = argparse.ArgumentParser(
        description="Categorize errors in JSON result files and save total and bbox statistics as CSV."
    )
    parser.add_argument('flag', type=str, choices=['train', 'validation'], help='Flag to determine which total class to use')
    args = parser.parse_args()
    data_flag = args.flag

    # argparse's `choices` guarantees the flag is one of these keys, so the
    # lookup cannot fail and `total_class` is always bound.
    total_class_by_flag = {
        'train': TOTAL_CLASS_TRAIN,
        'validation': TOTAL_CLASS_VALIDATION,
    }
    total_class = total_class_by_flag[data_flag]

    model = "glove-wiki-gigaword-200"
    wv_from_bin = load_embedding_model(model)

    # Paths of the JSON files to analyze. The list is empty in this file, so
    # the loop below does nothing until paths are added here.
    json_files = [

    ]

    max_sim, best_pair = find_highest_similarity(wv_from_bin, total_class)
    if best_pair:
        print(f"The highest cosine similarity is {max_sim:.4f} between '{best_pair[0]}' and '{best_pair[1]}'.")
    else:
        print("One or both of the words in the highest similarity pair are not in the vocabulary.")

    for json_file in json_files:
        data = load_json(json_file)

        # Overall statistics: counts and percentages, saved together.
        error_counts_total, error_percentages_total = categorize_errors_total(data, wv_from_bin, max_sim, data_flag)
        filename_total = _stats_filename(json_file, '_error_statistics_total.csv')
        save_to_csv_total((error_counts_total, error_percentages_total), filename_total)

        # Bounding-box statistics for the same data.
        bbox_error_stats = categorize_errors_bbox(data, wv_from_bin, max_sim, data_flag)
        filename_bbox = _stats_filename(json_file, '_error_statistics_bbox.csv')
        save_to_csv_bbox(bbox_error_stats, filename_bbox)

    

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

    
