from datetime import datetime
from os import path
import os
import csv

# Timeframe options accepted by generate_votes_timeframe.
WEEK = "week"
MONTH = "month"
YEAR = "year"
ALL_TIME = "all_time"
DAY = "day"

# strptime format used to parse the date field of a rating line.
FORMAT = '%Y-%m-%d'

# The location where the Netflix files are stored
# (relative paths, so they are resolved against the current working directory).
netflix_files = ["data_big/netflix_prize/combined_data_1.txt", "data_big/netflix_prize/combined_data_2.txt",
                 "data_big/netflix_prize/combined_data_3.txt", "data_big/netflix_prize/combined_data_4.txt"]

# A rating counts as an approval only if it is strictly greater than this value.
APPROVAL_RANK = 3


def generate_stats(output_file, input_files=None, approval_rank=None):
    """
    Writes how many rankings and approvals each movie in the Netflix prize data has.

    A line containing ":" starts a new movie (the text before the colon is its id). Every other non-blank
    line is a "voter_id,rating,date" record belonging to the current movie. A rating is an approval when it
    is strictly greater than the approval rank.

    Parameters
    ----------
    output_file : os.PathLike
        The file the statistics are written to (CSV with the columns id, approvals, rankings).
    input_files : iterable of os.PathLike, optional
        The rating files to read. Defaults to the module-level netflix_files.
    approval_rank : int, optional
        Ratings strictly greater than this value count as approvals. Defaults to the module-level
        APPROVAL_RANK.

    Raises
    ------
    ValueError
        If a rating line appears before the first movie header of a file.
    """
    if input_files is None:
        input_files = netflix_files
    if approval_rank is None:
        approval_rank = APPROVAL_RANK

    movie_numbers = []
    for file in input_files:
        print("file", file)
        cur_movie = None
        approvals = 0
        rankings = 0
        with open(file, "r") as f:
            # Iterate lazily: the rating files are far too large to load with readlines().
            for line in f:
                if ":" in line:
                    if cur_movie is not None:
                        movie_numbers.append((cur_movie, approvals, rankings))
                    cur_movie = line.split(":")[0]
                    approvals = 0
                    rankings = 0
                elif line.strip():
                    if cur_movie is None:
                        raise ValueError("Rating line before the first movie header in %s" % str(file))
                    _, rating, _ = line.split(",")
                    if int(rating.strip()) > approval_rank:
                        approvals += 1
                    rankings += 1
        # The last movie of a file has no following header line, so it must be flushed here.
        if cur_movie is not None:
            movie_numbers.append((cur_movie, approvals, rankings))

    # newline="" lets the csv module control line endings (avoids blank rows on Windows).
    with open(output_file, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=["id", "approvals", "rankings"])
        writer.writeheader()
        for movie_id, movie_approvals, movie_rankings in movie_numbers:
            writer.writerow({"id": movie_id, "approvals": movie_approvals, "rankings": movie_rankings})


def generate_voter_stats(output_file, input_files=None, approval_rank=None):
    """
    Writes how many rankings and approvals each user in the Netflix prize data gives out.

    A line containing ":" starts a new movie (the text before the colon is its integer id). Every other
    non-blank line is a "voter_id,rating,date" record for the current movie. Movies are collected in sets
    per voter, so a movie is counted at most once per voter.

    Parameters
    ----------
    output_file : os.PathLike
        The file the statistics are written to (CSV with the columns id, approvals, rankings).
    input_files : iterable of os.PathLike, optional
        The rating files to read. Defaults to the module-level netflix_files.
    approval_rank : int, optional
        Ratings strictly greater than this value count as approvals. Defaults to the module-level
        APPROVAL_RANK.

    Raises
    ------
    ValueError
        If a rating line appears before any movie header.
    """
    if input_files is None:
        input_files = netflix_files
    if approval_rank is None:
        approval_rank = APPROVAL_RANK

    approved = {}
    all_rated = {}
    cur_movie = None
    for file in input_files:
        print("file", file)
        with open(file, "r") as f:
            # Iterate lazily: the rating files are far too large to load with readlines().
            for line in f:
                if ":" in line:
                    cur_movie = int(line.split(":")[0].strip())
                elif line.strip():
                    if cur_movie is None:
                        raise ValueError("Rating line before the first movie header in %s" % str(file))
                    voter_id, rating, _ = line.split(",")
                    if int(rating.strip()) > approval_rank:
                        approved.setdefault(voter_id, set()).add(cur_movie)
                    all_rated.setdefault(voter_id, set()).add(cur_movie)

    # newline="" lets the csv module control line endings (avoids blank rows on Windows).
    with open(output_file, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=["id", "approvals", "rankings"])
        writer.writeheader()
        for voter_id, rated_movies in all_rated.items():
            # Voters that never approved anything are absent from `approved`.
            writer.writerow({"id": voter_id,
                             "approvals": len(approved.get(voter_id, ())),
                             "rankings": len(rated_movies)})

def _timeframe_key(date, timeframe):
    """Returns the key of the timeframe bucket the given date falls into."""
    if timeframe == MONTH:
        return (date.year, date.month)
    if timeframe == WEEK:
        # An ISO week number belongs to the ISO year, which differs from the calendar year around
        # New Year. Pairing it with date.year would put e.g. 2003-12-29 (ISO week 1 of 2004) into
        # the same bucket as the first days of January 2003.
        iso = date.isocalendar()
        return (iso[0], iso[1])
    if timeframe == YEAR:
        return date.year
    if timeframe == ALL_TIME:
        return ALL_TIME
    if timeframe == DAY:
        return (date.year, date.month, date.day)
    raise ValueError("Invalid timeframe %s" % str(timeframe))


def generate_votes_timeframe(timeframe, output_dir, popularity_cutoff=0, input_files=None, approval_rank=None):
    """
    Generates elections from the Netflix Prize data, divided by some timeframe.

    Only ratings strictly greater than the approval rank are used. For every timeframe bucket one file is
    written into the sub-directory ``timeframe + <text after the last "." of str(popularity_cutoff)>`` of
    output_dir. Each file consists of a "voters V, alternatives A" line, an "alternatives: [...]" line and
    one "voter_id : {movie, ...}" line per voter.

    Parameters
    ----------
    timeframe : str
        By which timeframe the data is divided. The options are DAY, MONTH, WEEK, YEAR, ALL_TIME.
        WEEK buckets are keyed by (ISO year, ISO week).
    output_dir : os.PathLike
        The directory the files are written to.
    popularity_cutoff : float
        The minimum proportion of approvals of the most popular movie a movie must obtain to be included.
    input_files : iterable of os.PathLike, optional
        The rating files to read. Defaults to the module-level netflix_files.
    approval_rank : int, optional
        Ratings strictly greater than this value count as approvals. Defaults to the module-level
        APPROVAL_RANK.

    Raises
    ------
    ValueError
        If timeframe is not one of the supported options, or a rating line appears before the first movie
        header of a file.
    """
    # Fail before the (very long) read instead of on the first approving rating.
    if timeframe not in (DAY, WEEK, MONTH, YEAR, ALL_TIME):
        raise ValueError("Invalid timeframe %s" % str(timeframe))
    if input_files is None:
        input_files = netflix_files
    if approval_rank is None:
        approval_rank = APPROVAL_RANK

    times = {}  # bucket key -> voter id -> set of approved movie ids (strings)
    key_cache = {}  # date text -> bucket key; avoids parsing the same date over and over
    for file in input_files:
        print("file", file)
        cur_movie = None
        with open(file, "r") as f:
            # Iterate lazily: the rating files are far too large to load with readlines().
            for line in f:
                if ":" in line:
                    cur_movie = line.split(":")[0]
                    continue
                if not line.strip():
                    continue
                if cur_movie is None:
                    raise ValueError("Rating line before the first movie header in %s" % str(file))
                voter_id, rating, date_text = line.split(",")
                if int(rating.strip()) <= approval_rank:
                    continue
                date_text = date_text.strip()
                key = key_cache.get(date_text)
                if key is None:
                    key = _timeframe_key(datetime.strptime(date_text, FORMAT), timeframe)
                    key_cache[date_text] = key
                times.setdefault(key, {}).setdefault(voter_id, set()).add(cur_movie)

    output_dir = path.join(output_dir, timeframe + str(popularity_cutoff).split(".")[-1])
    os.makedirs(output_dir, exist_ok=True)
    print("generating files")
    nro_voters = []
    nro_alts = []
    for time, votes in times.items():
        file_name = path.join(output_dir,
                              str(time).replace("(", "").replace(")", "").replace(",", "-").replace(" ", "") + ".txt")
        # Count the approvals of every movie within this bucket.
        alts = {}
        for approved in votes.values():
            for alt in approved:
                alts[alt] = alts.get(alt, 0) + 1
        threshold = max(alts.values()) * popularity_cutoff
        unique_alts = sorted(alt for alt, score in alts.items() if score >= threshold)
        popular = set(unique_alts)
        # Restrict every vote to the popular movies; voters left with nothing are dropped.
        filtered = {}
        for voter_id, approved in votes.items():
            kept = approved & popular
            if kept:
                filtered[voter_id] = kept
        with open(file_name, "w") as f:
            f.write("voters %d, alternatives %d\nalternatives: " % (len(filtered), len(unique_alts)))
            nro_voters.append(len(filtered))
            nro_alts.append(len(unique_alts))
            f.write(str(unique_alts).replace("'", ""))
            f.write("\n")
            for voter_id, kept in filtered.items():
                # Sorted so the file content does not depend on set iteration order.
                f.write("%s : {%s}\n" % (voter_id, ", ".join(sorted(kept))))
    print(nro_voters)
    print(nro_alts)


def generate_votes_divide_by_movie_number(movie_number, output_dir, input_files=None, approval_rank=None):
    """
    Generates elections from the Netflix Prize data, divided into instances with movie_number many movies.

    A new election starts at every movie whose id satisfies ``(id - 1) % movie_number == 0``. Each election
    is written to ``<output_dir>/alternative_nro_<movie_number>/<start>-<end>.txt``, where start is the first
    movie id of the election and end the id of the last movie that has rating lines (start if none has).
    Only ratings strictly greater than the approval rank are recorded as votes.

    Parameters
    ----------
    movie_number : int
        The number of movies each instance contains. Must be positive.
    output_dir : os.PathLike
        The directory the files are written to.
    input_files : iterable of os.PathLike, optional
        The rating files to read. Defaults to the module-level netflix_files.
    approval_rank : int, optional
        Ratings strictly greater than this value count as approvals. Defaults to the module-level
        APPROVAL_RANK.

    Raises
    ------
    ValueError
        If movie_number is not positive, or a rating line appears before the first election has started.
    """
    if movie_number < 1:
        raise ValueError("movie_number must be positive, got %s" % str(movie_number))
    if input_files is None:
        input_files = netflix_files
    if approval_rank is None:
        approval_rank = APPROVAL_RANK

    # Each election keeps its metadata apart from the votes, so voter ids can never clash with it.
    elections = []
    cur_election = None
    cur_movie = None
    for file in input_files:
        print("file", file)
        with open(file, "r") as f:
            # Iterate lazily: the rating files are far too large to load with readlines().
            for line in f:
                if ":" in line:
                    cur_movie = int(line.split(":")[0].strip())
                    if (cur_movie - 1) % movie_number == 0:
                        if cur_election is not None:
                            elections.append(cur_election)
                        cur_election = {"start": cur_movie, "end": cur_movie, "votes": {}}
                    continue
                if not line.strip():
                    continue
                if cur_election is None:
                    raise ValueError("Rating line before the start of the first election in %s" % str(file))
                cur_election["end"] = cur_movie
                voter_id, rating, _ = line.split(",")
                if int(rating.strip()) > approval_rank:
                    cur_election["votes"].setdefault(voter_id, set()).add(cur_movie)
    if cur_election is not None:
        elections.append(cur_election)

    output_dir = path.join(output_dir, "alternative_nro_%d" % movie_number)
    os.makedirs(output_dir, exist_ok=True)
    print("generating files")
    nro_voters = []
    nro_alts = []
    for election in elections:
        votes = election["votes"]
        file_name = path.join(output_dir, "%d-%d.txt" % (election["start"], election["end"]))
        unique_alts = set()
        for approved in votes.values():
            unique_alts.update(approved)
        with open(file_name, "w") as f:
            f.write("voters %d, alternatives %d\nalternatives: " % (len(votes), len(unique_alts)))
            nro_voters.append(len(votes))
            nro_alts.append(len(unique_alts))
            f.write(str(sorted(unique_alts)).replace("'", ""))
            f.write("\n")
            for voter_id, approved in votes.items():
                # Sorted so the file content does not depend on set iteration order.
                f.write("%s : {%s}\n" % (voter_id, ", ".join(str(alt) for alt in sorted(approved))))
    print(nro_voters)
    print(nro_alts)


def _parse_int_list(text):
    """Parses a comma separated list of integers, ignoring brackets, braces, quotes and empty entries."""
    for decoration in "[]{}'":
        text = text.replace(decoration, "")
    return [int(token) for token in text.split(",") if token.strip()]


def read_file(path_name):
    """
    Reads an election file and returns an election. The alternatives original names are ignored and they are numbered
    0,...,m-1. The first alternative in the alternatives-line becomes 0, the second 1 and so on.

    The file consists of a "voters V, alternatives A" line, an "alternatives: [a, b, ...]" line and one
    "voter_id : {a, b, ...}" line per voter. Blank lines are ignored.

    Parameters
    ----------
    path_name : os.PathLike
        The path the election is stored in.

    Returns
    -------
    votes : array[set[int]]
        List of alternative sets.
    m : int
        The number of alternatives.
    renames : array[int]
        The original names of each alternative.

    Raises
    ------
    ValueError
        If the "voters" header line is missing, a voter or alternatives line has no ":", or the counts in the
        header do not match the number of voters and alternatives actually found.
    KeyError
        If a voter approves an alternative that is not listed in the alternatives line.
    """
    voters = []
    alternative_indices = []
    voternro = None
    altnro = None
    with open(path_name, "r") as f:
        for line in f:
            if not line.strip():
                continue
            if line.startswith("voters"):
                # "voters 12, alternatives 5" -> the number is the second word of each part.
                voternro, altnro = [int(part.split()[1]) for part in line.split(",")]
                continue
            _, separator, payload = line.partition(":")
            if not separator:
                raise ValueError("Malformed line in %s: %r" % (str(path_name), line))
            if line.startswith("alternatives"):
                alternative_indices.extend(_parse_int_list(payload))
            else:
                voters.append(set(_parse_int_list(payload)))

    if voternro is None:
        raise ValueError("Missing 'voters' header line in %s" % str(path_name))
    # Built once from the complete list so indices and renames always agree.
    alternative_dict = {movie: i for i, movie in enumerate(alternative_indices)}
    new_votes = [{alternative_dict[x] for x in vote} for vote in voters]
    # Explicit checks instead of assert, which is stripped when Python runs with -O.
    if voternro != len(new_votes):
        raise ValueError("Header announces %d voters but %d were found in %s"
                         % (voternro, len(new_votes), str(path_name)))
    if altnro != len(alternative_indices):
        raise ValueError("Header announces %d alternatives but %d were found in %s"
                         % (altnro, len(alternative_indices), str(path_name)))
    return new_votes, len(alternative_indices), alternative_indices


def read_only_alternative_indices(path):
    """
    Reads only the alternatives-line of an election file and returns the original alternative names, in
    the order in which they are listed there.

    Parameters
    ----------
    path : os.PathLike
        The path the election is stored in. Inside this function the name hides the os.path module
        imported at the top of the file.

    Returns
    ---------
    array[int]
        The original names of each alternative. Empty if the file has no line starting with "alternatives".
    """
    alternative_indices = []
    with open(path, "r") as election_file:
        for raw_line in election_file:
            if not raw_line.startswith("alternatives"):
                continue
            # Drop the list brackets, then keep what follows the "alternatives" label.
            without_brackets = raw_line.replace("]", "").replace("[", "")
            names = without_brackets.split(":")[1]
            alternative_indices = [int(name.replace("'", "").strip()) for name in names.split(",")]
            # Only the first alternatives-line is relevant.
            break
    return alternative_indices


if __name__ == "__main__":
    # Ad-hoc driver: the commented-out lines are example invocations of the functions in this module.
    # Only the read_file call on a synthetic election file is currently active.
    # generate_stats("data_big/netflix_prize/movie_approvals.csv")
    # generate_voter_stats("data_big/netflix_prize/movie_approvals_voters.csv")
    # f = "data_big/netflix_prize/output_files"
    # v = read_file(path.join(f, MONTH, "2004-6.txt"))
    # print(v[0][:100], v[1])
    # generate_votes_divide_by_movie_number(50, f)
    print(read_file("data_big/synthetic/output_files/p0_2_f0_2/0.txt"))
    # generate_votes_timeframe(DAY, "data_big/netflix_prize/output_files", 0.01)
