import csv
import pandas
from os import path, listdir
from os.path import join, isfile

from matplotlib.lines import Line2D

import matplotlib.pyplot as plt
import numpy as np

import warnings

import tikzplotlib

from reconfiguration import ReconfigurationSolverExhaustive

# Install a blanket "ignore" filter: every warning raised from here on is suppressed.
warnings.filterwarnings("ignore")

# Fix for https://github.com/nschloe/tikzplotlib/issues/567
from matplotlib.lines import Line2D
from matplotlib.legend import Legend

# Read-only aliases that expose existing private attributes under other names:
# `_us_dashSeq` / `_us_dashOffset` return the two parts of `Line2D._dash_pattern`, and `_ncol` returns
# `Legend._ncols`. Presumably these are the names tikzplotlib still looks up (see the issue linked above).
Line2D._us_dashSeq = property(lambda self: self._dash_pattern[1])
Line2D._us_dashOffset = property(lambda self: self._dash_pattern[0])
Legend._ncol = property(lambda self: self._ncols)

# NOTE(review): presumably the timeout in seconds used by the experiments. Nothing visible in this file reads it;
# the table captions built in runtime_stats hard-code "Timeout 60s." instead.
_TIMEOUT = 60


def get_folder_from_synthetic_file(filename):
    """
    Extract the folder name that is embedded in a statistics file name.

    Only the base name of the file is inspected. The folder name is the text that follows the "path_stats"
    marker, cut off at the first "_202" that comes after it.

    Parameters
    ----------
    filename : str
        Name of, or path to, the statistics file.

    Returns
    -------
    str
        The folder name.

    Raises
    ------
    IndexError
        If the base name does not contain "path_stats".
    """
    base_name = path.basename(filename)
    after_marker = base_name.split("path_stats")[1]
    before_cut, *_ = after_marker.split("_202")
    return before_cut


def get_all_files(folder_dir, extension=""):
    """
    List the regular files that sit directly inside a folder.

    Parameters
    ----------
    folder_dir : str
        The folder to list. Sub-folders are not descended into.
    extension : str
        If non-empty, only files whose name ends with this string are returned.

    Returns
    -------
    list[str]
        The matching files, each joined with folder_dir, in the order listdir reports them.
    """
    matches = []
    for entry in listdir(folder_dir):
        full_path = join(folder_dir, entry)
        if not isfile(full_path):
            continue
        if extension == "" or entry.endswith(extension):
            matches.append(full_path)
    return matches


# Line and marker formatting defaults.
# Each dict maps a parameter value (committee size k, rule name, phi value, ...) to a matplotlib
# marker, line style or colour code.
_k_markers = {3: "^", 4: "s", 5: "p"}
_opt_req_lines = {0.95: ':', 0.98: '--', 1: '-', -1: '-'}
_f_markers = {0.5: "^", 0.75: "s", 1: "p"}
_rule_lines = {"cc": "-", "pav": ":"}
_f_lines = {0.25: ":", 0.5: "-.", 0.75: "--", 1: "-"}
_linestyle_colors = {"-": "r", ":": "b", "--": "g", "-.": "k"}
_rule_colors = {"cc": "k", "pav": "r"}
_extra_colors = {1: "r", 2: "b"}
_extra_markers = {1: "^", 2: "s"}

# Default figure width and height (plot_data sets its own local size).
_imgw = 6
_imgh = 2.5

# Column names used in the summary dataframes; most double as LaTeX table headers.
_OPT_REQ = "Opt. req."
_FINISHED = "Finished"
_CONNECTED = "Connectedness"
_RULE = "Rule"
_EXTRA_SWAPS = "Min. dist. \\%"
_NRO_OF_COMMITTEES_PREFIX = "$|\\rV|$"
_PATH_EXISTENCE_PROB = "\\makecell{Avg. \\% pairs with\\\\ reconfig. path} "
_SHORTEST_PATH_PROB = "\\makecell{Short \\\\path prob.}"
_NRO_OF_COMMITTEES_NON_LATEX = "Nr. of committees"
_AVERAGE_COMMITTEE_NRO = "Avg. " + _NRO_OF_COMMITTEES_PREFIX
_MEDIAN_COMMITTEE_NRO = "Med. " + _NRO_OF_COMMITTEES_PREFIX
# The backslash is escaped explicitly: "\s" is not a valid escape sequence and only worked by accident.
_VARIANCE = "$\\sigma^2$"
_MULTIPLE_COMMITTEES = _NRO_OF_COMMITTEES_PREFIX + " $> 1$"
_TIME_AVG = "Avg. time"
_PATH_TIME_AVG = "Avg. all pairs time"
_ALTS = "$m$"
_VOTERS = "$n$"
_NRO_INSTANCES = "Ins."
_FOLDERNAME = "Foldername"
_P = "$p$"
_F = "$\\phi$"
_R = "Radius"
_K = "$k$"
_AVG_MED_exp = " Avg. is an abbreviation for average and med. for median. "


def _make_first_row_multirow(ltx, title_line, depth, max_depth, nro_columns=None):
    """
    Merge cells that repeat over consecutive rows of one column into a multirow cell, then recurse into
    the next column for the rows that were merged.
    Internal function, not to be used outside of make_first_row_multirow.

    Parameters
    ----------
    ltx : str
        LaTeX table string.
    title_line : bool
        Whether the first line containing "&" should be treated as the header row. The header is copied
        unchanged and is used to count the columns.
    depth : int
        The current (zero-based) column number we are trying to merge.
    max_depth : int
        The last column we may merge.
    nro_columns : int
        The number of columns in the table. None implies it is not known yet.

    Returns
    -------
    str
        The modified table. Every line of the result, including the last one, ends with a newline.
    """
    if depth > max_depth:
        return ltx
    lines = ltx.split("\n")
    new_ltx = ""
    i = 0
    while i < len(lines):
        line = lines[i]
        if "&" not in line:
            # Rules, environment commands and blank lines are copied through untouched.
            new_ltx += line + "\n"
            i += 1
            continue
        if title_line:
            # The header row only tells us how many columns the table has.
            title_line = False
            nro_columns = len(line.split("&"))
            new_ltx += line + "\n"
            i += 1
            continue
        # Split into: the cells before the current column, the current cell, and everything after it.
        splits = line.split("&", depth + 1)
        if depth > 0:
            beginning = "&".join(splits[:depth]) + " & "
        else:
            beginning = ""
        first = splits[depth]
        rest = splits[depth + 1]
        # Count how many of the following rows carry the same value in the current column.
        to_skip = 1
        temp_ltx = ""
        while i + to_skip < len(lines):
            next_line = lines[i + to_skip]
            if "&" not in next_line:
                break
            next_splits = next_line.split("&", depth + 1)
            if depth > 0:
                next_beginning = "&".join(next_splits[:depth]) + " & "
            else:
                next_beginning = ""
            next_first = next_splits[depth]
            next_rest = next_splits[depth + 1]
            if next_first != first:
                break
            # A repeated value is replaced by an empty cell; the multirow cell above spans it.
            temp_ltx += next_beginning + " &" + next_rest + "\n"
            to_skip += 1
        if to_skip > 1:
            temp_ltx = (beginning + "\\multirow{%d}{*}{%s} &" % (to_skip, first)) + rest + "\n" + \
                       temp_ltx
            # Only the rows of this group are passed on to the next column.
            temp_ltx = _make_first_row_multirow(temp_ltx, False, depth + 1, max_depth, nro_columns)
            if depth == 0:
                new_ltx += temp_ltx + "\\midrule\n"
            else:
                # Groups in inner columns are closed by a partial rule from this column to the last one.
                new_ltx += temp_ltx + "\\cline{%d-%d}\n" % (depth + 1, nro_columns)
        else:
            new_ltx += line + "\n"
        i += to_skip
    return new_ltx


def remove_repeat_bars(new_ltx):
    """
    Removes clines, bottomrules and midrules that are directly after each other. Might have to be run multiple times
    if there is more than two lines after each other. Blank lines are dropped as well.

    A line is removed when it is a midrule or cline directly followed by a bottomrule, or a cline directly
    followed by a midrule. The last non-blank line is always kept.

    Parameters
    ----------
    new_ltx : str
        The LaTeX table string.

    Returns
    -------
    str:
        The modified LaTeX table, one line per kept input line, each terminated by a newline. An input
        without any non-blank line yields the empty string.
    """
    lines = [x for x in new_ltx.split("\n") if x.strip()]
    if not lines:
        # Nothing to keep; the original indexing of lines[-1] would fail here.
        return ""
    kept = []
    for current, following in zip(lines, lines[1:]):
        rule_before_bottomrule = "bottomrule" in following and ("\\midrule" in current or "cline" in current)
        cline_before_midrule = "midrule" in following and "cline" in current
        if not (rule_before_bottomrule or cline_before_midrule):
            kept.append(current)
    kept.append(lines[-1])
    return "\n".join(kept) + "\n"


def make_first_row_multirow(ltx):
    """
    Given a LaTeX table string, merge values that are repeated over consecutive rows into multirow cells.
    Merging starts in the first column and continues, within each merged group, up to the fourth column.

    Parameters
    ----------
    ltx : str
        LaTeX table string.

    Returns
    -------
    str
        The modified table, with rules that ended up directly after each other cleaned up.
    """
    merged = _make_first_row_multirow(ltx, True, 0, 3)
    # One clean-up pass only handles two stacked rules, so run it twice.
    for _ in range(2):
        merged = remove_repeat_bars(merged)
    return merged


def gray_tables(ltx, grey_th=75):
    """
    Given a LaTeX table, grey out the rows with fewer than grey_th % finished runs.

    The first line containing "&" is taken as the header and is searched for the "Finished" column. In every
    later row whose "Finished" value is below the threshold, the cells to the right of that column are wrapped
    in a gray colour command. Rows in which "Finished" is the last column have nothing to gray and are kept
    unchanged.

    Parameters
    ----------
    ltx : str
        The LaTeX table string.
    grey_th : int
        The threshold for graying out, in percentage.

    Returns
    -------
    str
        The formatted LaTeX table.

    Raises
    ------
    ValueError
        If the header row has no "Finished" column, or a "Finished" cell cannot be parsed as a number.
    """
    out_lines = []
    fin_i = None
    title_line = True
    for line in ltx.split("\n"):
        if "&" not in line:
            out_lines.append(line)
            continue
        splits = line.split("&")
        if title_line:
            title_line = False
            out_lines.append(line)
            for i, part in enumerate(splits):
                if _FINISHED in part:
                    fin_i = i
            if fin_i is None:
                raise ValueError("The table header has no '%s' column" % _FINISHED)
            continue
        if fin_i >= len(splits) - 1:
            # The finished column is the last one: there is nothing to its right to gray out.
            out_lines.append(line)
            continue
        if float(splits[fin_i].replace("\\%", "")) < grey_th:
            grayed = "&".join(splits[:fin_i + 1])
            for s in splits[fin_i + 1:-1]:
                grayed += "& {\\color{gray}" + s.strip() + "}"
            # Strip only the row terminator from the last cell. Removing every backslash would turn an
            # escaped percent sign into a bare one, which starts a LaTeX comment and swallows the closing brace.
            last = splits[-1].strip()
            if last.endswith("\\\\"):
                last = last[:-2].rstrip()
            grayed += "& {\\color{gray}" + last + "} \\\\"
            out_lines.append(grayed)
        else:
            out_lines.append(line)
    new_ltx = "".join(out_line + "\n" for out_line in out_lines)
    return new_ltx.replace("\\midrule\n\\bottomrule", "\\bottomrule")


def range_formatter(x):
    """
    Write a bucket tuple as a range string.

    Parameters
    ----------
    x : tuple
        Either (low,) for an open-ended bucket or (low, high).

    Returns
    -------
    str
        "low -" for a one-element tuple, otherwise "low - high".
    """
    open_ended = len(x) == 1
    return "%d -" % x[0] if open_ended else "%d - %d" % x


def df_to_latex(summary_df, caption="", columns=None, extra_formatters=None, label=None, tablestar=True,
                first_row_multirow=False, make_gray_tables=False, aliases=None):
    """
    Turn a dataframe into a LaTeX table.

    Parameters
    ----------
    summary_df : pandas.DataFrame
        The dataframe to convert.
    caption : str
        The table caption.
    columns : list[str]
        The columns the table contains. If None, lists all columns.
    extra_formatters : dict[function]
        Formatters for the table entries that override the default formatting.
    label : str
        The label of the table.
    tablestar : bool
        If true, the table environment is replaced by its starred variant (table*).
    first_row_multirow : bool
        If true, the table is passed through make_first_row_multirow to merge repeated values.
    make_gray_tables : bool
        If true, the table is passed through gray_tables with its default threshold.
    aliases : dict
        Renamings of the column names; columns without an alias keep their name.

    Returns
    -------
    str
        The LaTeX table.
    """
    # Default formatter per known column name, grouped by the formatter they share.
    formatters = dict.fromkeys(
        (_OPT_REQ, _PATH_EXISTENCE_PROB, _FINISHED, _MULTIPLE_COMMITTEES, _CONNECTED, _SHORTEST_PATH_PROB),
        percentage_string)
    formatters.update(dict.fromkeys((_F, _MEDIAN_COMMITTEE_NRO, _R, _P), float_string))
    formatters.update(dict.fromkeys((_AVERAGE_COMMITTEE_NRO, _VARIANCE), float_rounded))
    formatters.update(dict.fromkeys((_ALTS, _VOTERS), range_formatter))
    if extra_formatters:
        formatters.update(extra_formatters)

    shown_columns = summary_df.columns if columns is None else columns
    # Only the rule column is left-aligned, everything else is right-aligned.
    left_aligned = {_RULE}
    column_format = "".join("l" if column in left_aligned else "r" for column in shown_columns)

    header = True
    if aliases:
        header = [aliases[column] if column in aliases else column for column in shown_columns]

    ltx = summary_df.to_latex(index=False, columns=columns, formatters=formatters, label=label, caption=caption,
                              column_format=column_format, header=header)
    for rule_cell, command in (("pav ", "\\PAV"), ("cc ", "\\CC")):
        ltx = ltx.replace(rule_cell, command)
    # Insert \centering right after the first line of the table.
    first_line, newline, remainder = ltx.partition("\n")
    if newline:
        ltx = first_line + "\n\\centering\n" + remainder
    if first_row_multirow:
        ltx = make_first_row_multirow(ltx)
    if make_gray_tables:
        ltx = gray_tables(ltx)
    if tablestar:
        for environment in ("\\begin{table}", "\\end{table}"):
            ltx = ltx.replace(environment, environment[:-1] + "*}")
    return ltx


def to_latex(phrase):
    """
    Translate a known phrase into its LaTeX command.

    Parameters
    ----------
    phrase : str
        The phrase to be translated; "cc" and "pav" are known.

    Returns
    -------
    str
        The LaTeX command.

    Raises
    ------
    ValueError
        If the phrase is not one of the known ones.
    """
    known_commands = (("cc", "\\CC"), ("pav", "\\PAV"))
    for known, command in known_commands:
        if phrase == known:
            return command
    raise ValueError("Unknown expression to latex %s" % phrase)


def percentage_string(fl, d=2, latex=True):
    """
    Write a float as a percentage string, by default with two decimal points.

    Parameters
    ----------
    fl : float
        The number to be written, as a proportion (1 corresponds to 100 %). Strings are returned unchanged.
    d : int
        The number of decimals.
    latex : bool
        Should the string be written as LaTeX, i.e., add a backslash before %.

    Returns
    -------
    str
        The percentage as a string.
    """
    if isinstance(fl, str):
        return fl
    template = "%." + str(d) + "f"
    suffix = " \\%" if latex else " %"
    # Round to d decimals, not to a fixed 2: a fixed 2 loses digits for d > 2 and rounds twice for d < 2.
    return (template % round(fl * 100, d)) + suffix


def float_rounded(fl, d=2):
    """
    Write a float with exactly d decimal places.

    Parameters
    ----------
    fl : float
        The float to round. Strings are returned unchanged.
    d : int
        The decimal places.

    Returns
    -------
    str
        The formatted string.
    """
    if isinstance(fl, str):
        return fl
    template = "".join(("%.", str(d), "f"))
    rounded = round(fl, d)
    return template % rounded


def float_string(fl):
    """
    Write a float as a string without trailing zeroes.

    Values within 1e-8 of their truncated integer part are written as that integer.

    Parameters
    ----------
    fl : float
        The float to write. Strings are returned unchanged.

    Returns
    -------
    str
        A string containing the float.
    """
    if isinstance(fl, str):
        return fl
    whole = int(fl)
    if abs(whole - fl) < 1e-8:
        return str(whole)
    text = str(fl)
    # Drop trailing zeroes, but never the whole string.
    trimmed = text.rstrip("0") or text[:1]
    # A decimal point left dangling at the end goes as well.
    return trimmed[:-1] if trimmed.endswith(".") else trimmed


def median_average_runtime(df):
    """
    Compute runtime statistics of the reconfiguration graph construction.

    Only rows with status 'OK' contribute to the time statistics; rows with status 'TO' are counted as
    timeouts. Rows with any other status are ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        A dataframe obtained from the file written by experiment_runner. The columns 'status' and 'time'
        are read.

    Returns
    -------
    (float, float, float, float, int)
        Median runtime, average runtime, variance, proportion of timeouts, number of the pairs that were considered.
        All five are None if the frame is empty or contains no 'OK' or 'TO' row.
    """
    nothing = (None, None, None, None, None)
    # Check frame is not empty
    if len(df['status']) == 0:
        return nothing
    nro_timeouts = len(df[df['status'] == 'TO'])
    finished_times = df[df['status'] == 'OK']['time']
    nro_pairs = len(finished_times) + nro_timeouts
    if nro_pairs == 0:
        # Only rows with other statuses: there is nothing to summarise and no denominator for the proportion.
        return nothing
    return (finished_times.median(), finished_times.mean(), finished_times.var(),
            nro_timeouts / nro_pairs, nro_pairs)


def k_title(k):
    """Return the LaTeX column title for the committee size k."""
    title_template = "$\\csize = %d$"
    return title_template % k


def k_avg(k):
    """Return the name of the average-time column for the committee size k."""
    return " ".join((_TIME_AVG, str(k)))


def k_var(k):
    """Return the name of the variance column for the committee size k."""
    return " ".join((_VARIANCE, str(k)))


def k_TO(k, rule=""):
    """Return the name of the timeout column for the committee size k, with the rule name appended."""
    return "".join(("TO ", str(k), rule))


def k_count(k):
    """Return the name of the instance-count column for the committee size k."""
    return " ".join((_NRO_INSTANCES, str(k)))


def runtime_stats(df, rules, ks, divisor_col1, divisor_col2=None, name="test", counts=False):
    """
    Generate tables about the runtimes of constructing the reconfiguration paths.
    Prints the LaTeX tables and generates figures.

    Two tables are printed: one with the median, average and variance of the runtime per rule and divisor
    value(s), and one with the proportion of timeouts. The figures are produced by plot_data.

    Parameters
    ----------
    df : pandas.DataFrame
        A dataframe containing the relevant data, divisor col values are assumed to be included.
        The columns "rule", "k", "status", "time" and "pathlen" are read.
    rules : list[str]
        The voting rules for which the table is created.
    ks : list[int]
        The committee sizes to consider.
    divisor_col1 : (str, list)
        First item is column name, second the possible values
    divisor_col2 : (str, list)
        First item is column name, second the possible values, optional
    name : str
        Name of the dataset; used in the captions, labels and figure file names. Must be non-empty.
    counts : bool
        Whether to include the pair counts to the table

    Returns
    -------
    pandas.DataFrame
        A summary dataframe.
    """

    def add_ks(_relevant_df):
        # Appends one value per committee size to the summary columns (summary_df is the dict built below)
        # and returns the timeout proportions in the order of ks.
        tos = []
        for k in ks:
            relevant_df_k = _relevant_df[(_relevant_df["k"] == k)]
            med, avg, var, to, count = median_average_runtime(relevant_df_k)
            summary_df[k_title(k)].append(med)
            summary_df[k_avg(k)].append(avg)
            summary_df[k_var(k)].append(var)
            if counts:
                summary_df[k_count(k)].append(count)
            tos.append(to)
        return tos

    # can be used to find the part where there is genuinely no path
    # NOTE(review): this frame is only printed, it does not influence the tables or figures.
    non_path_df = df[df["pathlen"].isna()]
    non_path_df = non_path_df[non_path_df["status"] == "OK"]
    print(non_path_df)
    # Both tables are first collected as dicts of column lists and turned into dataframes afterwards.
    summary_df = {_RULE: []}
    timeout_df = {}
    summary_df[divisor_col1[0]] = []
    timeout_df[divisor_col1[0]] = []
    if divisor_col2:
        summary_df[divisor_col2[0]] = []
        timeout_df[divisor_col2[0]] = []
    for k in ks:
        summary_df[k_title(k)] = []
        summary_df[k_avg(k)] = []
        summary_df[k_var(k)] = []
        if counts:
            summary_df[k_count(k)] = []
        for rule in rules:
            timeout_df[k_TO(k, rule)] = []
    # NOTE(review): TOs is assigned but never read.
    TOs = False
    # Timeout proportions keyed by (rule, val1[, val2], k); the timeout table has one column per rule and k,
    # so it can only be filled once all rules have been processed.
    timeout_dict = {}
    for rule in rules:
        for val1 in divisor_col1[1]:
            if divisor_col2:
                for val2 in divisor_col2[1]:
                    relevant_df = df[(df["rule"] == rule) & (df[divisor_col1[0]] == val1) &
                                     (df[divisor_col2[0]] == val2)]
                    summary_df[_RULE].append(rule)
                    summary_df[divisor_col1[0]].append(val1)
                    summary_df[divisor_col2[0]].append(val2)
                    tos = add_ks(relevant_df)
                    for i, k in enumerate(ks):
                        timeout_dict[rule, val1, val2, k] = tos[i]
            else:
                relevant_df = df[(df["rule"] == rule) & (df[divisor_col1[0]] == val1)]
                summary_df[_RULE].append(rule)
                summary_df[divisor_col1[0]].append(val1)
                tos = add_ks(relevant_df)
                for i, k in enumerate(ks):
                    timeout_dict[rule, val1, k] = tos[i]
    # create the timeout df
    for val1 in divisor_col1[1]:
        if divisor_col2:
            for val2 in divisor_col2[1]:
                timeout_df[divisor_col1[0]].append(val1)
                timeout_df[divisor_col2[0]].append(val2)
                for rule in rules:
                    for k in ks:
                        timeout_df[k_TO(k, rule)].append(timeout_dict[rule, val1, val2, k])
        else:
            timeout_df[divisor_col1[0]].append(val1)
            for rule in rules:
                for k in ks:
                    timeout_df[k_TO(k, rule)].append(timeout_dict[rule, val1, k])
    summary_df = pandas.DataFrame(summary_df)
    timeout_df = pandas.DataFrame(timeout_df)
    if not divisor_col2:
        summary_df = summary_df.sort_values(by=[_RULE, divisor_col1[0]])
        timeout_df = timeout_df.sort_values(by=[divisor_col1[0]])
    else:
        summary_df = summary_df.sort_values(by=[_RULE, divisor_col1[0], divisor_col2[0]])
        timeout_df = timeout_df.sort_values(by=[divisor_col1[0], divisor_col2[0]])
    # Per-column formatters and header aliases; the same two dicts are used for both tables.
    formatters = {k_title(k): float_rounded for k in ks}
    aliases = {}
    for k in ks:
        formatters[k_var(k)] = float_rounded
        formatters[k_avg(k)] = float_rounded
        for rule in rules:
            formatters[k_TO(k, rule)] = lambda x: percentage_string(x, 1)
            aliases[k_TO(k, rule)] = "$\\csize = %d %s$" % (k, rule)
        aliases[k_var(k)] = _VARIANCE
        aliases[k_avg(k)] = _TIME_AVG
        if counts:
            aliases[k_count(k)] = "Nr. pairs"
    caption = "%s data: Median, average, and variance. The data is only taken on instances that did not time out." \
              " Timeout 60s." % (name[0].upper() + name[1:])
    label = "tbl:runtimes%s" % name
    ltx = df_to_latex(summary_df, caption=caption, extra_formatters=formatters, aliases=aliases,
                      first_row_multirow=True, label=label)
    ltx = ltx.replace("\\toprule", "")
    caption = "%s data: The proportion of timeouts. Timeout 60s." % (name[0].upper() + name[1:])
    label = "tbl:timeouts%s" % name
    # NOTE(review): this second replace is a no-op, the toprule was already removed above. The timeout table
    # printed further down keeps its toprule.
    ltx = ltx.replace("\\toprule", "")
    print(ltx)
    ltx = df_to_latex(timeout_df, caption=caption, extra_formatters=formatters, aliases=aliases,
                      first_row_multirow=True, label=label)
    print(ltx)
    # With a second divisor column one figure is drawn per rule, otherwise all rules share one figure.
    if divisor_col2:
        for rule in rules:
            plot_data(summary_df, [rule], ks, divisor_col1, divisor_col2, filename=name + rule)
    else:
        plot_data(summary_df, rules, ks, divisor_col1, divisor_col2, filename=name)
    return summary_df


def manhattan_runtime_stats(stats_folder, rules, ks):
    """
    Generate tables about the runtimes of constructing the reconfiguration paths for Manhattan data.
    Prints the LaTeX tables and generates figures.

    The radius of each file is parsed from its folder name: the text after the first "r", with "_" read as
    the decimal point.

    Parameters
    ----------
    stats_folder : str
        The location of the runtime statistics (csv files).
    rules : list[str]
        The voting rules for which the table is created.
    ks : list[int]
        The committee sizes to consider.

    Returns
    -------
    pandas.DataFrame
        A summary dataframe.
    """
    frames = []
    radii = set()
    for csv_path in get_all_files(stats_folder, "csv"):
        folder = get_folder_from_synthetic_file(csv_path)
        radius = float(folder.split("r")[1].replace("_", "."))
        frame = pandas.read_csv(csv_path, index_col=None, header=0)
        frame[_R] = radius
        frames.append(frame)
        radii.add(radius)
    combined = pandas.concat(frames, axis=0, ignore_index=True)
    return runtime_stats(combined, rules, ks, (_R, sorted(radii)), name="manhattan")


def resampling_runtime_stats(stats_folder, rules, ks):
    """
    Generate tables about the runtimes of constructing the reconfiguration paths for Resampling data.
    Prints the LaTeX tables and generates figures.

    The parameters of each file are parsed from its folder name after removing "pfresampling_": the name is
    split on "_", the first two tokens give p and the next two give phi, each as integer and fractional part
    with the leading letter dropped (e.g. "p0_25_f0_5" gives p = 0.25 and phi = 0.5).

    Parameters
    ----------
    stats_folder : str
        The location of the runtime statistics (csv files).
    rules : list[str]
        The voting rules for which the table is created.
    ks : list[int]
        The committee sizes to consider.

    Returns
    -------
    pandas.DataFrame
        A summary dataframe.
    """
    frames = []
    ps = set()
    fs = set()
    for filename in get_all_files(stats_folder, "csv"):
        folder = get_folder_from_synthetic_file(filename).replace("pfresampling_", "")
        splits = folder.split('_')
        p = float(splits[0][1:] + '.' + splits[1])
        f = float(splits[2][1:] + '.' + splits[3])
        frame = pandas.read_csv(filename, index_col=None, header=0)
        frame[_P] = p
        frame[_F] = f
        frames.append(frame)
        ps.add(p)
        fs.add(f)
    df = pandas.concat(frames, axis=0, ignore_index=True)
    # Both value lists are sorted so the order no longer depends on set iteration order.
    divisor_col1 = (_P, sorted(ps))
    divisor_col2 = (_F, sorted(fs))
    return runtime_stats(df, rules, ks, divisor_col1, divisor_col2, "resampling")


def plot_data(df, rules, ks, divisor_col1, divisor_col2=None, filename="test"):
    """
    Plot the median runtimes of a summary dataframe and save the figure as a png.

    One line is drawn per rule and committee size (and per value of the second divisor column, if given).
    The figure is written to ../images_path/<filename>.png and closed afterwards.

    Parameters
    ----------
    df : pandas.DataFrame
        The summary dataframe, as built by runtime_stats. If the first divisor column is the voter bucket
        column, a helper column holding the lower bucket bound is added to this dataframe in place.
    rules : list[str]
        The voting rules to plot.
    ks : list[int]
        The committee sizes to plot; each needs an entry in the committee size marker table.
    divisor_col1 : (str, list)
        First item is the column used for the x-axis, second the possible values.
    divisor_col2 : (str, list)
        First item is column name, second the possible values, optional. Only the phi column is supported.
    filename : str
        The name of the image file, without extension.

    Raises
    ------
    ValueError
        If divisor_col2 names a column other than the phi column.
    """
    fig, ax = plt.subplots(layout='constrained')
    try:
        legend_elements = []
        if divisor_col1[0] == _VOTERS:
            # Buckets are tuples, which cannot be placed on the x-axis; plot their lower bound instead.
            col1name = _VOTERS + "X"
            df[col1name] = df[_VOTERS].apply(lambda x: x[0])
        else:
            col1name = divisor_col1[0]
        # One legend entry per plotted committee size.
        for k in ks:
            legend_elements.append(Line2D([0], [0], marker=_k_markers[k], color='w', label="k: %d" % k,
                                          markerfacecolor='0.3', markersize=10))
        for rule in rules:
            if len(rules) > 1:
                linestyle = _rule_lines[rule]
                legend_elements.append(Line2D([0], [0], label="Rule: " + rule.upper(),
                                              linestyle=linestyle, color=_linestyle_colors[linestyle]))
            values = df[(df[_RULE] == rule)]
            for k in ks:
                marker = _k_markers[k]
                if divisor_col2 is None:
                    linestyle = _rule_lines[rule]
                    ax.plot(col1name, k_title(k), marker=marker, linestyle=linestyle,
                            data=values, color=_linestyle_colors[linestyle], alpha=0.7, markersize=10)
                else:
                    for val in divisor_col2[1]:
                        if divisor_col2[0] == _F:
                            linestyle = _f_lines[val]
                        else:
                            raise ValueError("Unsupported second column")
                        selected_values = values[values[_F] == val]
                        if k == ks[0]:
                            # Add the legend entry for this value only once, not once per committee size.
                            legend_elements.append(
                                Line2D([0], [0], label=divisor_col2[0] + ": " + float_string(val),
                                       linestyle=linestyle, color=_linestyle_colors[linestyle]))
                        ax.plot(col1name, k_title(k), marker=marker, linestyle=linestyle,
                                data=selected_values, color=_linestyle_colors[linestyle], alpha=0.7,
                                markersize=10)
        ax.legend(handles=legend_elements)
        ax.set_xlabel(divisor_col1[0])
        ax.set_ylabel("Running time (s)")
        # Figure size in inches; local on purpose, the module-level defaults are not used here.
        width, height = 5, 4
        if divisor_col1[0] == _VOTERS:
            # Label every second bucket with its range, rotated to keep the labels from overlapping.
            angle = -90
            xticks = [x[0] for x in divisor_col1[1][::2]]
            xlabels = [range_formatter(x) for x in divisor_col1[1][::2]]
        else:
            angle = 0
            xticks = divisor_col1[1]
            xlabels = divisor_col1[1]
        ax.set_xticks(xticks, labels=xlabels, rotation=angle)
        fig.set_size_inches(width, height)
        fig.tight_layout()
        fig.savefig(path.join("..", "images_path", filename + ".png"), dpi=300)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)


def netflix_runtime_stats(stats_folder, rules, ks, n_multiplier=1000, nmax=20000):
    """
    Generate tables about the runtimes of constructing the reconfiguration paths for Netflix data.
    Prints the LaTeX tables and generates figures.

    The instances are grouped into buckets by their number of voters (column "n").

    Parameters
    ----------
    stats_folder : str
        The location of the runtime statistics (csv files).
    rules : list[str]
        The voting rules for which the table is created.
    ks : list[int]
        The committee sizes to consider.
    n_multiplier : int
        Bucket size for ns
    nmax : int
        After this value all ns are in the same bucket.

    Returns
    -------
    pandas.DataFrame
        A summary dataframe.
    """

    def bucket_of(nro_voters):
        # Everything at or above nmax shares a single open-ended bucket.
        if nmax and nro_voters >= nmax:
            return (nmax,)
        lower = (int(nro_voters) // n_multiplier) * n_multiplier
        return (lower, lower + n_multiplier - 1)

    frames = [pandas.read_csv(csv_path, index_col=None, header=0)
              for csv_path in get_all_files(stats_folder, "csv")]
    combined = pandas.concat(frames, axis=0, ignore_index=True)
    largest_n = int(combined["n"].max() + 1)
    combined[_VOTERS] = combined["n"].apply(bucket_of)
    # One representative per bucket, up to the largest n seen and at most one step past nmax.
    upper = min(largest_n, nmax + n_multiplier)
    buckets = [bucket_of(start) for start in range(1, upper, n_multiplier)]
    return runtime_stats(combined, rules, ks, (_VOTERS, buckets), name="netflix", counts=True)


def collective_stats(folders):
    """
    Generate statistics over all the experiments.

    Parameters
    ----------
    folders : list[str]
        The location of the runtime statistic folders.

    Returns
    -------
    float, float, float, float, int
        The median, average, variance, proportion of timeouts, nr of pairs
    """
    csv_paths = [csv_path for folder in folders for csv_path in get_all_files(folder, "csv")]
    frames = [pandas.read_csv(csv_path, index_col=None, header=0) for csv_path in csv_paths]
    combined = pandas.concat(frames, axis=0, ignore_index=True)
    return median_average_runtime(combined)


if __name__ == "__main__":
    # Script entry point: build the runtime tables and figures for each of the three datasets in turn.
    base_folder = 'path_data'
    rules = ["cc", "pav"]
    ks = [3, 4, 5]
    manhattan_stats_folder = path.join(base_folder, 'manhattan')
    resampling_stats_folder = path.join(base_folder, 'resampling')
    netflix_stats_folder = path.join(base_folder, 'netflix')
    # print(collective_stats([manhattan_stats_folder, resampling_stats_folder, netflix_stats_folder]))
    pipelines = (
        (manhattan_runtime_stats, manhattan_stats_folder),
        (resampling_runtime_stats, resampling_stats_folder),
        (netflix_runtime_stats, netflix_stats_folder),
    )
    for build_stats, stats_folder in pipelines:
        build_stats(stats_folder, rules, ks)
