import json
import sys

from data_generation.clueweb.extract_data import remove_tabs


# Ordered (old, new) substitutions that turn the code-style prompt header into
# natural-language labels. Order matters: each comment line is removed before
# the assignment that follows it is rewritten.
_PROMPT_REPLACEMENTS = (
    ("# website", ""),
    ("website = ", "Website URL: "),
    ("# observation of the current web page\n", ""),
    ('observation = """', "The content of the website:\n"),
    ("# objective\n", ""),
    ("objective = ", "The objective: "),
    ("# past actions\n", "The past actions:"),
)

_SOLVE_MARKER = "def solve():"
_JSON_SUFFIX = ".json"
_NL_SUFFIX = ".nl.json"


def _strip_comment_markers(text: str) -> str:
    """Drop the first two characters of every line that starts with ``#``."""
    return "\n".join(
        line[2:] if line.startswith("#") else line
        for line in text.split("\n")
    )


def _prompt_to_natural_language(prompt: str) -> str:
    """Rewrite one code-style prompt as natural-language text."""
    for old, new in _PROMPT_REPLACEMENTS:
        prompt = prompt.replace(old, new)

    # parts[0] is the header; parts[1] is the body of ``solve`` (the past
    # actions). Indexing parts[1] raises IndexError when the marker is absent.
    parts = prompt.split(_SOLVE_MARKER)
    past_actions = _strip_comment_markers(parts[1].strip().replace("\t", ""))

    # Remaining triple double-quotes are the delimiters of the observation
    # string literal; they are dropped from the final text.
    return (parts[0] + "\n" + past_actions).replace('"""', "")


def _nl_output_path(data_file: str) -> str:
    """Return the output path: a trailing ``.json`` is swapped for ``.nl.json``.

    Only the final extension is touched, so ``.json`` appearing elsewhere in
    the path is left alone. When the path has no ``.json`` extension the new
    suffix is appended, so the result never equals the input path.
    """
    if data_file.endswith(_JSON_SUFFIX):
        data_file = data_file[: -len(_JSON_SUFFIX)]
    return data_file + _NL_SUFFIX


def main(data_file: str) -> None:
    """Convert the prompts and responses in ``data_file`` to natural language.

    The file must contain a JSON list of objects with ``"prompt"`` and
    ``"response"`` string entries. The converted list is written next to the
    input with the ``.nl.json`` suffix, and the output path is printed.

    Args:
        data_file: Path of the JSON file to convert.

    Raises:
        KeyError: If an example lacks ``"prompt"`` or ``"response"``.
        IndexError: If a prompt does not contain ``def solve():``.
    """
    # Load everything first so the input handle is closed before processing.
    with open(data_file, "r", encoding="utf-8") as f:
        examples = json.load(f)

    nl_examples = [
        {
            "prompt": _prompt_to_natural_language(example["prompt"]),
            "response": _strip_comment_markers(example["response"]),
        }
        for example in examples
    ]

    output_file = _nl_output_path(data_file)
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(nl_examples, f)
    print(f"The result is saved to {output_file}")


if __name__ == "__main__":
    # Script entry point: the first command-line argument is the path of the
    # JSON data file handed to main().
    main(sys.argv[1])
