# ai-dungeon/data/sheet_to_story.py

"""
Each tree has the format:
dict {
    tree_id: tree_id_text
    context: context_text (may be an empty string)
    first_story_block: first_story_block_text
    action_results: [act_res1, act_res2, act_res3...]
}
where each action_result has the format:
dict {
    action: action_text
    result: result_text (None until a result has been written)
    action_results: [act_res1, act_res2, act_res3...]
}
"""

import csv
import json
import os


def data_to_forest(filename):
    """Build one linear story tree per spreadsheet column of a CSV file.

    The first row is only used to count the columns, and column 0 of every
    row is skipped. For each remaining column, the second row holds the
    first story block and the rows after it alternate action, result,
    action, result, ... Each action/result pair is nested inside the
    previous pair, so every tree is a single chain. When the rows end on an
    action, that action's result is None.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A list of tree dicts with the keys "tree_id", "context",
        "first_story_block" and "action_results". The list is empty when
        the file has fewer than two rows.
    """
    with open(filename, newline="") as f:
        rows = list(csv.reader(f))

    # Both the first row (column count) and the second row (first story
    # blocks) are required; without them there is nothing to build, and
    # indexing rows[0] / rows[1] would raise IndexError.
    if len(rows) < 2:
        return []

    trees = []
    for i in range(1, len(rows[0])):
        tree = {
            "tree_id": "upwork" + str(i),
            "context": "",
            "first_story_block": rows[1][i],
            "action_results": [],
        }
        # Points at the list the next action/result pair is appended to;
        # it moves one level deeper after every pair.
        current_action_results = tree["action_results"]
        for row_ind in range(2, len(rows), 2):
            has_result = row_ind + 1 < len(rows)
            action_result = {
                "action": rows[row_ind][i],
                "result": rows[row_ind + 1][i] if has_result else None,
                "action_results": [],
            }
            current_action_results.append(action_result)
            current_action_results = action_result["action_results"]
        trees.append(tree)

    return trees


def build_action_samples_helper(context, story_block, action_results, path, tree_id):
    """Collect a sample row for every childless action that already has a result.

    Walks `action_results` depth-first. An entry with no children and a
    non-None result produces one row. Every other entry is recursed into,
    with its own result passed down as the story block for its children.
    A childless entry whose result is None therefore produces no row.

    Args:
        context: context text copied into every row.
        story_block: story text that precedes the actions at this level.
        action_results: list of action_result dicts to examine.
        path: list of child indices leading to this level (not mutated).
        tree_id: identifier copied into every row.

    Returns:
        A list of rows
        [tree_id, path, context, story_block, action, result], where path is
        the child indices concatenated with no separator.
    """
    samples = []

    for i, action_result in enumerate(action_results):
        new_path = path + [i]
        children = action_result["action_results"]
        # Truthiness test instead of `len(...) is 0`: `is` compares identity,
        # which only happens to work for small ints on CPython.
        if not children and action_result["result"] is not None:
            samples.append(
                [
                    tree_id,
                    "".join(str(x) for x in new_path),
                    context,
                    story_block,
                    action_result["action"],
                    action_result["result"],
                ]
            )
        else:
            samples.extend(
                build_action_samples_helper(
                    context,
                    action_result["result"],
                    children,
                    new_path,
                    tree_id,
                )
            )

    return samples


def make_write_actions_batch(forest, filename):
    """Write the action samples of every tree in `forest` to a CSV file.

    The file starts with a header row, followed by the rows that
    build_action_samples_helper returns for each tree, in forest order.

    Args:
        forest: iterable of tree dicts.
        filename: path of the CSV file to create or overwrite.
    """
    header = [
        "tree_id",
        "path",
        "context",
        "story_block_1",
        "previous_action",
        "story_block_2",
    ]

    with open(filename, mode="w", newline="") as out:
        batch_writer = csv.writer(
            out, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
        batch_writer.writerow(header)

        for story_tree in forest:
            opening_block = story_tree["first_story_block"]
            # Each tree is walked from its root, so the path starts empty.
            tree_rows = build_action_samples_helper(
                story_tree["context"],
                opening_block,
                story_tree["action_results"],
                [],
                story_tree["tree_id"],
            )
            batch_writer.writerows(tree_rows)


def build_result_samples_helper(
    context, story_block, parent_action_result, path, tree_id
):
    """Collect a sample row for every child action that has no result yet.

    Looks at the children of `parent_action_result`. A child whose result
    is None produces one row. A child that already has a result is recursed
    into as the new parent, with the current parent's result passed down as
    the story block.

    Args:
        context: context text copied into every row.
        story_block: story text that precedes the parent's action.
        parent_action_result: action_result dict whose children are examined.
        path: list of child indices leading to the parent (not mutated).
        tree_id: identifier copied into every row.

    Returns:
        A list of rows
        [tree_id, path, context, story_block, parent action, parent result,
        child action], where path is the child indices concatenated with no
        separator.
    """
    rows = []

    for index, child in enumerate(parent_action_result["action_results"]):
        child_path = path + [index]

        if child["result"] is not None:
            # This child already has a result; look for gaps below it.
            rows.extend(
                build_result_samples_helper(
                    context,
                    parent_action_result["result"],
                    child,
                    child_path,
                    tree_id,
                )
            )
            continue

        path_text = "".join(map(str, child_path))
        rows.append(
            [
                tree_id,
                path_text,
                context,
                story_block,
                parent_action_result["action"],
                parent_action_result["result"],
                child["action"],
            ]
        )

    return rows


def make_write_results_batch(forest, filename):
    """Write the result samples of every tree in `forest` to a CSV file.

    The file starts with a header row. For each tree, the rows that
    build_result_samples_helper returns for each top-level action are
    gathered and then written, in forest order.

    Args:
        forest: iterable of tree dicts.
        filename: path of the CSV file to create or overwrite.
    """
    header = [
        "tree_id",
        "path",
        "context",
        "story_block_1",
        "previous_action_1",
        "story_block_2",
        "previous_action_2",
    ]

    with open(filename, mode="w", newline="") as out:
        batch_writer = csv.writer(
            out, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
        batch_writer.writerow(header)

        for story_tree in forest:
            opening_block = story_tree["first_story_block"]
            tree_rows = []
            for index, top_action in enumerate(story_tree["action_results"]):
                # Each top-level action is the root of its own walk, so its
                # path starts with just its own index.
                tree_rows.extend(
                    build_result_samples_helper(
                        story_tree["context"],
                        opening_block,
                        top_action,
                        [index],
                        story_tree["tree_id"],
                    )
                )

            batch_writer.writerows(tree_rows)


def save_tree(tree, filename):
    """Serialize `tree` as JSON into `filename`, overwriting any existing file."""
    with open(filename, "w") as sink:
        json.dump(tree, sink)


def save_forest(forest, forest_name):
    """Save every tree of `forest` as "<tree_id>.json" in a directory.

    The directory "./<forest_name>" is created if it does not exist,
    including any missing parent directories.

    Args:
        forest: iterable of tree dicts, each with a "tree_id" key.
        forest_name: directory name, relative to the current directory.
    """
    forest_dir = "./" + forest_name
    # makedirs(exist_ok=True) replaces the check-then-mkdir pair: it has no
    # window between the check and the creation, and it also handles nested
    # forest names.
    os.makedirs(forest_dir, exist_ok=True)
    for tree in forest:
        save_tree(tree, forest_dir + "/" + tree["tree_id"] + ".json")


def load_tree(filename):
    """Read the JSON file `filename` and return the decoded object."""
    with open(filename, "r") as source:
        return json.load(source)


def load_forest(forest_name):
    """Load every file in "./<forest_name>" as a tree and return them as a list.

    Every directory entry is passed to load_tree, in the order os.listdir
    returns them.
    """
    folder = "./" + forest_name
    return [load_tree(folder + "/" + entry) for entry in os.listdir(folder)]


def csv_to_dict(file):
    """Read a CSV file into a dict that maps each column name to its values.

    The first non-empty row supplies the column names. Every later row
    appends its cells, by position, to the matching column lists.

    Args:
        file: path of the CSV file to read.

    Returns:
        A dict of column name -> list of cell strings, in row order. It is
        empty when the file has no rows.
    """
    update_dict = {}
    field_names = []

    with open(file, newline="") as f:
        for row in csv.reader(f):
            # Truthiness test instead of `len(update_dict) is 0`: `is`
            # compares identity, which only happens to work for small ints
            # on CPython.
            if not update_dict:
                for item in row:
                    update_dict[item] = []
                    field_names.append(item)
            else:
                for i, item in enumerate(row):
                    update_dict[field_names[i]].append(item)

    return update_dict


def update_forest_with_results(forest_name, update_file):
    """Fill in results on saved trees from the rows of an update CSV.

    Loads every tree in "./<forest_name>", then for each row of
    `update_file` follows the "Input.path" digits down the tree named by
    "Input.tree_id" and sets the "result" of the node reached to the row's
    "Answer.result". The trees are changed in memory only; nothing is
    written back to disk here.

    Args:
        forest_name: directory that holds the tree JSON files.
        update_file: path of the CSV file with the update rows.

    Returns:
        A view of the updated tree dicts (the values of an internal dict
        keyed by tree_id).
    """
    updates = csv_to_dict(update_file)
    forest_dir = "./" + forest_name

    trees_by_id = {}
    for entry in os.listdir(forest_dir):
        loaded = load_tree(forest_dir + "/" + entry)
        trees_by_id[loaded["tree_id"]] = loaded

    for row_idx, tree_id in enumerate(updates["Input.tree_id"]):
        node = trees_by_id[tree_id]
        # Each character of the path string is one child index.
        for digit in updates["Input.path"][row_idx]:
            node = node["action_results"][int(digit)]

        node["result"] = updates["Answer.result"][row_idx]

    return trees_by_id.values()


def update_forest_with_actions(forest_name, update_file):
    """Attach two new actions to saved trees for each row of an update CSV.

    Loads every tree in "./<forest_name>", then for each row of
    `update_file` follows the "Input.path" digits down the tree named by
    "Input.tree_id" and appends two children to the node reached. The
    children are built from "Answer.action_1" and "Answer.action_2", each
    with a None result and no children. The trees are changed in memory
    only; nothing is written back to disk here.

    Args:
        forest_name: directory that holds the tree JSON files.
        update_file: path of the CSV file with the update rows.

    Returns:
        A view of the updated tree dicts (the values of an internal dict
        keyed by tree_id).
    """
    updates = csv_to_dict(update_file)
    forest_dir = "./" + forest_name

    trees_by_id = {}
    for entry in os.listdir(forest_dir):
        loaded = load_tree(forest_dir + "/" + entry)
        trees_by_id[loaded["tree_id"]] = loaded

    for row_idx, tree_id in enumerate(updates["Input.tree_id"]):
        node = trees_by_id[tree_id]
        # Each character of the path string is one child index.
        for digit in updates["Input.path"][row_idx]:
            node = node["action_results"][int(digit)]

        # The two answers are appended in order: action_1, then action_2.
        for answer_column in ("Answer.action_1", "Answer.action_2"):
            node["action_results"].append(
                {
                    "action": updates[answer_column][row_idx],
                    "result": None,
                    "action_results": [],
                }
            )

    return trees_by_id.values()


# Script section: convert the "upwork.csv" sheet into one JSON file per story.
# NOTE: these statements run whenever the module is loaded, including on import.
# Despite its name, `tree` holds the whole list of trees.
tree = data_to_forest("upwork.csv")
for i, story in enumerate(tree):
    save_tree(story, "crowdsourcedstory{}.json".format(i))
print("done")