AIDungeon/data/sheet_to_story.py

"""
format of tree is
dict {
    tree_id: tree_id_text
    context: context text?
    first_story_block
    action_results: [act_res1, act_res2, act_res3...]
}
where each action_result's format is:
dict{
    action: action_text
    result: result_text
    action_results: [act_res1, act_res2, act_res3...]
}
"""

import csv
import json
import os


def data_to_forest(filename):

    trees = []
    rows = []

    with open(filename, newline="") as f:
        reader = csv.reader(f)
        for row in reader:
            rows.append(row)

    for i in range(1, len(rows[0])):
        tree = {}
        tree["tree_id"] = "upwork" + str(i)
        tree["context"] = ""
        tree["first_story_block"] = rows[1][i]
        tree["action_results"] = []
        current_action_results = tree["action_results"]
        row_ind = 2
        while row_ind < len(rows):
            action_result = {}
            action_result["action"] = rows[row_ind][i]
            if row_ind + 1 < len(rows):
                action_result["result"] = rows[row_ind + 1][i]
            else:
                action_result["result"] = None
            action_result["action_results"] = []
            current_action_results.append(action_result)
            current_action_results = action_result["action_results"]
            row_ind += 2
        trees.append(tree)

    return trees


def build_action_samples_helper(context, story_block, action_results, path, tree_id):

    samples = []

    for i, action_result in enumerate(action_results):
        new_path = path[:]
        new_path.append(i)
        if (
            len(action_result["action_results"]) is 0
            and action_result["result"] is not None
        ):
            row = [
                tree_id,
                "".join(str(x) for x in new_path),
                context,
                story_block,
                action_result["action"],
                action_result["result"],
            ]
            samples.append(row)
        else:
            sub_result = build_action_samples_helper(
                context,
                action_result["result"],
                action_result["action_results"],
                new_path,
                tree_id,
            )
            samples += sub_result

    return samples


def make_write_actions_batch(forest, filename):
    # Traverse to the bottom levels of each tree
    with open(filename, mode="w", newline="") as file:
        writer = csv.writer(
            file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
        writer.writerow(
            [
                "tree_id",
                "path",
                "context",
                "story_block_1",
                "previous_action",
                "story_block_2",
            ]
        )

        for tree in forest:
            first_story_block = tree["first_story_block"]
            samples = build_action_samples_helper(
                tree["context"],
                first_story_block,
                tree["action_results"],
                [],
                tree["tree_id"],
            )

            for sample in samples:
                writer.writerow(sample)


def build_result_samples_helper(
    context, story_block, parent_action_result, path, tree_id
):

    samples = []
    action_results = parent_action_result["action_results"]

    for i, action_result in enumerate(action_results):
        new_path = path[:]
        new_path.append(i)
        if action_result["result"] is None:
            row = [
                tree_id,
                "".join(str(x) for x in new_path),
                context,
                story_block,
                parent_action_result["action"],
                parent_action_result["result"],
                action_result["action"],
            ]
            samples.append(row)
        else:
            sub_result = build_result_samples_helper(
                context,
                parent_action_result["result"],
                action_result,
                new_path,
                tree_id,
            )
            samples += sub_result

    return samples


def make_write_results_batch(forest, filename):

    with open(filename, mode="w", newline="") as file:
        writer = csv.writer(
            file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
        writer.writerow(
            [
                "tree_id",
                "path",
                "context",
                "story_block_1",
                "previous_action_1",
                "story_block_2",
                "previous_action_2",
            ]
        )

        for tree in forest:
            first_story_block = tree["first_story_block"]
            samples = []
            for i, action_result in enumerate(tree["action_results"]):
                path = [i]
                samples += build_result_samples_helper(
                    tree["context"],
                    first_story_block,
                    action_result,
                    path,
                    tree["tree_id"],
                )

            for sample in samples:
                writer.writerow(sample)


def save_tree(tree, filename):
    with open(filename, "w") as fp:
        json.dump(tree, fp)


def save_forest(forest, forest_name):

    if not os.path.exists("./" + forest_name):
        os.mkdir("./" + forest_name)
    for tree in forest:
        save_tree(tree, "./" + forest_name + "/" + tree["tree_id"] + ".json")


def load_tree(filename):
    with open(filename, "r") as fp:
        tree = json.load(fp)
    return tree


def load_forest(forest_name):

    files = os.listdir("./" + forest_name)
    forest = []
    for file in files:
        forest.append(load_tree("./" + forest_name + "/" + file))
    return forest


def csv_to_dict(file):
    update_dict = {}
    field_names = []

    with open(file, newline="") as f:
        reader = csv.reader(f)
        for row in reader:
            if len(update_dict) is 0:
                for item in row:
                    update_dict[item] = []
                    field_names.append(item)

            else:
                for i, item in enumerate(row):
                    update_dict[field_names[i]].append(item)

    return update_dict


def update_forest_with_results(forest_name, update_file):
    update_dict = csv_to_dict(update_file)
    tree_dict = {}
    tree_filenames = os.listdir("./" + forest_name)

    for file_name in tree_filenames:
        tree = load_tree("./" + forest_name + "/" + file_name)
        tree_dict[tree["tree_id"]] = tree

    for i in range(len(update_dict["Input.tree_id"])):
        tree = tree_dict[update_dict["Input.tree_id"][i]]
        current_action_results = tree
        for choice in update_dict["Input.path"][i]:
            choice_num = int(choice)
            current_action_results = current_action_results["action_results"][
                choice_num
            ]

        current_action_results["result"] = update_dict["Answer.result"][i]

    return tree_dict.values()


def update_forest_with_actions(forest_name, update_file):
    update_dict = csv_to_dict(update_file)
    tree_dict = {}
    tree_filenames = os.listdir("./" + forest_name)

    for file_name in tree_filenames:
        tree = load_tree("./" + forest_name + "/" + file_name)
        tree_dict[tree["tree_id"]] = tree

    for i in range(len(update_dict["Input.tree_id"])):
        tree = tree_dict[update_dict["Input.tree_id"][i]]
        current_action_results = tree
        for choice in update_dict["Input.path"][i]:
            choice_num = int(choice)
            current_action_results = current_action_results["action_results"][
                choice_num
            ]

        current_action_results["action_results"].append(
            {
                "action": update_dict["Answer.action_1"][i],
                "result": None,
                "action_results": [],
            }
        )
        current_action_results["action_results"].append(
            {
                "action": update_dict["Answer.action_2"][i],
                "result": None,
                "action_results": [],
            }
        )

    return tree_dict.values()


tree = data_to_forest("upwork.csv")
for i, story in enumerate(tree):
    save_tree(story, "crowdsourcedstory" + str(i) + ".json")
print("done")
Initial commit 2025-03-11 22:26:45 -04:00			`"""`
			`format of tree is`
			`dict {`
			`tree_id: tree_id_text`
			`context: context text?`
			`first_story_block`
			`action_results: [act_res1, act_res2, act_res3...]`
			`}`
			`where each action_result's format is:`
			`dict{`
			`action: action_text`
			`result: result_text`
			`action_results: [act_res1, act_res2, act_res3...]`
			`}`
			`"""`

			`import csv`
			`import json`
			`import os`


			`def data_to_forest(filename):`

			`trees = []`
			`rows = []`

			`with open(filename, newline="") as f:`
			`reader = csv.reader(f)`
			`for row in reader:`
			`rows.append(row)`

			`for i in range(1, len(rows[0])):`
			`tree = {}`
			`tree["tree_id"] = "upwork" + str(i)`
			`tree["context"] = ""`
			`tree["first_story_block"] = rows[1][i]`
			`tree["action_results"] = []`
			`current_action_results = tree["action_results"]`
			`row_ind = 2`
			`while row_ind < len(rows):`
			`action_result = {}`
			`action_result["action"] = rows[row_ind][i]`
			`if row_ind + 1 < len(rows):`
			`action_result["result"] = rows[row_ind + 1][i]`
			`else:`
			`action_result["result"] = None`
			`action_result["action_results"] = []`
			`current_action_results.append(action_result)`
			`current_action_results = action_result["action_results"]`
			`row_ind += 2`
			`trees.append(tree)`

			`return trees`


			`def build_action_samples_helper(context, story_block, action_results, path, tree_id):`

			`samples = []`

			`for i, action_result in enumerate(action_results):`
			`new_path = path[:]`
			`new_path.append(i)`
			`if (`
			`len(action_result["action_results"]) is 0`
			`and action_result["result"] is not None`
			`):`
			`row = [`
			`tree_id,`
			`"".join(str(x) for x in new_path),`
			`context,`
			`story_block,`
			`action_result["action"],`
			`action_result["result"],`
			`]`
			`samples.append(row)`
			`else:`
			`sub_result = build_action_samples_helper(`
			`context,`
			`action_result["result"],`
			`action_result["action_results"],`
			`new_path,`
			`tree_id,`
			`)`
			`samples += sub_result`

			`return samples`


			`def make_write_actions_batch(forest, filename):`
			`# Traverse to the bottom levels of each tree`
			`with open(filename, mode="w", newline="") as file:`
			`writer = csv.writer(`
			`file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL`
			`)`
			`writer.writerow(`
			`[`
			`"tree_id",`
			`"path",`
			`"context",`
			`"story_block_1",`
			`"previous_action",`
			`"story_block_2",`
			`]`
			`)`

			`for tree in forest:`
			`first_story_block = tree["first_story_block"]`
			`samples = build_action_samples_helper(`
			`tree["context"],`
			`first_story_block,`
			`tree["action_results"],`
			`[],`
			`tree["tree_id"],`
			`)`

			`for sample in samples:`
			`writer.writerow(sample)`


			`def build_result_samples_helper(`
			`context, story_block, parent_action_result, path, tree_id`
			`):`

			`samples = []`
			`action_results = parent_action_result["action_results"]`

			`for i, action_result in enumerate(action_results):`
			`new_path = path[:]`
			`new_path.append(i)`
			`if action_result["result"] is None:`
			`row = [`
			`tree_id,`
			`"".join(str(x) for x in new_path),`
			`context,`
			`story_block,`
			`parent_action_result["action"],`
			`parent_action_result["result"],`
			`action_result["action"],`
			`]`
			`samples.append(row)`
			`else:`
			`sub_result = build_result_samples_helper(`
			`context,`
			`parent_action_result["result"],`
			`action_result,`
			`new_path,`
			`tree_id,`
			`)`
			`samples += sub_result`

			`return samples`


			`def make_write_results_batch(forest, filename):`

			`with open(filename, mode="w", newline="") as file:`
			`writer = csv.writer(`
			`file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL`
			`)`
			`writer.writerow(`
			`[`
			`"tree_id",`
			`"path",`
			`"context",`
			`"story_block_1",`
			`"previous_action_1",`
			`"story_block_2",`
			`"previous_action_2",`
			`]`
			`)`

			`for tree in forest:`
			`first_story_block = tree["first_story_block"]`
			`samples = []`
			`for i, action_result in enumerate(tree["action_results"]):`
			`path = [i]`
			`samples += build_result_samples_helper(`
			`tree["context"],`
			`first_story_block,`
			`action_result,`
			`path,`
			`tree["tree_id"],`
			`)`

			`for sample in samples:`
			`writer.writerow(sample)`


			`def save_tree(tree, filename):`
			`with open(filename, "w") as fp:`
			`json.dump(tree, fp)`


			`def save_forest(forest, forest_name):`

			`if not os.path.exists("./" + forest_name):`
			`os.mkdir("./" + forest_name)`
			`for tree in forest:`
			`save_tree(tree, "./" + forest_name + "/" + tree["tree_id"] + ".json")`


			`def load_tree(filename):`
			`with open(filename, "r") as fp:`
			`tree = json.load(fp)`
			`return tree`


			`def load_forest(forest_name):`

			`files = os.listdir("./" + forest_name)`
			`forest = []`
			`for file in files:`
			`forest.append(load_tree("./" + forest_name + "/" + file))`
			`return forest`


			`def csv_to_dict(file):`
			`update_dict = {}`
			`field_names = []`

			`with open(file, newline="") as f:`
			`reader = csv.reader(f)`
			`for row in reader:`
			`if len(update_dict) is 0:`
			`for item in row:`
			`update_dict[item] = []`
			`field_names.append(item)`

			`else:`
			`for i, item in enumerate(row):`
			`update_dict[field_names[i]].append(item)`

			`return update_dict`


			`def update_forest_with_results(forest_name, update_file):`
			`update_dict = csv_to_dict(update_file)`
			`tree_dict = {}`
			`tree_filenames = os.listdir("./" + forest_name)`

			`for file_name in tree_filenames:`
			`tree = load_tree("./" + forest_name + "/" + file_name)`
			`tree_dict[tree["tree_id"]] = tree`

			`for i in range(len(update_dict["Input.tree_id"])):`
			`tree = tree_dict[update_dict["Input.tree_id"][i]]`
			`current_action_results = tree`
			`for choice in update_dict["Input.path"][i]:`
			`choice_num = int(choice)`
			`current_action_results = current_action_results["action_results"][`
			`choice_num`
			`]`

			`current_action_results["result"] = update_dict["Answer.result"][i]`

			`return tree_dict.values()`


			`def update_forest_with_actions(forest_name, update_file):`
			`update_dict = csv_to_dict(update_file)`
			`tree_dict = {}`
			`tree_filenames = os.listdir("./" + forest_name)`

			`for file_name in tree_filenames:`
			`tree = load_tree("./" + forest_name + "/" + file_name)`
			`tree_dict[tree["tree_id"]] = tree`

			`for i in range(len(update_dict["Input.tree_id"])):`
			`tree = tree_dict[update_dict["Input.tree_id"][i]]`
			`current_action_results = tree`
			`for choice in update_dict["Input.path"][i]:`
			`choice_num = int(choice)`
			`current_action_results = current_action_results["action_results"][`
			`choice_num`
			`]`

			`current_action_results["action_results"].append(`
			`{`
			`"action": update_dict["Answer.action_1"][i],`
			`"result": None,`
			`"action_results": [],`
			`}`
			`)`
			`current_action_results["action_results"].append(`
			`{`
			`"action": update_dict["Answer.action_2"][i],`
			`"result": None,`
			`"action_results": [],`
			`}`
			`)`

			`return tree_dict.values()`


			`tree = data_to_forest("upwork.csv")`
			`for i, story in enumerate(tree):`
			`save_tree(story, "crowdsourcedstory" + str(i) + ".json")`
			`print("done")`