system-prompts/.scripts/idxtool.py

"""
idxtool is a script used to perform various GPT indexing and searching tasks:

- Find a GPT file by its ID or full ChatGPT URL or via a file containing a list of GPT IDs.
- Rename all the GPTs to include their ChatGPT/g/ID in the filename.
- Generate TOC
- etc.
"""

import argparse
import os
import subprocess
import sys
from typing import Tuple
from urllib.parse import quote

import gptparser
from gptparser import enum_gpts, parse_gpturl, enum_gpt_files, get_prompts_path

# File name used for every table-of-contents file this tool reads or generates.
TOC_FILENAME = 'TOC.md'
# Marker line for a GPTs section inside a TOC file.
# NOTE(review): not referenced by any code visible in this file — confirm it is still needed.
TOC_GPT_MARKER_LINE = '- GPTs'

def get_toc_file() -> str:
    """Return the absolute path of the TOC file located one level above this script's directory."""
    script_dir = os.path.dirname(__file__)
    toc_location = os.path.join(script_dir, os.pardir, TOC_FILENAME)
    return os.path.abspath(toc_location)

def rename_gpts() -> Tuple[bool, str]:
    """
    Prefix every GPT file name with its GPT ID (``<id>_<original name>``).

    Files whose base name already starts with ``<id>_`` are counted as fine and
    left alone. Each rename is attempted with ``git mv`` first; when that fails
    (or git cannot be launched) it falls back to a plain ``os.rename``.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is True only when every enumerated
        file either already had the prefix or was renamed successfully.
    """
    effective_rename = nb_ok = nb_total = 0

    for ok, gpt in enum_gpts():
        nb_total += 1
        if not ok or not (gpt_id := gpt.id()):
            # Other callers in this file print the yielded value directly when
            # `ok` is false, so it may not have a `filename` attribute;
            # fall back to printing the value itself in that case.
            print(f"[!] {getattr(gpt, 'filename', gpt)}")
            continue

        # Skip files with correct prefix
        basename = os.path.basename(gpt.filename)
        if basename.startswith(f"{gpt_id.id}_"):
            nb_ok += 1
            continue
        effective_rename += 1

        # New full file name with ID prefix
        new_fn = os.path.join(os.path.dirname(gpt.filename), f"{gpt_id.id}_{basename}")
        print(f"[+] {basename} -> {os.path.basename(new_fn)}")

        # Pass the arguments as a list (no shell) so that file names containing
        # quotes, spaces, `$` or backticks cannot break or inject into the command.
        try:
            git_moved = subprocess.run(
                ["git", "mv", "--", gpt.filename, new_fn], check=False
            ).returncode == 0
        except OSError:
            # git is not installed or could not be executed
            git_moved = False
        if git_moved:
            nb_ok += 1
            continue

        # If git mv failed, then try os.rename
        try:
            os.rename(gpt.filename, new_fn)
            nb_ok += 1
        except OSError as e:
            print(f"Rename error: {e.strerror}")

    ok = nb_ok == nb_total
    if effective_rename == 0 and ok:
        # Nothing needed renaming and nothing failed
        msg = f"All {nb_total} GPT files were already renamed. No action taken."
        print(msg)
    else:
        msg = f"Renamed {nb_ok} out of {nb_total} GPT files."

    return (ok, msg)


def parse_gpt_file(filename) -> Tuple[bool, str]:
    """
    Parse a GPT markdown file and, on success, save it next to the original
    as ``<name>.new.md``.

    Returns the ``(ok, value)`` pair produced by ``GptMarkdownFile.parse``
    unchanged. When ``ok`` is false the second element is printed before
    returning.
    """
    ok, gpt = gptparser.GptMarkdownFile.parse(filename)
    if not ok:
        print(gpt)
        return (ok, gpt)

    # Same directory, same stem, with a ".new.md" suffix
    stem, _ext = os.path.splitext(os.path.basename(filename))
    parent_dir = os.path.dirname(filename)
    dst_fn = os.path.join(parent_dir, f"{stem}.new.md")
    gpt.save(dst_fn)

    return (ok, gpt)


def rebuild_toc(toc_out: str = '') -> Tuple[bool, str]:
    """
    Rebuilds the table of contents (TOC.md) file, generating only the Prompt Collections section
    that links to the TOC.md files in the prompts subdirectories.
    The TOC file is completely regenerated, not preserving any existing content.

    Args:
        toc_out: Path of the file to write. When empty, the repository TOC
            file returned by get_toc_file() is rewritten in place.

    Returns:
        A ``(ok, message)`` tuple; ``ok`` is False when the output file could
        not be written.
    """
    if not toc_out:
        print("Rebuilding Table of Contents (TOC.md) in place")
        toc_out = get_toc_file()
    else:
        print(f"Rebuilding Table of Contents (TOC.md) to '{toc_out}'")

    # Build the whole document in memory first so that a failure while
    # scanning directories cannot leave a truncated TOC behind.
    out = [
        "# ChatGPT System Prompts - Table of Contents\n\n",
        "This document contains a table of contents for the ChatGPT System Prompts repository.\n\n",
    ]

    # Add links to TOC.md files in prompts directory subdirectories
    prompts_base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'prompts'))
    if os.path.isdir(prompts_base_path):
        # Only include directories that have a TOC.md file, sorted alphabetically
        prompt_dirs = sorted(
            dirname
            for dirname in os.listdir(prompts_base_path)
            if os.path.isdir(os.path.join(prompts_base_path, dirname))
            and os.path.exists(os.path.join(prompts_base_path, dirname, TOC_FILENAME))
        )

        if prompt_dirs:
            out.append("## Prompt Collections\n\n")
            for dirname in prompt_dirs:
                # Create a relative link to the subdirectory TOC file
                link = f"./prompts/{dirname}/{TOC_FILENAME}"
                out.append(f"- [{dirname} Collection]({link})\n")

    # Write the output file (overwriting any existing content); the context
    # manager guarantees the handle is closed even if the write fails.
    try:
        with open(toc_out, 'w', encoding='utf-8') as ofile:
            ofile.writelines(out)
    except OSError:
        return (False, f"Failed to open '{toc_out}' for writing.")

    return (True, "Generated TOC with Prompt Collections only.")

def make_template(url, verbose=True):
    """
    Create an empty GPT template file from a ChatGPT URL.

    The file is named ``<gpt id>_RENAMEME.md`` inside the prompts path and
    holds one placeholder entry per supported field (``verif_status`` is
    skipped). Returns ``(ok, message)``; the message is also printed when
    ``verbose`` is true.
    """
    def report(success, text):
        # Single exit path: optionally echo the message, then hand it back
        if verbose:
            print(text)
        return (success, text)

    gpt_info = parse_gpturl(url)
    if not gpt_info:
        return report(False, f"Invalid ChatGPT URL: '{url}'")

    filename = os.path.join(get_prompts_path(), f"{gpt_info.id}_RENAMEME.md")
    if os.path.exists(filename):
        return report(False, f"File '{filename}' already exists.")

    with open(filename, 'w', encoding='utf-8') as file:
        for field, info in gptparser.SUPPORTED_FIELDS.items():
            if field == 'verif_status':
                continue

            # Every entry starts with "<prefix> <display name>"
            heading = f"{gptparser.FIELD_PREFIX} {info.display}"
            if field == 'url':
                placeholder = f": {url}\n\n"
            elif field == 'instructions':
                placeholder = f":\n```markdown\n{info.display} here...\n```\n\n"
            elif field == 'logo':
                placeholder = ": <img ...>\n\n"
            else:
                placeholder = f": {info.display} goes here...\n\n"
            file.write(heading + placeholder)

    return report(True, f"Created template '{filename}' for URL '{url}'")

def find_gptfile(keyword, verbose=True):
    """Find a GPT file by its ID or full ChatGPT URL
    The ID can be prefixed with '@' to indicate a file containing a list of GPT IDs.

    In such a response file each line is either a GPT URL or a bare GPT ID;
    blank lines and lines starting with '#' are ignored.

    Args:
        keyword: A GPT ID, a GPT URL, or '@<path>' naming a response file.
        verbose: When true, print the IDs being searched and each match.

    Returns:
        A list of ``(id, filename)`` tuples for every matching GPT file.

    Raises:
        OSError: If the response file named after '@' cannot be opened.
    """
    keyword = keyword.strip()
    # Response file with a set of GPT IDs
    if keyword.startswith('@'):
        ids = set()
        with open(keyword[1:], 'r', encoding='utf-8') as file:
            for line in file:
                line = line.strip()
                # Skip blank lines (they would otherwise register an empty ID) and comments
                if not line or line.startswith('#'):
                    continue
                # If the line is a GPT URL, then extract the ID; otherwise it's a GPT ID
                gpt_info = parse_gpturl(line)
                ids.add(gpt_info.id if gpt_info else line)
    elif gpt_info := parse_gpturl(keyword):
        # A single GPT URL
        ids = {gpt_info.id}
    else:
        # A single GPT ID
        ids = {keyword}

    if verbose:
        # Sorted so the output is stable from run to run
        print(f'Looking for GPT files with IDs: {", ".join(sorted(ids))}')

    matches = []
    for gpt_id, filename in enum_gpt_files():
        if gpt_id in ids:
            if verbose:
                print(filename)
            matches.append((gpt_id, filename))

    return matches

def _collect_markdown_files(dir_path, base_path=None):
    """
    Recursively collect all markdown files from a directory and its subdirectories.

    Entries named like the TOC file are skipped at every level.

    Args:
        dir_path: The current directory being processed
        base_path: The base directory path used for computing relative paths
            (defaults to dir_path)

    Returns:
        A list of tuples (relative_path, filename, title) where:
        - relative_path is the file's directory relative to base_path ('' for base_path itself)
        - filename is the name of the file
        - title is the text after '# ' on the file's first line, or the file
          name without extension when the first line is not such a heading
    """
    if base_path is None:
        base_path = dir_path

    result = []

    try:
        items = os.listdir(dir_path)
    except (FileNotFoundError, PermissionError) as e:
        print(f"Warning: Could not access directory '{dir_path}': {str(e)}")
        return result

    for item in items:
        # Skip TOC.md
        if item == TOC_FILENAME:
            continue

        item_path = os.path.join(dir_path, item)
        try:
            if os.path.isfile(item_path) and item.endswith('.md'):
                # Get relative path from the base directory to the file's directory
                rel_dir_path = os.path.relpath(os.path.dirname(item_path), base_path)
                if rel_dir_path == '.':
                    rel_dir_path = ''

                # Extract title from the file; fall back to the file name stem
                title = os.path.splitext(item)[0]
                try:
                    with open(item_path, 'r', encoding='utf-8') as f:
                        first_line = f.readline().strip()
                    if first_line.startswith('# '):
                        title = first_line[2:].strip()
                except (OSError, UnicodeError) as e:
                    # Unreadable file: keep it in the TOC under its fallback title
                    print(f"Warning: Could not read file '{item_path}': {str(e)}")

                result.append((rel_dir_path, item, title))

            elif os.path.isdir(item_path):
                # Recursively collect files from subdirectories using the same base_path
                result.extend(_collect_markdown_files(item_path, base_path))
        except Exception as e:
            # Best effort: one problematic entry must not abort the whole scan
            print(f"Warning: Error processing '{item_path}': {str(e)}")

    return result


def _generate_gpts_toc(dir_path):
    """
    Generate TOC.md for the gpts directory from the GPTs yielded by enum_gpts().

    The file is completely regenerated, not preserving any existing content.
    It is only written once all entries have been built, so a failure while
    enumerating GPTs leaves the existing file untouched.

    Returns:
        A ``(ok, message)`` tuple.
    """
    toc_path = os.path.join(dir_path, TOC_FILENAME)
    try:
        nb_total = 0
        gpts = []
        for ok, gpt in enum_gpts():
            nb_total += 1
            if not ok:
                print(f"[!] {gpt}")
            elif gpt_id := gpt.id():
                gpts.append((gpt_id, gpt))
            else:
                print(f"[!] No ID detected: {gpt.filename}")
        nb_ok = len(gpts)

        # Consistently sort the GPTs by title.
        # NOTE: this key differs slightly from the rendered entry (no space
        # before the version, one extra ')'); it is kept as-is so the order
        # of already generated TOC files does not change.
        def gpts_sorter(key):
            gpt_id, gpt = key
            version = f"{gpt.get('version')}" if gpt.get('version') else ''
            return f"{gpt.get('title')}{version} (id: {gpt_id.id}))"
        gpts.sort(key=gpts_sorter)

        lines = ["# gpts \n\n", f"## GPTs ({nb_ok} total)\n\n"]
        for gpt_id, gpt in gpts:
            file_link = f"./{quote(os.path.basename(gpt.filename))}"
            version = f" {gpt.get('version')}" if gpt.get('version') else ''
            lines.append(f"- [{gpt.get('title')}{version} (id: {gpt_id.id})]({file_link})\n")

        with open(toc_path, 'w', encoding='utf-8') as toc_file:
            toc_file.writelines(lines)

        return (True, f"Generated TOC.md for 'gpts' with {nb_ok} out of {nb_total} GPTs.")
    except Exception as e:
        # Report the failure to the caller instead of aborting the whole run
        return (False, f"Error generating TOC.md for 'gpts': {str(e)}")


def _render_directory_toc(dirname, md_files):
    """
    Build the lines of a TOC.md for a generic prompts directory.

    Files located directly in the directory are listed first; files in
    subdirectories follow under a "Subdirectories" section with one heading
    per subdirectory. Every group is sorted alphabetically.

    Args:
        dirname: Directory name used as the document title
        md_files: (relative_path, filename, title) tuples to list

    Returns:
        The list of text lines making up the TOC document.
    """
    # Group files by their subdirectory
    files_by_dir = {}
    for rel_dir_path, filename, title in md_files:
        files_by_dir.setdefault(rel_dir_path, []).append((filename, title))

    subdirs = sorted(d for d in files_by_dir if d != '')
    lines = [f"# {dirname} \n\n"]

    # First list files in the root directory
    if '' in files_by_dir:
        for filename, title in sorted(files_by_dir['']):
            lines.append(f"- [{title}](./{quote(filename)})\n")

        # Add a separator if we have subdirectories
        if subdirs:
            lines.append("\n")

    # Then list files in subdirectories
    if subdirs:
        lines.append("## Subdirectories\n\n")

        for subdir in subdirs:
            # Write the subdirectory name as a heading
            display_subdir = subdir.replace('\\', '/')  # Ensure consistent path display
            lines.append(f"### {display_subdir}\n\n")

            for filename, title in sorted(files_by_dir[subdir]):
                # Links always use forward slashes, whatever the OS separator is
                link_path = os.path.join(subdir, filename).replace('\\', '/')
                lines.append(f"- [{title}](./{quote(link_path)})\n")

            lines.append("\n")

    return lines


def generate_toc_for_prompts_dirs() -> Tuple[bool, str]:
    """
    Generates a single TOC.md file for each of the three main directories under prompts:
    gpts, official-product, and opensource-prj.
    For gpts directory, uses the original GPT-specific TOC generation logic.
    For other directories, includes all markdown files in the directory and its subdirectories.

    Directories that do not exist, or that contain no markdown files, are
    skipped with an informational message and do not count as failures.

    Returns:
        A ``(ok, message)`` tuple; ``message`` holds one line per processed
        directory and ``ok`` is False if any TOC could not be generated.
    """
    prompts_base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'prompts'))
    if not os.path.exists(prompts_base_path):
        return (False, f"Prompts directory '{prompts_base_path}' does not exist.")

    print(f"Generating TOC.md files for main directories under '{prompts_base_path}'")
    success = True
    messages = []

    # Main directories we want to process
    main_dirs = ["gpts", "official-product", "opensource-prj"]

    for dirname in main_dirs:
        dir_path = os.path.join(prompts_base_path, dirname)
        if not os.path.isdir(dir_path):
            messages.append(f"Directory '{dirname}' does not exist, skipping")
            continue

        # For gpts directory, use the original GPT-specific logic
        if dirname == "gpts":
            ok, msg = _generate_gpts_toc(dir_path)
            success = success and ok
            messages.append(msg)
            continue

        # For other directories, collect all markdown files recursively
        md_files = _collect_markdown_files(dir_path)
        if not md_files:
            messages.append(f"No markdown files found in '{dirname}' or its subdirectories, skipping TOC generation")
            continue

        # Generate TOC.md for this directory; render first, then write, so a
        # rendering failure cannot leave a truncated file behind.
        toc_path = os.path.join(dir_path, TOC_FILENAME)
        try:
            lines = _render_directory_toc(dirname, md_files)
            with open(toc_path, 'w', encoding='utf-8') as toc_file:
                toc_file.writelines(lines)
            messages.append(f"Generated TOC.md for '{dirname}' with {len(md_files)} total files")
        except Exception as e:
            success = False
            messages.append(f"Error generating TOC.md for '{dirname}': {str(e)}")

    result_message = "\n".join(messages)
    return (success, result_message)

def main():
    """
    Command-line entry point.

    Parses the arguments, runs the first requested action (checked in the
    order: --parse-gptfile, --toc, --find-gpt, --template, --rename) and exits
    with status 0 on success or 1 when the action reported a failure.
    """
    parser = argparse.ArgumentParser(description='idxtool: A GPT indexing and searching tool for the CSP repo')

    parser.add_argument('--toc', nargs='?', const='', type=str, help='Rebuild the table of contents (TOC.md) file')
    parser.add_argument('--find-gpt', type=str, help='Find a GPT file by its ID or full ChatGPT URL')
    parser.add_argument('--template', type=str, help='Creates an empty GPT template file from a ChatGPT URL')
    parser.add_argument('--parse-gptfile', type=str, help='Parses a GPT file name')
    parser.add_argument('--rename', action='store_true', help='Rename the GPT file names to include their GPT ID')

    # Handle arguments
    ok = True

    args = parser.parse_args()
    if args.parse_gptfile:
        # parse_gpt_file prints the failure detail itself; printing it here
        # again would only duplicate the output.
        ok, _ = parse_gpt_file(args.parse_gptfile)
    elif args.toc is not None:
        if args.toc:
            # Explicit output path: only the root TOC is written there
            ok, msg = rebuild_toc(args.toc)
            if not ok:
                print(msg)
        else:
            # First rebuild the main TOC file
            ok, msg = rebuild_toc('')
            print(msg)
            # Then generate TOC files for subdirectories under prompts/
            sub_ok, sub_msg = generate_toc_for_prompts_dirs()
            if not sub_ok:
                print(sub_msg)
            ok = ok and sub_ok
    elif args.find_gpt:
        find_gptfile(args.find_gpt)
    elif args.template:
        # make_template prints its own message (verbose by default); keep its
        # status so a failed template creation yields a non-zero exit code.
        ok, _ = make_template(args.template)
    elif args.rename:
        ok, err = rename_gpts()
        if not ok:
            print(err)

    sys.exit(0 if ok else 1)

if __name__ == "__main__":
    main()