+ idxtool

A comprehensive GPT index and searching tool for mass GPT management (without using databases, yet).
2025-08-21 02:17:15 -04:00 · 2023-12-11 18:26:49 -08:00 · 2023-12-11 18:26:49 -08:00 · acb6cf7f80
commit acb6cf7f80
parent e779123051
4 changed files with 462 additions and 0 deletions
--- a/.scripts/README.md
+++ b/.scripts/README.md
@ -0,0 +1,72 @@
+# idxtool
+
+The `idxtool` is a GPT indexing and searching tool for the CSP repo (ChatGPT System Prompt).
+
+## Command line
+
+```
+usage: idxtool.py [-h] [--update-logo UPDATE_LOGO] [--toc [TOC]]
+                  [--update-description UPDATE_DESCRIPTION]
+                  [--find-gptfile FIND_GPTFILE] [--find-gpttoc FIND_GPTTOC]
+                  [--parse-gptfile PARSE_GPTFILE] [--rename RENAME]
+
+idxtool: A GPT indexing and searching tool for the CSP repo
+
+options:
+  -h, --help            show this help message and exit
+  --update-logo UPDATE_LOGO
+                        Update the logos of the GPT file
+  --toc [TOC]           Rebuild the table of contents (TOC.md) file
+  --update-description UPDATE_DESCRIPTION
+                        Update the descriptions of the GPT file
+  --find-gptfile FIND_GPTFILE
+                        Find a GPT by its ID or name
+  --find-gpttoc FIND_GPTTOC
+                        Searches the TOC.md file for the given gptid or free
+                        style string
+  --parse-gptfile PARSE_GPTFILE
+                        Parses a GPT file name
+  --rename RENAME       Rename the file name to include its GPT ID
+```
+
+## Features
+
+- Update Logos: Use `--update-logo [filename]` to update the logos of the GPT file.
+- Rebuild TOC: Use `--toc` to rebuild the table of contents (TOC.md) file.
+- Update Descriptions: Use `--update-description [filename]` to update the descriptions of the GPT file.
+- Find GPT File: Use `--find-gptfile [gptid or gpt name in quotes]` to find a GPT by its ID or name.
+- Find GPT in TOC: Use `--find-gpttoc [gptid or string]` to search the TOC.md file for a given gptid or free style string.
+- Rename GPT: Use `--rename [filename]` to rename the file name to include its GPT ID.
+- Help: Use `--help` to display the help message and usage instructions.
+
+## Usage
+
+To use the tool, run the following command in your terminal with the appropriate arguments:
+
+```bash
+python idxtool.py [arguments]
+```
+
+Replace `[arguments]` with one of the feature commands listed above.
+
+## Example
+
+To update the logos of a GPT file named `example_gpt.json`, run:
+
+```bash
+python idxtool.py --update-logo example_gpt.json
+```
+
+## Installation
+
+No additional installation is required. Ensure that you have Python 3.8 or later installed on your system to run the tool (the scripts use the `:=` assignment expression, which older versions do not support).
+
+## Contributing
+
+Contributions to `idxtool` are welcome. Please submit pull requests or issues to the CSP repo for review.
+
+## License
+
+This tool is open-sourced under the GNU General Public License (GPL). Under this license, you are free to use, modify, and redistribute this software, provided that all copies and derivative works are also licensed under the GPL.
+
+For more details, see the [GPLv3 License](https://www.gnu.org/licenses/gpl-3.0.html).
--- a/.scripts/gptparser.py
+++ b/.scripts/gptparser.py
@ -0,0 +1,145 @@
+"""
+GPT parsing module.
+
+The GPT markdown files have to adhere to a very specific format described in the README.md file in the root of the CSP project.
+"""
+
+import os, re
+from collections import namedtuple
+from typing import Union, Tuple, Generator
+
+# Common prefix of every GPT URL; the text that follows it is the GPT's "<id>-<name>" part.
+GPT_BASE_URL = 'https://chat.openai.com/g/g-'
+# Cached length of the prefix, used to slice the identifier out of a URL.
+GPT_BASE_URL_L = len(GPT_BASE_URL)
+# Word that starts every field line of a GPT markdown file (e.g. "GPT URL: ...").
+FIELD_PREFIX = 'GPT'
+
+# Captures the text enclosed in '[...]' immediately before the '.md' extension of a file name.
+GPT_FILE_VERSION_RE = re.compile(r'\[([^]]*)\]\.md$', re.IGNORECASE)
+
+# order: sort position of a field when a file is written out; display: field name as written.
+# NOTE: the namedtuple's type name is 'FieldInfo' although it is bound to GptFieldInfo.
+GptFieldInfo = namedtuple('FieldInfo', ['order', 'display'])
+# id: text before the first '-' of the URL identifier; name: the text after it.
+GptIdentifier = namedtuple('GptIdentifier', ['id', 'name'])
+
+# Description of the fields supported by GPT markdown files.
+SUPPORTED_FIELDS = {
+    'url':              GptFieldInfo(0, 'URL'),
+    'title':            GptFieldInfo(1, 'Title'),
+    'description':      GptFieldInfo(2, 'Description'),
+    'logo':             GptFieldInfo(3, 'Logo'),
+    'instructions':     GptFieldInfo(4, 'Instructions'),
+    'actions':          GptFieldInfo(5, 'Actions'),
+    'kb_files_list':    GptFieldInfo(6, 'KB Files List'),
+    'extras':           GptFieldInfo(7, 'Extras')
+}
+"""
+Dictionary of the fields supported by GPT markdown files:
+- The key should always be in lower case
+- The GPT markdown file will have the form: {FIELD_PREFIX} {key}: {value}
+"""
+
+class GptMarkdownFile:
+    """
+    A class to represent a GPT markdown file.
+    """
+    def __init__(self, fields={}, filename: str = '') -> None:
+        self.fields = fields
+        self.filename = filename
+
+    def get(self, key: str, strip: bool = True) -> Union[str, None]:
+        """
+        Return the value of the field with the specified key.
+        :param key: str, key of the field.
+        :return: str, value of the field.
+        """
+        key = key.lower()
+        if key == 'version':
+            m = GPT_FILE_VERSION_RE.search(self.filename)
+            return m.group(1) if m else ''
+
+        v = self.fields.get(key)
+        return v.strip() if strip else v
+    
+    def id(self) -> Union[GptIdentifier, None]:
+        """
+        Return the GPT identifier.
+        :return: GptIdentifier object.
+        """
+        url = self.fields.get('url')
+        if url and url.startswith(GPT_BASE_URL):
+            id = url[GPT_BASE_URL_L:].split('\n')[0]
+            i = id.find('-')
+            if i != -1:
+                return GptIdentifier(id[:i], id[i+1:].strip())
+            else:
+                return GptIdentifier(id, '')
+        return None
+
+    def __str__(self) -> str:
+        sorted_fields = sorted(self.fields.items(), key=lambda x: SUPPORTED_FIELDS[x[0]].order)
+        # Check if the field value contains the start marker of the markdown block and add a blank line before it
+        field_strings = []
+        for key, value in sorted_fields:
+            if value:
+                # Only replace the first occurrence of ```markdown
+                modified_value = value.replace("```markdown", "\r\n```markdown", 1)
+                field_string = f"{FIELD_PREFIX} {SUPPORTED_FIELDS[key].display}: {modified_value}"
+                field_strings.append(field_string)
+        return "\r\n".join(field_strings)
+
+    @staticmethod
+    def parse(file_path: str) -> Union['GptMarkdownFile', Tuple[bool, str]]:
+        """
+        Parse a markdown file and return a GptMarkdownFile object.
+        :param file_path: str, path to the markdown file.
+        :return: GptMarkdownFile if successful, otherwise a tuple with False and an error message.
+        """
+        if not os.path.exists(file_path):
+            return (False, f"File '{file_path}' does not exist.")
+
+        with open(file_path, 'r', encoding='utf-8') as file:
+            fields = {key.lower(): [] for key in SUPPORTED_FIELDS.keys()}
+            field_re = re.compile(f"^\s*{FIELD_PREFIX}\s+({'|'.join(fields.keys())}):", re.IGNORECASE)
+            current_field = None
+            for line in file:
+                if m := field_re.match(line):
+                    current_field = m.group(1).lower()
+                    line = line[len(m.group(0)):].strip()
+
+                if current_field:
+                    if current_field not in SUPPORTED_FIELDS:
+                        return (False, f"Field '{current_field}' is not supported.")
+
+                    fields[current_field].append(line)
+
+        gpt = GptMarkdownFile(
+            {key: ''.join(value) for key, value in fields.items()},
+            filename=file_path)
+        return (True, gpt)
+
+    def save(self, file_path: str) -> Tuple[bool, Union[str, None]]:
+        """
+        Save the GptMarkdownFile object to a markdown file.
+        :param file_path: str, path to the markdown file.
+        """
+        try:
+            with open(file_path, 'w', encoding='utf-8') as file:
+                file.write(str(self))
+            return (True, None)
+        except Exception as e:
+            return (False, f"Failed to save file '{file_path}': {e}")
+
+
+def get_prompts_path() -> str:
+    """Return the path to the prompts directory."""
+    return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'prompts', 'gpts'))
+
+def enum_gpts() -> Generator[Tuple[bool, Union[GptMarkdownFile, str]], None, None]:
+    """Enumerate all the GPT files in the prompts directory."""
+    prompts_path = get_prompts_path()
+    for file_path in os.listdir(prompts_path):
+        _, ext = os.path.splitext(file_path)
+        if ext != '.md':
+            continue
+        file_path = os.path.join(prompts_path, file_path)
+        ok, gpt = GptMarkdownFile.parse(file_path)
+        if ok:
+            yield (True, gpt)
+        else:
+            yield (False, f"Failed to parse '{file_path}': {gpt}")
--- a/.scripts/idxtool.py
+++ b/.scripts/idxtool.py
@ -0,0 +1,208 @@
+"""
+idxtool is a script used to perform various GPT indexing and searching tasks:
+
+- Reformat all the GPT files in the source path and save them to the destination path.
+- Rename all the GPTs to include their ChatGPT/g/ID in the filename.
+- Generate TOC
+- etc.
+"""
+
+import sys, os, argparse
+from gptparser import GptMarkdownFile, enum_gpts
+from typing import Tuple
+from urllib.parse import quote
+
+# File name of the table of contents, resolved relative to the parent of this script's directory.
+# NOTE(review): the help texts and messages spell it 'TOC.md'; on a case-sensitive
+# file system this upper-case name must match the real file -- confirm.
+TOC_FILENAME = 'TOC.MD'
+# Line prefix in the TOC file that marks where the generated list of GPTs starts.
+TOC_GPT_MARKER_LINE = '- GPTs'
+
+def get_toc_file() -> str:
+    return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', TOC_FILENAME))    
+
+def update_logo(filename):
+    if filename == '*':
+        print("TODO: Updating logo for all GPT files")
+    else:
+        print(f"TODO: Updating logo with file: {filename}")
+    raise NotImplementedError
+
+def update_description(filename):
+    if filename == '*':
+        print("TODO: Updating description for all GPT files")
+    else:
+        print(f"TODO Updating description with file: {filename}")
+    raise NotImplementedError
+
+def rename_gpt(filename):
+    if filename == '*':
+        print("TODO: Renaming all GPT files to include their ID")
+    else:
+        print(f"TODO: Renaming GPT file to include its ID: {filename}")
+    raise NotImplementedError
+
+
+def reformat_gpt_files(src_path: str, dst_path: str) -> Tuple[bool, str]:
+    """
+    Reformat all the GPT files in the source path and save them to the destination path.
+    :param src_path: str, path to the source directory.
+    :param dst_path: str, path to the destination directory.
+    """
+    if not os.path.exists(src_path):
+        return (False, f"Source path '{src_path}' does not exist.")
+
+    if not os.path.exists(dst_path):
+        os.makedirs(dst_path)
+
+    print(f"Reformatting GPT files in '{src_path}' and saving them to '{dst_path}'...")
+
+    nb_ok = nb_total = 0
+    for src_file_path in os.listdir(src_path):
+        _, ext = os.path.splitext(src_file_path)
+        if ext != '.md':
+            continue
+        nb_total += 1
+        dst_file_path = os.path.join(dst_path, src_file_path)
+        src_file_path = os.path.join(src_path, src_file_path)
+        ok, gpt = GptMarkdownFile.parse(src_file_path)
+        if ok:
+            ok, msg = gpt.save(dst_file_path)
+            if ok:
+                id = gpt.id()
+                if id:
+                    info = f"; id={id.id}"
+                    if id.name:
+                        info += f", name='{id.name}'"
+                else:
+                    info = ''
+                print(f"[+] saved '{os.path.basename(src_file_path)}'{info}")
+                nb_ok += 1
+            else:
+                print(f"[!] failed to save '{src_file_path}': {msg}")
+        else:
+            print(f"[!] failed to parse '{src_file_path}': {gpt}")
+
+    msg = f"Reformatted {nb_ok} out of {nb_total} GPT files."
+    ok = nb_ok == nb_total
+    return (ok, msg)
+
+
+def parse_gpt_file(filename) -> Tuple[bool, str]:
+    ok, gpt = GptMarkdownFile.parse(filename)
+    if ok:
+        file_name_without_ext = os.path.splitext(os.path.basename(filename))[0]
+        dst_fn = os.path.join(
+            os.path.dirname(filename), 
+            f"{file_name_without_ext}.new.md")
+        gpt.save(dst_fn)
+    else:
+        print(gpt)
+
+    return (ok, gpt)
+
+
+def rebuild_toc(toc_out: str = '') -> Tuple[bool, str]:    
+    """
+    Rebuilds the table of contents (TOC.md) file by reading all the GPT files in the prompts/gpts directory.
+    """
+    if not toc_out:
+        print(f"Rebuilding Table of Contents (TOC.md) in place")
+    else:
+        print(f"Rebuilding Table of Contents (TOC.md) to '{toc_out}'")
+
+    toc_in = get_toc_file()
+    if not toc_out:
+        toc_out = toc_in
+
+    if not os.path.exists(toc_in):
+        return (False, f"TOC File '{toc_in}' does not exist.")
+
+    
+    # Read the TOC file and find the marker line for the GPT instructions
+    out = []
+    marker_found = False
+    with open(toc_in, 'r', encoding='utf-8') as file:
+        for line in file:
+            if line.startswith(TOC_GPT_MARKER_LINE):
+                marker_found = True
+                break
+            else:
+                out.append(line)
+    if not marker_found:
+        return (False, f"Could not find the marker '{TOC_GPT_MARKER_LINE}' in '{toc_in}'.")
+    
+    # Write the TOC file all the way up to the marker line
+    try:
+        ofile = open(toc_out, 'w', encoding='utf-8')
+    except:
+        return (False, f"Failed to open '{toc_out}' for writing.")
+
+    # Write the marker line and each GPT entry
+    out.append(f"{TOC_GPT_MARKER_LINE}\n")
+
+    nb_ok = nb_total = 0
+    for ok, gpt in enum_gpts():
+        nb_total += 1
+        if ok and (id := gpt.id()):
+            nb_ok += 1
+            file_link = f"./prompts/gpts/{quote(os.path.basename(gpt.filename))}"
+            version = f" {gpt.get('version')}" if gpt.get('version') else ''
+            out.append(f"  - [{gpt.get('title')}{version} (id: {id.id})]({file_link})\n")
+        else:
+            print(f"[!] {gpt}")
+
+    ofile.writelines(out)
+    ofile.close()
+    msg = f"Generated TOC with {nb_ok} out of {nb_total} GPTs."
+
+    ok = nb_ok == nb_total
+    if ok:
+        print(msg)
+    return (ok, msg)
+    
+
+def find_gptfile(keyword):
+    print(f"TODO: Finding GPT file with ID or name: {keyword}")
+    raise NotImplementedError
+
+
+def find_gpt_in_toc(gptid_or_string):
+    print(f"TODO: Searching TOC.md for GPT ID or string: {gptid_or_string}")
+    raise NotImplementedError
+
+def main():
+    parser = argparse.ArgumentParser(description='idxtool: A GPT indexing and searching tool for the CSP repo')
+    
+    parser.add_argument('--update-logo', type=str, help='Update the logos of the GPT file')
+    parser.add_argument('--toc', nargs='?', const='', type=str, help='Rebuild the table of contents (TOC.md) file')
+    parser.add_argument('--update-description', type=str, help='Update the descriptions of the GPT file')
+    parser.add_argument('--find-gptfile', type=str, help='Find a GPT by its ID or name')
+    parser.add_argument('--find-gpttoc', type=str, help='Searches the TOC.md file for the given gptid or free style string')
+    parser.add_argument('--parse-gptfile', type=str, help='Parses a GPT file name')
+    parser.add_argument('--rename', type=str, help='Rename the file name to include its GPT ID')
+
+    # Handle arguments
+    ok = True
+
+    args = parser.parse_args()
+    if args.update_logo:
+        update_logo(args.update_logo)
+    if args.parse_gptfile:
+        ok, err = parse_gpt_file(args.parse_gptfile)
+        if not ok:
+            print(err)
+    if args.toc is not None:
+        ok, err = rebuild_toc(args.toc)
+        if not ok:
+            print(err)
+    if args.update_description:
+        update_description(args.update_description)
+    if args.find_gptfile:
+        find_gptfile(args.find_gptfile)
+    if args.find_gpttoc:
+        find_gpt_in_toc(args.find_gpttoc)
+    if args.rename:
+        rename_gpt(args.rename)
+
+    sys.exit(0 if ok else 1)
+
+# Run the command-line interface only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -0,0 +1,37 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "idxtool.py -- Update Logo",
+            "type": "python",
+            "request": "launch",
+            "program": "${workspaceFolder}/.scripts/idxtool.py",
+            "args": ["--update-logo", "logo.png"],
+            "console": "integratedTerminal"
+        },
+        {
+            "name": "idxtool.py -- Rebuild TOC",
+            "type": "python",
+            "request": "launch",
+            "program": "${workspaceFolder}/.scripts/idxtool.py",
+            "args": ["--toc"],
+            "console": "integratedTerminal"
+        },
+        {
+            "name": "idxtool.py -- Find GPT File",
+            "type": "python",
+            "request": "launch",
+            "program": "${workspaceFolder}/.scripts/idxtool.py",
+            "args": ["--find-gptfile", "GPT3"],
+            "console": "integratedTerminal"
+        },
+        {
+            "name": "idxtool.py -- Help",
+            "type": "python",
+            "request": "launch",
+            "program": "${workspaceFolder}/.scripts/idxtool.py",
+            "args": ["--help"],
+            "console": "integratedTerminal"
+        }
+    ]
+}