Finished 'idxtool' features

- Added alphabetical sorting of GPT IDs in TOC generation.
- Implemented the multi-find `--find-gpt` command-line option for the idxtool (it supports URLs, IDs, or response files).
2025-08-22 23:17:09 -04:00 · 2023-12-18 22:59:21 -08:00 · 2023-12-18 22:59:21 -08:00 · b4aa53116a
commit b4aa53116a
parent 63bc81b225
3 changed files with 103 additions and 92 deletions
--- a/.scripts/README.md
+++ b/.scripts/README.md
@ -2,68 +2,71 @@
 The `idxtool` is a GPT indexing and searching tool for the CSP repo (ChatGPT System Prompt).
 Contributions to `idxtool` are welcome. Please submit pull requests or issues to the CSP repo for review.
 ## Command line
 ```
-usage: idxtool.py [-h] [--update-logo UPDATE_LOGO] [--toc [TOC]]
+usage: idxtool.py [-h] [--toc [TOC]] [--find-gpt FIND_GPT] 
-                  [--update-description UPDATE_DESCRIPTION]
+                  [--parse-gptfile PARSE_GPTFILE] [--rename]
                  [--find-gptfile FIND_GPTFILE] [--find-gpttoc FIND_GPTTOC]
                  [--parse-gptfile PARSE_GPTFILE] [--rename RENAME]
 idxtool: A GPT indexing and searching tool for the CSP repo
 options:
  -h, --help            show this help message and exit
  --update-logo UPDATE_LOGO
                        Update the logos of the GPT file
  --toc [TOC]           Rebuild the table of contents (TOC.md) file
-  --update-description UPDATE_DESCRIPTION
+  --find-gpt FIND_GPT
-                        Update the descriptions of the GPT file
+                        Find a GPT file by its ID or full ChatGPT URL
  --find-gptfile FIND_GPTFILE
                        Find a GPT by its ID or name
  --find-gpttoc FIND_GPTTOC
                        Searches the TOC.md file for the given gptid or free
                        style string
  --parse-gptfile PARSE_GPTFILE
                        Parses a GPT file name
-  --rename              Rename all the GPT file names to include their GPT ID
+  --rename              Rename the GPT file names to include their GPT ID
 ```
 ## Features
 - Update Logos: Use `--update-logo [filename]` to update the logos of the GPT file.
 - Rebuild TOC: Use `--toc` to rebuild the table of contents (TOC.md) file.
- Update Descriptions: Use `--update-description [filename]` to update the descriptions of the GPT file.
+- Find GPT: Use `--find-gpt [GPT ID, full ChatGPT URL, or @file listing IDs/URLs]` to find a GPT file by its ID or URL.
 - Find GPT File: Use `--find-gptfile [gptid or gpt name in quotes]` to find a GPT by its ID or name.
 - Find GPT in TOC: Use `--find-gpttoc [gptid or string]` to search the TOC.md file for a given gptid or free style string.
 - Rename GPT: Use `--rename` to rename all the GPTs to include their GPTID as prefix.
 - Help: Use `--help` to display the help message and usage instructions.
 ## Usage
 To use the tool, run the following command in your terminal with the appropriate arguments:
 ```bash
 python idxtool.py [arguments]
 ```
 Replace `[arguments]` with one of the feature commands listed above.
 ## Example
-To update the logos of a GPT file named `example_gpt.json`, run:
+To rebuild the [TOC.md](../TOC.md) file, run:
 ```bash
-python idxtool.py --update-logo example_gpt.json
+python idxtool.py --toc
 ```
-## Installation
+To find a GPT by its ID, run:
-No additional installation is required. Ensure that you have Python installed on your system to run the tool.
+```bash
 python idxtool.py --find-gpt 3rtbLUIUO
 ```
-## Contributing
+or by URL:
 ```bash
 python idxtool.py --find-gpt https://chat.openai.com/g/g-svehnI9xP-retro-adventures
 ```
 Additionally, you can have a file with a list of IDs or URLs and pass it to the `--find-gpt` option:
 ```bash
 python idxtool.py --find-gpt @gptids.txt
 ```
 (note the '@' symbol).
 The `gptids.txt` file contains a list of IDs or URLs, one per line:
 ```text
 3rtbLUIUO
 https://chat.openai.com/g/g-svehnI9xP-retro-adventures
 #vYzt7bvAm
 w2yOasK1r
 waDWNw2J3
 ```
 Contributions to `idxtool` are welcome. Please submit pull requests or issues to the CSP repo for review.
 ## License
--- a/.scripts/gptparser.py
+++ b/.scripts/gptparser.py
@ -6,15 +6,17 @@ The GPT markdown files have to adhere to a very specific format described in the
 import os, re
 from collections import namedtuple
-from typing import Union, Tuple, Generator
+from typing import Union, Tuple, Generator, Iterator
 compiled_pattern = re.compile(r'^([0-9a-z]{9})_([^\.]+)\.md$', re.IGNORECASE)
 GPT_BASE_URL = 'https://chat.openai.com/g/g-'
 GPT_BASE_URL_L = len(GPT_BASE_URL)
 FIELD_PREFIX = 'GPT'
 GPT_FILE_ID_RE = re.compile(r'^([0-9a-z]{9})_(.*)\.md$', re.IGNORECASE)
 """GPT file name regex with ID and name capture."""
 GPT_FILE_VERSION_RE = re.compile(r'\[([^]]*)\]\.md$', re.IGNORECASE)
 """GPT file name regex with version capture."""
 GptFieldInfo = namedtuple('FieldInfo', ['order', 'display'])
@ -149,19 +151,15 @@ def enum_gpts() -> Generator[Tuple[bool, Union[GptMarkdownFile, str]], None, Non
        else:
            yield (False, f"Failed to parse '{file_path}': {gpt}")
-def enum_gpt_files() -> Generator[str, None, None]:
+def enum_gpt_files() -> Iterator[Tuple[str, str]]:
    """
    Enumerate all the GPT files in the prompts directory while relying on the files naming convention.
    To normalize all the GPT file names, run the `idxtool.py --rename`
    """
    pattern = r'[a-z]{9}_[a-z]+\.[a-z]+'
    prompts_path = get_prompts_path()
    for file_path in os.listdir(prompts_path):
-        _, ext = os.path.splitext(file_path)
+        m = GPT_FILE_ID_RE.match(file_path)
-        if ext != '.md':
+        if not m:
            continue
        file_path = os.path.join(prompts_path, file_path)
-        yield file_path
+        yield (m.group(1), file_path)
--- a/.scripts/idxtool.py
+++ b/.scripts/idxtool.py
@ -1,14 +1,14 @@
 """
 idxtool is a script used to perform various GPT indexing and searching tasks
- Reformat all the GPT files in the source path and save them to the destination path.
+- Find a GPT file by its ID or full ChatGPT URL or via a file containing a list of GPT IDs.
 - Rename all the GPTs to include their ChatGPT/g/ID in the filename.
 - Generate TOC
 - etc.
 """
 import sys, os, argparse
-from gptparser import GptMarkdownFile, enum_gpts, parse_gpturl
+from gptparser import GptMarkdownFile, enum_gpts, parse_gpturl, enum_gpt_files
 from typing import Tuple
 from urllib.parse import quote
@ -18,20 +18,6 @@ TOC_GPT_MARKER_LINE = '- GPTs'
 def get_toc_file() -> str:
    return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', TOC_FILENAME))    
 def update_logo(filename):
    if filename == '*':
        print("TODO: Updating logo for all GPT files")
    else:
        print(f"TODO: Updating logo with file: {filename}")
    raise NotImplementedError
 def update_description(filename):
    if filename == '*':
        print("TODO: Updating description for all GPT files")
    else:
        print(f"TODO Updating description with file: {filename}")
    raise NotImplementedError
 def rename_gpts():
    nb_ok = nb_total = 0
    all_renamed_already = True
@ -117,15 +103,24 @@ def rebuild_toc(toc_out: str = '') -> Tuple[bool, str]:
    out.append(f"{TOC_GPT_MARKER_LINE}\n")
    nb_ok = nb_total = 0
    gpts = []
    for ok, gpt in enum_gpts():
        nb_total += 1
-        if ok and (id := gpt.id()):
+        if ok:
            if id := gpt.id():
                nb_ok += 1
                gpts.append((id, gpt))
            else:
                print(f"[!] No ID detected: {gpt.filename}")
        else:
            print(f"[!] {gpt}")
    # Consistently sort the GPTs by ID
    gpts.sort(key=lambda x: x[0].id)
    for id, gpt in gpts:
        file_link = f"./prompts/gpts/{quote(os.path.basename(gpt.filename))}"
        version = f" {gpt.get('version')}" if gpt.get('version') else ''
        out.append(f"  - [{gpt.get('title')}{version} (id: {id.id})]({file_link})\n")
        else:
            print(f"[!] {gpt.filename}")
    ofile.writelines(out)
    ofile.close()
@ -136,29 +131,50 @@ def rebuild_toc(toc_out: str = '') -> Tuple[bool, str]:
        print(msg)
    return (ok, msg)
-def find_gpt_in_toc(gptid_or_string):
+def find_gptfile(keyword, verbose=True):
-    print(f"TODO: Searching TOC.md for GPT ID or string: {gptid_or_string}")
+    """Find a GPT file by its ID or full ChatGPT URL
-    raise NotImplementedError
+    The ID can be prefixed with '@' to indicate a file containing a list of GPT IDs.
-
+    """
-def find_gptfile(keyword):
+    keyword = keyword.strip()
    keyword = keyword.strip().tolower()
    # Response file with a set of GPT IDs
    if keyword.startswith('@'):
-        print(f"TODO: Finding GPT file with ID: {keyword}")
+        with open(keyword[1:], 'r', encoding='utf-8') as file:
-    if gpt_info := parse_gpturl(keyword):
+            ids = set()
-        keyword = gpt_info.id
+            for line in file:
                line = line.strip()
                # Skip comments
                if line.startswith('#'):
                    continue
                # If the line is a GPT URL, then extract the ID
                if gpt_info := parse_gpturl(line):
                    ids.add(gpt_info.id)
                    continue
                # If not a GPT URL, then it's a GPT ID
                ids.add(line)
    elif gpt_info := parse_gpturl(keyword):
        # A single GPT URL
        ids = {gpt_info.id}
    else:
        # A single GPT ID
        ids = {keyword}
    if verbose:
        print(f'Looking for GPT files with IDs: {", ".join(ids)}')
    matches = []
    for id, filename in enum_gpt_files():
        if id in ids:
            if verbose:
                print(filename)
            matches.append((id, filename))
    return matches
    print(f"TODO: Finding GPT with ID: {keyword}")
    raise NotImplementedError
 def main():
    parser = argparse.ArgumentParser(description='idxtool: A GPT indexing and searching tool for the CSP repo')
    parser.add_argument('--update-logo', type=str, help='Update the logos of the GPT file')
    parser.add_argument('--toc', nargs='?', const='', type=str, help='Rebuild the table of contents (TOC.md) file')
-    parser.add_argument('--update-description', type=str, help='Update the descriptions of the GPT file')
+    parser.add_argument('--find-gpt', type=str, help='Find a GPT file by its ID or full ChatGPT URL')
    parser.add_argument('--find-gptfile', type=str, help='Find a GPT by its ID or name')
    parser.add_argument('--find-gpttoc', type=str, help='Searches the TOC.md file for the given gptid or free style string')
    parser.add_argument('--parse-gptfile', type=str, help='Parses a GPT file name')
    parser.add_argument('--rename', action='store_true', help='Rename the GPT file names to include their GPT ID')
@ -166,23 +182,17 @@ def main():
    ok = True
    args = parser.parse_args()
    if args.update_logo:
        update_logo(args.update_logo)
    if args.parse_gptfile:
        ok, err = parse_gpt_file(args.parse_gptfile)
        if not ok:
            print(err)
-    if args.toc is not None:
+    elif args.toc is not None:
        ok, err = rebuild_toc(args.toc)
        if not ok:
            print(err)
-    if args.update_description:
+    elif args.find_gpt:
-        update_description(args.update_description)
+        find_gptfile(args.find_gpt)
-    if args.find_gptfile:
+    elif args.rename:
        find_gptfile(args.find_gptfile)
    if args.find_gpttoc:
        find_gpt_in_toc(args.find_gpttoc)
    if args.rename:
        ok, err = rename_gpts()
        if not ok:
            print(err)