system-prompts/.scripts/idxtool.py

446 lines
17 KiB
Python
Executable file

"""
idxtool is a script used to perform various GPT indexing and searching tasks:
- Find a GPT file by its ID or full ChatGPT URL or via a file containing a list of GPT IDs.
- Rename all the GPTs to include their ChatGPT/g/ID in the filename.
- Generate TOC
- etc.
"""
import sys, os, argparse
from typing import Tuple
from urllib.parse import quote
import gptparser
from gptparser import enum_gpts, parse_gpturl, enum_gpt_files, get_prompts_path
# File name used for every table-of-contents file this script reads or writes.
TOC_FILENAME = 'TOC.md'
# NOTE(review): presumably the line that introduces the GPT list inside a TOC
# file; it is not referenced by the code visible here - confirm it is still needed.
TOC_GPT_MARKER_LINE = '- GPTs'
def get_toc_file() -> str:
    """Return the absolute path of the top-level TOC file.

    The file is located one directory above the directory that holds this
    script and is named after ``TOC_FILENAME``.
    """
    script_dir = os.path.dirname(__file__)
    toc_candidate = os.path.join(script_dir, '..', TOC_FILENAME)
    return os.path.abspath(toc_candidate)
def rename_gpts() -> Tuple[bool, str]:
    """Prefix every GPT file name with its GPT ID.

    Each item yielded by ``enum_gpts()`` is inspected; files whose base name
    already starts with ``<id>_`` are left alone. Other files are renamed with
    ``git mv`` first and, if that fails, with ``os.rename``.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is True only when every enumerated
        item ended up correctly named; ``message`` is the summary that was
        also printed.
    """
    # Local import: only this command needs to spawn a process.
    import subprocess

    effective_rename = nb_ok = nb_total = 0
    for parsed_ok, gpt in enum_gpts():
        nb_total += 1
        gpt_id = gpt.id() if parsed_ok else None
        if not gpt_id:
            # Other code in this file prints failed items directly, so a
            # failed parse may not carry a ``filename`` attribute; fall back
            # to printing the item itself instead of raising AttributeError.
            print(f"[!] {getattr(gpt, 'filename', gpt)}")
            continue

        # Skip files with correct prefix
        basename = os.path.basename(gpt.filename)
        if basename.startswith(f"{gpt_id.id}_"):
            nb_ok += 1
            continue

        effective_rename += 1

        # New full file name with ID prefix
        new_fn = os.path.join(os.path.dirname(gpt.filename), f"{gpt_id.id}_{basename}")
        print(f"[+] {basename} -> {os.path.basename(new_fn)}")

        # Pass the paths as an argument list (no shell), so quotes, '$' or
        # backticks in a file name cannot break or inject into the command.
        # '--' keeps a name starting with '-' from being read as an option.
        try:
            git_rc = subprocess.run(["git", "mv", "--", gpt.filename, new_fn]).returncode
        except OSError:
            # git is not installed or could not be started.
            git_rc = -1
        if git_rc == 0:
            nb_ok += 1
            continue

        # If git mv failed, then try os.rename
        try:
            os.rename(gpt.filename, new_fn)
            nb_ok += 1
        except OSError as e:
            print(f"Rename error: {e.strerror}")

    msg = f"Renamed {nb_ok} out of {nb_total} GPT files."
    ok = nb_ok == nb_total
    if effective_rename == 0:
        msg = f"All {nb_total} GPT files were already renamed. No action taken."
    print(msg)
    return (ok, msg)
def parse_gpt_file(filename) -> Tuple[bool, str]:
    """Parse a GPT markdown file and write the result to ``<name>.new.md``.

    On success the parsed object is saved beside the input file, with the
    original extension replaced by ``.new.md``. On failure the second value
    returned by the parser (presumably an error description) is printed.

    Returns:
        The ``(ok, value)`` pair exactly as produced by
        ``gptparser.GptMarkdownFile.parse``.
    """
    ok, gpt = gptparser.GptMarkdownFile.parse(filename)
    if not ok:
        print(gpt)
        return (ok, gpt)

    stem, _ext = os.path.splitext(os.path.basename(filename))
    target = os.path.join(os.path.dirname(filename), f"{stem}.new.md")
    gpt.save(target)
    return (ok, gpt)
def rebuild_toc(toc_out: str = '') -> Tuple[bool, str]:
    """Regenerate the top-level table of contents (TOC.md).

    Only the "Prompt Collections" section is produced: one link per
    sub-directory of ``../prompts`` (relative to this script) that contains
    its own TOC file. Existing content of the output file is discarded.

    Args:
        toc_out: Path of the file to write. When empty, the file returned by
            ``get_toc_file()`` is rewritten in place.

    Returns:
        ``(True, message)`` on success, or ``(False, message)`` when the
        output file cannot be opened for writing.
    """
    if not toc_out:
        print("Rebuilding Table of Contents (TOC.md) in place")
        toc_out = get_toc_file()
    else:
        print(f"Rebuilding Table of Contents (TOC.md) to '{toc_out}'")

    # Build the whole document in memory first, so that a failure while
    # scanning directories cannot leave a truncated TOC file behind.
    out = [
        "# ChatGPT System Prompts - Table of Contents\n\n",
        "This document contains a table of contents for the ChatGPT System Prompts repository.\n\n",
    ]

    # Add links to TOC.md files in prompts directory subdirectories
    prompts_base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'prompts'))
    if os.path.isdir(prompts_base_path):
        # Only include directories that have a TOC.md file
        prompt_dirs = sorted(
            dirname
            for dirname in os.listdir(prompts_base_path)
            if os.path.isdir(os.path.join(prompts_base_path, dirname))
            and os.path.exists(os.path.join(prompts_base_path, dirname, TOC_FILENAME))
        )
        if prompt_dirs:
            out.append("## Prompt Collections\n\n")
            for dirname in prompt_dirs:
                # Percent-encode the directory name so names with spaces or
                # other special characters still form a valid markdown link.
                link = f"./prompts/{quote(dirname)}/{TOC_FILENAME}"
                out.append(f"- [{dirname} Collection]({link})\n")

    # Open the output file for writing (overwriting any existing content)
    try:
        ofile = open(toc_out, 'w', encoding='utf-8')
    except (OSError, ValueError):
        return (False, f"Failed to open '{toc_out}' for writing.")
    with ofile:
        ofile.writelines(out)

    return (True, "Generated TOC with Prompt Collections only.")
def make_template(url, verbose=True):
    """Create an empty GPT template file from a ChatGPT URL.

    The file is named ``<gpt id>_RENAMEME.md`` inside the prompts directory
    and holds one placeholder entry per supported field, except
    ``verif_status``, which is skipped.

    Args:
        url: ChatGPT URL the template is created for.
        verbose: When true, the outcome message is also printed.

    Returns:
        An ``(ok, message)`` tuple; ``ok`` is False when the URL cannot be
        parsed or the target file already exists.
    """
    def report(status, text):
        # Single exit path: optionally echo the message, then return it.
        if verbose:
            print(text)
        return (status, text)

    gpt_info = parse_gpturl(url)
    if not gpt_info:
        return report(False, f"Invalid ChatGPT URL: '{url}'")

    filename = os.path.join(get_prompts_path(), f"{gpt_info.id}_RENAMEME.md")
    if os.path.exists(filename):
        return report(False, f"File '{filename}' already exists.")

    with open(filename, 'w', encoding='utf-8') as file:
        for field, info in gptparser.SUPPORTED_FIELDS.items():
            if field == 'verif_status':
                continue

            label = f"{gptparser.FIELD_PREFIX} {info.display}"
            if field == 'url':
                entry = f"{label}: {url}\n\n"
            elif field == 'instructions':
                entry = f"{label}:\n```markdown\n{info.display} here...\n```\n\n"
            elif field == 'logo':
                entry = f"{label}: <img ...>\n\n"
            else:
                entry = f"{label}: {info.display} goes here...\n\n"
            file.write(entry)

    return report(True, f"Created template '{filename}' for URL '{url}'")
def find_gptfile(keyword, verbose=True):
    """Find GPT files by GPT ID or full ChatGPT URL.

    Args:
        keyword: A GPT ID, a ChatGPT URL, or ``@<path>`` naming a text file
            with one GPT ID or URL per line. In such a file, blank lines and
            lines starting with ``#`` are ignored.
        verbose: When true, print the IDs being searched and each match.

    Returns:
        A list of ``(id, filename)`` tuples for every file yielded by
        ``enum_gpt_files()`` whose ID is among the requested ones.

    Raises:
        OSError: If the ``@`` response file cannot be opened.
    """
    keyword = keyword.strip()

    if keyword.startswith('@'):
        # Response file with a set of GPT IDs
        ids = set()
        with open(keyword[1:], 'r', encoding='utf-8') as file:
            for line in file:
                line = line.strip()
                # Skip blank lines (they would otherwise become an empty ID)
                # and comments.
                if not line or line.startswith('#'):
                    continue
                # A GPT URL contributes its extracted ID; anything else is
                # taken to be a GPT ID already.
                gpt_info = parse_gpturl(line)
                ids.add(gpt_info.id if gpt_info else line)
    elif gpt_info := parse_gpturl(keyword):
        # A single GPT URL
        ids = {gpt_info.id}
    else:
        # A single GPT ID
        ids = {keyword}

    if verbose:
        # Sorted so the printed list is stable from run to run.
        print(f'Looking for GPT files with IDs: {", ".join(sorted(ids))}')

    matches = []
    for gpt_id, filename in enum_gpt_files():
        if gpt_id in ids:
            if verbose:
                print(filename)
            matches.append((gpt_id, filename))

    return matches
def _collect_markdown_files(dir_path, base_path=None):
    """Recursively list the markdown files below *dir_path*.

    Args:
        dir_path: The directory currently being scanned.
        base_path: Directory that relative paths are computed against;
            defaults to *dir_path* for the outermost call.

    Returns:
        A list of ``(relative_dir, filename, title)`` tuples. ``relative_dir``
        is '' for files directly in *base_path*. ``title`` is the text of a
        leading ``# `` heading on the first line, or the file name without
        its extension when there is none or the file cannot be read.
        Entries named like the TOC file are skipped.
    """
    if base_path is None:
        base_path = dir_path

    try:
        items = os.listdir(dir_path)
    except (FileNotFoundError, PermissionError) as e:
        print(f"Warning: Could not access directory '{dir_path}': {str(e)}")
        return []

    result = []
    for item in items:
        # Skip TOC.md
        if item == TOC_FILENAME:
            continue

        item_path = os.path.join(dir_path, item)
        try:
            if os.path.isfile(item_path) and item.endswith('.md'):
                # Directory of the file relative to the base directory
                rel_dir_path = os.path.relpath(dir_path, base_path)
                if rel_dir_path == '.':
                    rel_dir_path = ''

                # Default title is the file name; prefer a first-line heading.
                title = os.path.splitext(item)[0]
                try:
                    with open(item_path, 'r', encoding='utf-8') as f:
                        first_line = f.readline().strip()
                    if first_line.startswith('# '):
                        title = first_line[2:].strip()
                except (OSError, ValueError) as e:
                    # Unreadable or undecodable file: keep the default title.
                    print(f"Warning: Could not read file '{item_path}': {str(e)}")

                result.append((rel_dir_path, item, title))
            elif os.path.isdir(item_path):
                # Recurse with the same base_path so paths stay relative to it
                result.extend(_collect_markdown_files(item_path, base_path))
        except (OSError, ValueError) as e:
            print(f"Warning: Error processing '{item_path}': {str(e)}")

    return result


def _write_gpts_toc(dir_path):
    """Write TOC.md for the gpts directory from the items of ``enum_gpts()``.

    The file is completely regenerated. Returns an ``(ok, message)`` tuple;
    any error is reported through the message instead of being raised.
    """
    def sort_key(entry):
        # NOTE(review): unlike the displayed text, this key has no space
        # before the version and ends with a stray ')'. It is kept unchanged
        # so the ordering of already generated TOC files stays the same.
        gpt_id, gpt = entry
        version = f"{gpt.get('version')}" if gpt.get('version') else ''
        return f"{gpt.get('title')}{version} (id: {gpt_id.id}))"

    toc_path = os.path.join(dir_path, TOC_FILENAME)
    try:
        with open(toc_path, 'w', encoding='utf-8') as toc_file:
            toc_file.write("# gpts \n\n")

            # Single pass: collect the usable GPTs and report the others.
            nb_total = 0
            gpts = []
            for ok, gpt in enum_gpts():
                nb_total += 1
                if not ok:
                    print(f"[!] {gpt}")
                elif gpt_id := gpt.id():
                    gpts.append((gpt_id, gpt))
                else:
                    print(f"[!] No ID detected: {gpt.filename}")
            nb_ok = len(gpts)

            toc_file.write(f"## GPTs ({nb_ok} total)\n\n")

            # Consistently sort the GPTs by title
            gpts.sort(key=sort_key)
            for gpt_id, gpt in gpts:
                file_link = f"./{quote(os.path.basename(gpt.filename))}"
                version = f" {gpt.get('version')}" if gpt.get('version') else ''
                toc_file.write(f"- [{gpt.get('title')}{version} (id: {gpt_id.id})]({file_link})\n")

        return (True, f"Generated TOC.md for 'gpts' with {nb_ok} out of {nb_total} GPTs.")
    except Exception as e:
        # Reporting boundary: parsing is done by project code that may raise
        # anything, and the caller expects a status rather than an exception.
        return (False, f"Error generating TOC.md for 'gpts': {str(e)}")


def _write_markdown_toc(dirname, dir_path, md_files):
    """Write TOC.md for *dir_path* from ``(relative_dir, filename, title)`` tuples.

    Files located directly in the directory are listed first; files from
    sub-directories follow under a "Subdirectories" section, one heading per
    sub-directory. Errors raised while writing propagate to the caller.
    """
    # Group files by their subdirectory
    files_by_dir = {}
    for rel_dir_path, filename, title in md_files:
        files_by_dir.setdefault(rel_dir_path, []).append((filename, title))

    toc_path = os.path.join(dir_path, TOC_FILENAME)
    with open(toc_path, 'w', encoding='utf-8') as toc_file:
        toc_file.write(f"# {dirname} \n\n")

        # First list files in the root directory
        if '' in files_by_dir:
            for filename, title in sorted(files_by_dir['']):
                toc_file.write(f"- [{title}](./{quote(filename)})\n")
            # Add a separator if we have subdirectories
            if len(files_by_dir) > 1:
                toc_file.write("\n")

        # Then list files in subdirectories
        subdirs = sorted(d for d in files_by_dir if d != '')
        if subdirs:
            toc_file.write("## Subdirectories\n\n")
            for subdir in subdirs:
                # Forward slashes give the same heading on every platform
                display_subdir = subdir.replace('\\', '/')
                toc_file.write(f"### {display_subdir}\n\n")
                for filename, title in sorted(files_by_dir[subdir]):
                    link_path = os.path.join(subdir, filename).replace('\\', '/')
                    toc_file.write(f"- [{title}](./{quote(link_path)})\n")
                toc_file.write("\n")


def generate_toc_for_prompts_dirs() -> Tuple[bool, str]:
    """Generate one TOC.md for each main directory under ``prompts``.

    The directories handled are ``gpts``, ``official-product`` and
    ``opensource-prj``. The ``gpts`` TOC is built from the GPT files reported
    by ``enum_gpts()``; the other TOCs list every markdown file found in the
    directory and its sub-directories. Missing directories and directories
    without markdown files are skipped and do not count as failures.

    Returns:
        ``(success, message)`` where ``message`` joins one status line per
        directory with newlines, and ``success`` is False when the prompts
        directory is missing or any TOC could not be generated.
    """
    prompts_base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'prompts'))
    if not os.path.exists(prompts_base_path):
        return (False, f"Prompts directory '{prompts_base_path}' does not exist.")

    print(f"Generating TOC.md files for main directories under '{prompts_base_path}'")
    success = True
    messages = []

    # Main directories we want to process
    for dirname in ("gpts", "official-product", "opensource-prj"):
        dir_path = os.path.join(prompts_base_path, dirname)
        if not os.path.isdir(dir_path):
            messages.append(f"Directory '{dirname}' does not exist, skipping")
            continue

        # The gpts directory uses the GPT-specific generator
        if dirname == "gpts":
            ok, msg = _write_gpts_toc(dir_path)
            success = success and ok
            messages.append(msg)
            continue

        # Other directories list all markdown files found recursively
        md_files = _collect_markdown_files(dir_path)
        if not md_files:
            messages.append(f"No markdown files found in '{dirname}' or its subdirectories, skipping TOC generation")
            continue

        try:
            _write_markdown_toc(dirname, dir_path, md_files)
        except Exception as e:
            # Reporting boundary: record the failure and continue with the
            # remaining directories.
            success = False
            messages.append(f"Error generating TOC.md for '{dirname}': {str(e)}")
        else:
            messages.append(f"Generated TOC.md for '{dirname}' with {len(md_files)} total files")

    return (success, "\n".join(messages))
def main():
    """Command-line entry point.

    Parses the arguments, runs the first selected action in this order of
    precedence: ``--parse-gptfile``, ``--toc``, ``--find-gpt``, ``--template``,
    ``--rename``, and terminates the process with exit status 0 on success
    or 1 on failure. When no action is selected nothing is done and the
    status is 0.
    """
    parser = argparse.ArgumentParser(description='idxtool: A GPT indexing and searching tool for the CSP repo')
    parser.add_argument('--toc', nargs='?', const='', type=str, help='Rebuild the table of contents (TOC.md) file')
    parser.add_argument('--find-gpt', type=str, help='Find a GPT file by its ID or full ChatGPT URL')
    parser.add_argument('--template', type=str, help='Creates an empty GPT template file from a ChatGPT URL')
    parser.add_argument('--parse-gptfile', type=str, help='Parses a GPT file name')
    parser.add_argument('--rename', action='store_true', help='Rename the GPT file names to include their GPT ID')

    # Handle arguments
    ok = True
    args = parser.parse_args()
    if args.parse_gptfile:
        # parse_gpt_file() already prints the error on failure, so it is not
        # printed a second time here.
        ok, _ = parse_gpt_file(args.parse_gptfile)
    elif args.toc is not None:
        if args.toc:
            ok, err = rebuild_toc(args.toc)
        else:
            # First rebuild the main TOC file
            ok, msg = rebuild_toc('')
            print(msg)
            # Then generate TOC files for subdirectories under prompts/
            sub_ok, sub_err = generate_toc_for_prompts_dirs()
            ok = ok and sub_ok
            err = sub_err if not sub_ok else ""
        # Skip the empty message left when only the main TOC failed; that
        # failure was already printed above.
        if not ok and err:
            print(err)
    elif args.find_gpt:
        find_gptfile(args.find_gpt)
    elif args.template:
        # make_template() prints its own message; keep its status so a bad
        # URL or an existing file produces a non-zero exit code.
        ok, _ = make_template(args.template)
    elif args.rename:
        # rename_gpts() always prints its summary, so it is not repeated here.
        ok, _ = rename_gpts()

    sys.exit(0 if ok else 1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()