mirror of
https://github.com/LouisShark/chatgpt_system_prompt.git
synced 2025-07-04 22:00:27 -04:00
- added alphabetical IDs sort in TOC generation - implemented multi-find 'findgpt' command line for the idxtool (it supports URLs, IDs, or response files)
165 lines
6.3 KiB
Python
165 lines
6.3 KiB
Python
"""
|
|
GPT parsing module.
|
|
|
|
The GPT markdown files have to adhere to a very specific format described in the README.md file in the root of the CSP project.
|
|
"""
|
|
|
|
import os, re
|
|
from collections import namedtuple
|
|
from typing import Union, Tuple, Generator, Iterator
|
|
|
|
|
|
GPT_BASE_URL = 'https://chat.openai.com/g/g-'
|
|
GPT_BASE_URL_L = len(GPT_BASE_URL)
|
|
FIELD_PREFIX = 'GPT'
|
|
|
|
GPT_FILE_ID_RE = re.compile(r'^([0-9a-z]{9})_(.*)\.md$', re.IGNORECASE)
|
|
"""GPT file name regex with ID and name capture."""
|
|
GPT_FILE_VERSION_RE = re.compile(r'\[([^]]*)\]\.md$', re.IGNORECASE)
|
|
"""GPT file name regex with version capture."""
|
|
|
|
GptFieldInfo = namedtuple('FieldInfo', ['order', 'display'])
|
|
|
|
GptIdentifier = namedtuple('GptIdentifier', ['id', 'name'])
|
|
"""Description of the fields supported by GPT markdown files."""
|
|
|
|
SUPPORTED_FIELDS = {
|
|
'url': GptFieldInfo(10, 'URL'),
|
|
'title': GptFieldInfo(20, 'Title'),
|
|
'description': GptFieldInfo(30, 'Description'),
|
|
'logo': GptFieldInfo(40, 'Logo'),
|
|
'verif_status': GptFieldInfo(50, 'Verification Status'),
|
|
'instructions': GptFieldInfo(60, 'Instructions'),
|
|
'actions': GptFieldInfo(70, 'Actions'),
|
|
'kb_files_list': GptFieldInfo(80, 'KB Files List'),
|
|
'extras': GptFieldInfo(90, 'Extras')
|
|
}
|
|
"""
|
|
Dictionary of the fields supported by GPT markdown files:
|
|
- The key should always be in lower case
|
|
- The GPT markdown file will have the form: {FIELD_PREFIX} {key}: {value}
|
|
"""
|
|
|
|
class GptMarkdownFile:
|
|
"""
|
|
A class to represent a GPT markdown file.
|
|
"""
|
|
def __init__(self, fields={}, filename: str = '') -> None:
|
|
self.fields = fields
|
|
self.filename = filename
|
|
|
|
def get(self, key: str, strip: bool = True) -> Union[str, None]:
|
|
"""
|
|
Return the value of the field with the specified key.
|
|
:param key: str, key of the field.
|
|
:return: str, value of the field.
|
|
"""
|
|
key = key.lower()
|
|
if key == 'version':
|
|
m = GPT_FILE_VERSION_RE.search(self.filename)
|
|
return m.group(1) if m else ''
|
|
|
|
v = self.fields.get(key)
|
|
return v.strip() if strip else v
|
|
|
|
def id(self) -> Union[GptIdentifier, None]:
|
|
"""
|
|
Return the GPT identifier.
|
|
:return: GptIdentifier object.
|
|
"""
|
|
return parse_gpturl(self.fields.get('url'))
|
|
|
|
def __str__(self) -> str:
|
|
sorted_fields = sorted(self.fields.items(), key=lambda x: SUPPORTED_FIELDS[x[0]].order)
|
|
# Check if the field value contains the start marker of the markdown block and add a blank line before it
|
|
field_strings = []
|
|
for key, value in sorted_fields:
|
|
if value:
|
|
# Only replace the first occurrence of ```markdown
|
|
modified_value = value.replace("```markdown", "\r\n```markdown", 1)
|
|
field_string = f"{FIELD_PREFIX} {SUPPORTED_FIELDS[key].display}: {modified_value}"
|
|
field_strings.append(field_string)
|
|
return "\r\n".join(field_strings)
|
|
|
|
@staticmethod
|
|
def parse(file_path: str) -> Union['GptMarkdownFile', Tuple[bool, str]]:
|
|
"""
|
|
Parse a markdown file and return a GptMarkdownFile object.
|
|
:param file_path: str, path to the markdown file.
|
|
:return: GptMarkdownFile if successful, otherwise a tuple with False and an error message.
|
|
"""
|
|
if not os.path.exists(file_path):
|
|
return (False, f"File '{file_path}' does not exist.")
|
|
|
|
with open(file_path, 'r', encoding='utf-8') as file:
|
|
fields = {key.lower(): [] for key in SUPPORTED_FIELDS.keys()}
|
|
field_re = re.compile(f"^\s*{FIELD_PREFIX}\s+({'|'.join(fields.keys())}):", re.IGNORECASE)
|
|
current_field = None
|
|
for line in file:
|
|
if m := field_re.match(line):
|
|
current_field = m.group(1).lower()
|
|
line = line[len(m.group(0)):].strip()
|
|
|
|
if current_field:
|
|
if current_field not in SUPPORTED_FIELDS:
|
|
return (False, f"Field '{current_field}' is not supported.")
|
|
|
|
fields[current_field].append(line)
|
|
|
|
gpt = GptMarkdownFile(
|
|
{key: ''.join(value) for key, value in fields.items()},
|
|
filename=file_path)
|
|
return (True, gpt)
|
|
|
|
def save(self, file_path: str) -> Tuple[bool, Union[str, None]]:
|
|
"""
|
|
Save the GptMarkdownFile object to a markdown file.
|
|
:param file_path: str, path to the markdown file.
|
|
"""
|
|
try:
|
|
with open(file_path, 'w', encoding='utf-8') as file:
|
|
file.write(str(self))
|
|
return (True, None)
|
|
except Exception as e:
|
|
return (False, f"Failed to save file '{file_path}': {e}")
|
|
|
|
def parse_gpturl(url: str) -> Union[GptIdentifier, None]:
|
|
if url and url.startswith(GPT_BASE_URL):
|
|
id = url[GPT_BASE_URL_L:].split('\n')[0]
|
|
i = id.find('-')
|
|
if i != -1:
|
|
return GptIdentifier(id[:i], id[i+1:])
|
|
else:
|
|
return GptIdentifier(id, '')
|
|
|
|
|
|
def get_prompts_path() -> str:
|
|
"""Return the path to the prompts directory."""
|
|
return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'prompts', 'gpts'))
|
|
|
|
def enum_gpts() -> Generator[Tuple[bool, Union[GptMarkdownFile, str]], None, None]:
|
|
"""Enumerate all the GPT files in the prompts directory, parse them and return the parsed GPT object."""
|
|
prompts_path = get_prompts_path()
|
|
for file_path in os.listdir(prompts_path):
|
|
_, ext = os.path.splitext(file_path)
|
|
if ext != '.md':
|
|
continue
|
|
file_path = os.path.join(prompts_path, file_path)
|
|
ok, gpt = GptMarkdownFile.parse(file_path)
|
|
if ok:
|
|
yield (True, gpt)
|
|
else:
|
|
yield (False, f"Failed to parse '{file_path}': {gpt}")
|
|
|
|
def enum_gpt_files() -> Iterator[Tuple[str, str]]:
|
|
"""
|
|
Enumerate all the GPT files in the prompts directory while relying on the files naming convention.
|
|
To normalize all the GPT file names, run the `idxtool.py --rename`
|
|
"""
|
|
prompts_path = get_prompts_path()
|
|
for file_path in os.listdir(prompts_path):
|
|
m = GPT_FILE_ID_RE.match(file_path)
|
|
if not m:
|
|
continue
|
|
file_path = os.path.join(prompts_path, file_path)
|
|
yield (m.group(1), file_path)
|