""" GPT parsing module. The GPT markdown files have to adhere to a very specific format described in the README.md file in the root of the CSP project. """ import os, re from collections import namedtuple from typing import Union, Tuple, Generator compiled_pattern = re.compile(r'^([0-9a-z]{9})_([^\.]+)\.md$', re.IGNORECASE) GPT_BASE_URL = 'https://chat.openai.com/g/g-' GPT_BASE_URL_L = len(GPT_BASE_URL) FIELD_PREFIX = 'GPT' GPT_FILE_VERSION_RE = re.compile(r'\[([^]]*)\]\.md$', re.IGNORECASE) GptFieldInfo = namedtuple('FieldInfo', ['order', 'display']) GptIdentifier = namedtuple('GptIdentifier', ['id', 'name']) """Description of the fields supported by GPT markdown files.""" SUPPORTED_FIELDS = { 'url': GptFieldInfo(10, 'URL'), 'title': GptFieldInfo(20, 'Title'), 'description': GptFieldInfo(30, 'Description'), 'logo': GptFieldInfo(40, 'Logo'), 'verif_status': GptFieldInfo(50, 'Verification Status'), 'instructions': GptFieldInfo(60, 'Instructions'), 'actions': GptFieldInfo(70, 'Actions'), 'kb_files_list': GptFieldInfo(80, 'KB Files List'), 'extras': GptFieldInfo(90, 'Extras') } """ Dictionary of the fields supported by GPT markdown files: - The key should always be in lower case - The GPT markdown file will have the form: {FIELD_PREFIX} {key}: {value} """ class GptMarkdownFile: """ A class to represent a GPT markdown file. """ def __init__(self, fields={}, filename: str = '') -> None: self.fields = fields self.filename = filename def get(self, key: str, strip: bool = True) -> Union[str, None]: """ Return the value of the field with the specified key. :param key: str, key of the field. :return: str, value of the field. """ key = key.lower() if key == 'version': m = GPT_FILE_VERSION_RE.search(self.filename) return m.group(1) if m else '' v = self.fields.get(key) return v.strip() if strip else v def id(self) -> Union[GptIdentifier, None]: """ Return the GPT identifier. :return: GptIdentifier object. """ return parse_gpturl(self.fields.get('url')) def __str__(self) -> str: sorted_fields = sorted(self.fields.items(), key=lambda x: SUPPORTED_FIELDS[x[0]].order) # Check if the field value contains the start marker of the markdown block and add a blank line before it field_strings = [] for key, value in sorted_fields: if value: # Only replace the first occurrence of ```markdown modified_value = value.replace("```markdown", "\r\n```markdown", 1) field_string = f"{FIELD_PREFIX} {SUPPORTED_FIELDS[key].display}: {modified_value}" field_strings.append(field_string) return "\r\n".join(field_strings) @staticmethod def parse(file_path: str) -> Union['GptMarkdownFile', Tuple[bool, str]]: """ Parse a markdown file and return a GptMarkdownFile object. :param file_path: str, path to the markdown file. :return: GptMarkdownFile if successful, otherwise a tuple with False and an error message. """ if not os.path.exists(file_path): return (False, f"File '{file_path}' does not exist.") with open(file_path, 'r', encoding='utf-8') as file: fields = {key.lower(): [] for key in SUPPORTED_FIELDS.keys()} field_re = re.compile(f"^\s*{FIELD_PREFIX}\s+({'|'.join(fields.keys())}):", re.IGNORECASE) current_field = None for line in file: if m := field_re.match(line): current_field = m.group(1).lower() line = line[len(m.group(0)):].strip() if current_field: if current_field not in SUPPORTED_FIELDS: return (False, f"Field '{current_field}' is not supported.") fields[current_field].append(line) gpt = GptMarkdownFile( {key: ''.join(value) for key, value in fields.items()}, filename=file_path) return (True, gpt) def save(self, file_path: str) -> Tuple[bool, Union[str, None]]: """ Save the GptMarkdownFile object to a markdown file. :param file_path: str, path to the markdown file. """ try: with open(file_path, 'w', encoding='utf-8') as file: file.write(str(self)) return (True, None) except Exception as e: return (False, f"Failed to save file '{file_path}': {e}") def parse_gpturl(url: str) -> Union[GptIdentifier, None]: if url and url.startswith(GPT_BASE_URL): id = url[GPT_BASE_URL_L:].split('\n')[0] i = id.find('-') if i != -1: return GptIdentifier(id[:i], id[i+1:]) else: return GptIdentifier(id, '') def get_prompts_path() -> str: """Return the path to the prompts directory.""" return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'prompts', 'gpts')) def enum_gpts() -> Generator[Tuple[bool, Union[GptMarkdownFile, str]], None, None]: """Enumerate all the GPT files in the prompts directory, parse them and return the parsed GPT object.""" prompts_path = get_prompts_path() for file_path in os.listdir(prompts_path): _, ext = os.path.splitext(file_path) if ext != '.md': continue file_path = os.path.join(prompts_path, file_path) ok, gpt = GptMarkdownFile.parse(file_path) if ok: yield (True, gpt) else: yield (False, f"Failed to parse '{file_path}': {gpt}") def enum_gpt_files() -> Generator[str, None, None]: """ Enumerate all the GPT files in the prompts directory while relying on the files naming convention. To normalize all the GPT file names, run the `idxtool.py --rename` """ pattern = r'[a-z]{9}_[a-z]+\.[a-z]+' prompts_path = get_prompts_path() for file_path in os.listdir(prompts_path): _, ext = os.path.splitext(file_path) if ext != '.md': continue file_path = os.path.join(prompts_path, file_path) yield file_path