Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

idxtool: find_gpt_file (work in progress) #63

Merged
merged 2 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions .scripts/gptparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,19 @@
from collections import namedtuple
from typing import Union, Tuple, Generator

# Matches a GPT markdown file name, case-insensitively: a 9-character
# alphanumeric prefix (group 1), an underscore, one or more characters other
# than '.' (group 2) and the '.md' extension.
# NOTE(review): presumably group 1 is the GPT ID and group 2 the GPT name - confirm.
compiled_pattern = re.compile(r'^([0-9a-z]{9})_([^\.]+)\.md$', re.IGNORECASE)

# Common prefix of GPT URLs, together with its cached length.
GPT_BASE_URL = 'https://chat.openai.com/g/g-'
GPT_BASE_URL_L = len(GPT_BASE_URL)
# NOTE(review): usage is not visible here - presumably the prefix of the field
# names found in GPT markdown files; confirm against the parser.
FIELD_PREFIX = 'GPT'

# Captures the text between square brackets that immediately precedes a
# trailing '.md' (a version tag, going by the constant's name).
GPT_FILE_VERSION_RE = re.compile(r'\[([^]]*)\]\.md$', re.IGNORECASE)

# (order, display) pair describing one field.
# NOTE(review): the namedtuple typename is 'FieldInfo', not 'GptFieldInfo', so
# repr() shows 'FieldInfo(...)'.
GptFieldInfo = namedtuple('FieldInfo', ['order', 'display'])

# (id, name) pair identifying a GPT.
GptIdentifier = namedtuple('GptIdentifier', ['id', 'name'])
"""Description of the fields supported by GPT markdown files."""

# Description of the fields supported by GPT markdown files.
SUPPORTED_FIELDS = {
'url': GptFieldInfo(10, 'URL'),
'title': GptFieldInfo(20, 'Title'),
Expand Down Expand Up @@ -132,7 +135,7 @@ def get_prompts_path() -> str:
return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'prompts', 'gpts'))

def enum_gpts() -> Generator[Tuple[bool, Union[GptMarkdownFile, str]], None, None]:
"""Enumerate all the GPT files in the prompts directory."""
"""Enumerate all the GPT files in the prompts directory, parse them and return the parsed GPT object."""
prompts_path = get_prompts_path()
for file_path in os.listdir(prompts_path):
_, ext = os.path.splitext(file_path)
Expand All @@ -144,3 +147,20 @@ def enum_gpts() -> Generator[Tuple[bool, Union[GptMarkdownFile, str]], None, Non
yield (True, gpt)
else:
yield (False, f"Failed to parse '{file_path}': {gpt}")

def enum_gpt_files() -> Generator[str, None, None]:
    """
    Enumerate all the GPT files in the prompts directory.

    Only the `.md` extension is checked: the entries are not parsed and their
    names are not validated against the naming convention yet.
    To normalize all the GPT file names, run the `idxtool.py --rename`

    :return: generator yielding the path (prompts directory joined with the
             entry name) of every `.md` entry.
    """
    prompts_path = get_prompts_path()
    for file_name in os.listdir(prompts_path):
        _, ext = os.path.splitext(file_name)
        if ext != '.md':
            continue
        yield os.path.join(prompts_path, file_name)


68 changes: 14 additions & 54 deletions .scripts/idxtool.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"""

import sys, os, argparse
from gptparser import GptMarkdownFile, enum_gpts
from gptparser import GptMarkdownFile, enum_gpts, parse_gpturl
from typing import Tuple
from urllib.parse import quote

Expand All @@ -32,7 +32,7 @@ def update_description(filename):
print(f"TODO Updating description with file: {filename}")
raise NotImplementedError

def rename_gpt():
def rename_gpts():
nb_ok = nb_total = 0
all_renamed_already = True

Expand Down Expand Up @@ -63,51 +63,6 @@ def rename_gpt():
return (ok, msg)


def reformat_gpt_files(src_path: str, dst_path: str) -> Tuple[bool, str]:
    """
    Reformat all the GPT files in the source path and save them to the destination path.

    Every `.md` entry of `src_path` is parsed with `GptMarkdownFile.parse` and
    saved under the same file name in `dst_path`. One progress line is printed
    per file.

    :param src_path: str, path to the source directory.
    :param dst_path: str, path to the destination directory (created when missing).
    :return: tuple `(ok, msg)`; `ok` is True only when every `.md` file was
             parsed and saved, `msg` is an error description or a summary.
    """
    if not os.path.exists(src_path):
        return (False, f"Source path '{src_path}' does not exist.")

    # os.listdir() below would raise on a regular file; report it instead.
    if not os.path.isdir(src_path):
        return (False, f"Source path '{src_path}' is not a directory.")

    if not os.path.exists(dst_path):
        # exist_ok tolerates the directory appearing between check and creation.
        os.makedirs(dst_path, exist_ok=True)

    print(f"Reformatting GPT files in '{src_path}' and saving them to '{dst_path}'...")

    nb_ok = nb_total = 0
    for file_name in os.listdir(src_path):
        _, ext = os.path.splitext(file_name)
        if ext != '.md':
            continue
        nb_total += 1
        src_file_path = os.path.join(src_path, file_name)
        dst_file_path = os.path.join(dst_path, file_name)

        ok, gpt = GptMarkdownFile.parse(src_file_path)
        if not ok:
            # On failure the second item is printed as the error description.
            print(f"[!] failed to parse '{src_file_path}': {gpt}")
            continue

        ok, msg = gpt.save(dst_file_path)
        if not ok:
            print(f"[!] failed to save '{src_file_path}': {msg}")
            continue

        # Named gpt_id so the builtin id() is not shadowed.
        gpt_id = gpt.id()
        if gpt_id:
            info = f"; id={gpt_id.id}"
            if gpt_id.name:
                info += f", name='{gpt_id.name}'"
        else:
            info = ''
        print(f"[+] saved '{file_name}'{info}")
        nb_ok += 1

    msg = f"Reformatted {nb_ok} out of {nb_total} GPT files."
    return (nb_ok == nb_total, msg)


def parse_gpt_file(filename) -> Tuple[bool, str]:
ok, gpt = GptMarkdownFile.parse(filename)
if ok:
Expand Down Expand Up @@ -181,16 +136,21 @@ def rebuild_toc(toc_out: str = '') -> Tuple[bool, str]:
print(msg)
return (ok, msg)


def find_gptfile(keyword):
    """
    Placeholder for the GPT file lookup: announce the request, then bail out.

    :param keyword: GPT ID or name to look for (only echoed for now).
    :raises NotImplementedError: always, the lookup is not written yet.
    """
    todo_msg = f"TODO: Finding GPT file with ID or name: {keyword}"
    print(todo_msg)
    raise NotImplementedError()


def find_gpt_in_toc(gptid_or_string):
    """
    Placeholder for the TOC.md search: announce the request, then bail out.

    :param gptid_or_string: GPT ID or free text to search for (only echoed for now).
    :raises NotImplementedError: always, the search is not written yet.
    """
    todo_msg = f"TODO: Searching TOC.md for GPT ID or string: {gptid_or_string}"
    print(todo_msg)
    raise NotImplementedError()

def find_gptfile(keyword):
    """
    Find a GPT file from a GPT ID, a GPT URL or a response file (work in progress).

    The keyword is stripped and lower-cased. When `parse_gpturl` recognizes it,
    the keyword is replaced by the `id` of the parsed result.

    :param keyword: str, GPT ID, GPT URL, or a response file reference prefixed with '@'.
    :raises NotImplementedError: always, the lookup itself is not written yet.
    """
    # str has no tolower() method (that call raised AttributeError); lower() is
    # the spelling of the intended normalization.
    # NOTE(review): this also folds the case of the GPT ID - confirm that the
    # eventual matching is case-insensitive.
    keyword = keyword.strip().lower()
    # Response file with a set of GPT IDs
    if keyword.startswith('@'):
        print(f"TODO: Finding GPT file with ID: {keyword}")
    if gpt_info := parse_gpturl(keyword):
        keyword = gpt_info.id

    print(f"TODO: Finding GPT with ID: {keyword}")
    raise NotImplementedError

def main():
parser = argparse.ArgumentParser(description='idxtool: A GPT indexing and searching tool for the CSP repo')

Expand Down Expand Up @@ -223,7 +183,7 @@ def main():
if args.find_gpttoc:
find_gpt_in_toc(args.find_gpttoc)
if args.rename:
ok, err = rename_gpt()
ok, err = rename_gpts()
if not ok:
print(err)

Expand Down
54 changes: 54 additions & 0 deletions .scripts/oneoff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
'oneoff.py' is a script that performs one-off operations on the GPT files

- Reformat all the GPT files in the source path and save them to the destination path.

"""

from gptparser import GptMarkdownFile
from typing import Tuple
import os

def reformat_gpt_files(src_path: str, dst_path: str) -> Tuple[bool, str]:
    """
    Reformat all the GPT files in the source path and save them to the destination path.

    Every `.md` entry of `src_path` is parsed with `GptMarkdownFile.parse` and
    saved under the same file name in `dst_path`. One progress line is printed
    per file.

    :param src_path: str, path to the source directory.
    :param dst_path: str, path to the destination directory (created when missing).
    :return: tuple `(ok, msg)`; `ok` is True only when every `.md` file was
             parsed and saved, `msg` is an error description or a summary.
    """
    if not os.path.exists(src_path):
        return (False, f"Source path '{src_path}' does not exist.")

    # os.listdir() below would raise on a regular file; report it instead.
    if not os.path.isdir(src_path):
        return (False, f"Source path '{src_path}' is not a directory.")

    if not os.path.exists(dst_path):
        # exist_ok tolerates the directory appearing between check and creation.
        os.makedirs(dst_path, exist_ok=True)

    print(f"Reformatting GPT files in '{src_path}' and saving them to '{dst_path}'...")

    nb_ok = nb_total = 0
    for file_name in os.listdir(src_path):
        _, ext = os.path.splitext(file_name)
        if ext != '.md':
            continue
        nb_total += 1
        src_file_path = os.path.join(src_path, file_name)
        dst_file_path = os.path.join(dst_path, file_name)

        ok, gpt = GptMarkdownFile.parse(src_file_path)
        if not ok:
            # On failure the second item is printed as the error description.
            print(f"[!] failed to parse '{src_file_path}': {gpt}")
            continue

        ok, msg = gpt.save(dst_file_path)
        if not ok:
            print(f"[!] failed to save '{src_file_path}': {msg}")
            continue

        # Named gpt_id so the builtin id() is not shadowed.
        gpt_id = gpt.id()
        if gpt_id:
            info = f"; id={gpt_id.id}"
            if gpt_id.name:
                info += f", name='{gpt_id.name}'"
        else:
            info = ''
        print(f"[+] saved '{file_name}'{info}")
        nb_ok += 1

    msg = f"Reformatted {nb_ok} out of {nb_total} GPT files."
    return (nb_ok == nb_total, msg)
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/.scripts/idxtool.py",
"args": ["--find-gptfile", "GPT3"],
"args": ["--find-gptfile", "https://chat.openai.com/g/g-svehnI9xP-retro-adventures"],
"console": "integratedTerminal"
},
{
Expand Down
Loading