Skip to content

feat: Create Individual Virtual Env for the Kernel #73

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
773cf4f
PEP8 code styling, no functional change
dasmy Jul 21, 2023
67460a2
Store and use the full conversation history in proper chat mode. This…
dasmy Jul 24, 2023
20e1afb
add new message types from kernel manager:
dasmy Jul 26, 2023
3bbd8f1
store execution results or error messages in conversation history to …
dasmy Jul 26, 2023
35bee7a
output error messages with Syntax highlighting
dasmy Jul 26, 2023
e01f465
create individual workdir for kernel and initialize a dedicated virtu…
dasmy Jul 27, 2023
7a82c76
fix up-/download by using a fixed workspace name. this is fine as lon…
dasmy Jul 27, 2023
7336b80
some PEP8 and code deduplication
dasmy Jul 27, 2023
b9b1b71
Merge pull request #13 from dasmy/dev/more_icons
dasmy Jul 28, 2023
05d85f6
Merge pull request #14 from dasmy/dev/do_not_execute_without_code
dasmy Jul 28, 2023
54fc5cf
Merge branch 'main' into dev/kernel_env
dasmy Jul 28, 2023
5f75280
Merge pull request #15 from dasmy/dev/kernel_env
dasmy Jul 28, 2023
5b6b4d5
Merge branch 'main' into dev/conversation_history
dasmy Jul 28, 2023
bbe1069
Merge pull request #16 from dasmy/dev/conversation_history
dasmy Jul 28, 2023
6c5a4c6
create individual workdir for kernel and initialize a dedicated virtu…
dasmy Jul 27, 2023
248c3cc
fix up-/download by using a fixed workspace name. this is fine as lon…
dasmy Jul 27, 2023
702365b
some PEP8 and code deduplication
dasmy Jul 27, 2023
302464a
make creation of new env optional
dasmy Jul 31, 2023
e332116
make virtual env creation much faster
dasmy Jul 31, 2023
61f67bb
Merge branch 'dev/kernel_env'
dasmy Jul 31, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

# Program related
process_pids/
kernel.*
kernel_connection_file.json

# Python stuff
Expand Down
18 changes: 3 additions & 15 deletions frontend/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ function App() {
let [messages, setMessages] = useState<Array<MessageDict>>(
Array.from([
{
text: "Hello! I'm a GPT Code assistant. Ask me to do something for you! Pro tip: you can upload a file and I'll be able to use it.",
text: "Hello! I am a GPT Code assistant. Ask me to do something for you! Pro tip: you can upload a file and I'll be able to use it.",
role: "generator",
type: "message",
},
Expand All @@ -53,7 +53,7 @@ function App() {
])
);
let [waitingForSystem, setWaitingForSystem] = useState<WaitingStates>(
WaitingStates.Idle
WaitingStates.StartingKernel
);
const chatScrollRef = React.useRef<HTMLDivElement>(null);

Expand All @@ -78,6 +78,7 @@ function App() {
const handleCommand = (command: string) => {
if (command == "reset") {
addMessage({ text: "Restarting the kernel.", type: "message", role: "system" });
setWaitingForSystem(WaitingStates.StartingKernel);

fetch(`${Config.API_ADDRESS}/restart`, {
method: "POST",
Expand Down Expand Up @@ -161,19 +162,6 @@ function App() {
function completeUpload(message: string) {
addMessage({ text: message, type: "message", role: "upload" });
setWaitingForSystem(WaitingStates.Idle);

// Inform prompt server
fetch(`${Config.WEB_ADDRESS}/inject-context`, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
prompt: message,
}),
})
.then(() => {})
.catch((error) => console.error("Error:", error));
}

function startUpload(_: string) {
Expand Down
23 changes: 21 additions & 2 deletions frontend/src/components/Chat.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,33 @@ function Message(props: {
<div className="cell-output" dangerouslySetInnerHTML={{ __html: text }}></div>
))}

{(props.type == "message_raw") &&
{props.type == "message_error" &&
(props.showLoader ? (
<div>
{text} {props.showLoader ? <div className="loader"></div> : null}
</div>
) : (
<div>
Execution Error:
<SyntaxHighlighter
{...props}
children={text}
wrapLongLines={true}
language={"python"}
PreTag="div"
/>
</div>
))}

{props.type == "message_raw" &&
(props.showLoader ? (
<div>
{text} {props.showLoader ? <div className="loader"></div> : null}
</div>
) : (
<div className="cell-output" dangerouslySetInnerHTML={{ __html: text }}></div>
))}

{props.type == "image/png" &&
<div className="cell-output-image" dangerouslySetInnerHTML={{ __html: `<img src='data:image/png;base64,${text}' />` }}></div>
}
Expand All @@ -94,6 +112,7 @@ function Message(props: {


export enum WaitingStates {
StartingKernel = "Starting Kernel",
GeneratingCode = "Generating code",
RunningCode = "Running code",
UploadingFile = "Uploading file",
Expand Down
2 changes: 1 addition & 1 deletion gpt_code_ui/kernel_program/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ def get_logger():
logger = logging.getLogger(__name__)
if "DEBUG" in os.environ:
logger.setLevel(logging.DEBUG)
return logger
return logger
124 changes: 92 additions & 32 deletions gpt_code_ui/kernel_program/kernel_manager.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import sys
import subprocess
import os
import shutil
import atexit
import queue
import json
import signal
import pathlib
import threading
import time
import atexit
import traceback
import venv

from time import sleep
from jupyter_client import BlockingKernelClient
Expand Down Expand Up @@ -56,7 +58,7 @@ def cleanup_spawned_processes():
os.kill(pid, signal.CTRL_BREAK_EVENT)
else:
os.kill(pid, signal.SIGKILL)

# After successful kill, cleanup pid file
os.remove(fp)

Expand Down Expand Up @@ -149,7 +151,7 @@ def flush_kernel_msgs(kc, tries=1, timeout=0.2):
elif msg["msg_type"] == "error":
send_message(
utils.escape_ansi("\n".join(msg["content"]["traceback"])),
"message_raw",
"message_error",
)
except queue.Empty:
hit_empty += 1
Expand All @@ -167,58 +169,116 @@ def flush_kernel_msgs(kc, tries=1, timeout=0.2):
logger.debug(f"{e} [{type(e)}")


def start_kernel():
kernel_connection_file = os.path.join(os.getcwd(), "kernel_connection_file.json")
def create_venv(venv_dir: pathlib.Path, install_default_packages: bool) -> pathlib.Path:
    """Create a virtual environment in ``venv_dir`` unless that directory already exists.

    Args:
        venv_dir: Directory that holds (or will hold) the virtual environment.
        install_default_packages: When the environment is freshly created,
            also install ``wheel`` and the pinned default package set into it.

    Returns:
        The ``purelib`` (site-packages) directory of the environment, as
        reported by the environment's own interpreter.

    Raises:
        subprocess.CalledProcessError: If querying the environment's
            interpreter for its site-packages path fails.
    """
    # venv places executables in 'Scripts' on Windows and in 'bin' elsewhere
    venv_bindir = venv_dir / ('Scripts' if os.name == 'nt' else 'bin')
    venv_python_executable = venv_bindir / os.path.basename(sys.executable)

    def pip_install(*requirements: str) -> None:
        # A failing install stays non-fatal (as before), but is no longer silent
        result = subprocess.run([venv_python_executable, '-m', 'pip', 'install', *requirements])
        if result.returncode != 0:
            logger.warning('pip install of %s failed with exit code %d', requirements, result.returncode)

    if not os.path.isdir(venv_dir):
        # create virtual env inside venv_dir directory
        venv.create(venv_dir, system_site_packages=True, with_pip=True, upgrade_deps=True)

        if install_default_packages:
            # install wheel because some packages do not like being installed without
            pip_install('wheel>=0.41,<1.0')
            # install all default packages into the venv
            pip_install(
                "ipykernel>=6,<7",
                "numpy>=1.24,<1.25",
                "dateparser>=1.1,<1.2",
                "pandas>=1.5,<1.6",
                "geopandas>=0.13,<0.14",
                "tabulate>=0.9.0,<1.0",
                "PyPDF2>=3.0,<3.1",
                "pdfminer>=20191125,<20191200",
                "pdfplumber>=0.9,<0.10",
                "matplotlib>=3.7,<3.8",
                "openpyxl>=3.1.2,<4",
            )

    # get base env library path as we need this to refer to this from a derived venv
    site_packages = subprocess.check_output(
        [venv_python_executable, '-c', 'import sysconfig; print(sysconfig.get_paths()["purelib"])']
    )
    # splitlines() also drops the '\r' that Windows emits before '\n'
    site_packages = site_packages.decode('utf-8').splitlines()[0]

    return pathlib.Path(site_packages)


def create_derived_venv(base_venv: pathlib.Path, venv_dir: pathlib.Path):
    """Create a venv in ``venv_dir`` that can also import packages from ``base_venv``.

    Both environments are obtained through ``create_venv``; only the base
    environment is asked to install the default packages. The derived
    environment is linked to the base one through a ``.pth`` file.

    Args:
        base_venv: Directory of the shared base environment.
        venv_dir: Directory of the derived environment.

    Returns:
        A tuple ``(bin_dir, python_executable)`` of paths inside ``venv_dir``.
    """
    site_packages_base = create_venv(base_venv, install_default_packages=True)
    site_packages_derived = create_venv(venv_dir, install_default_packages=False)

    # create a link from derived venv into the base venv, see https://stackoverflow.com/a/75545634
    with open(site_packages_derived / '_base_packages.pth', 'w') as pth:
        pth.write(f'{site_packages_base}\n')

    # venv places executables in 'Scripts' on Windows and in 'bin' elsewhere
    venv_bindir = venv_dir / ('Scripts' if os.name == 'nt' else 'bin')
    venv_python_executable = venv_bindir / os.path.basename(sys.executable)

    return venv_bindir, venv_python_executable

os.makedirs('workspace/', exist_ok=True)

def start_kernel(id: str):
    """Start an IPython kernel in its own working directory and virtual environment.

    The directory ``kernel.<id>`` below the current working directory is wiped
    and recreated, a derived virtual environment is created inside it, and the
    kernel is launched with that environment's interpreter.

    Args:
        id: Suffix for the kernel directory name. The parameter name shadows
            the builtin but is kept because callers pass it by keyword.

    Returns:
        A tuple ``(kc, kernel_dir)``: the connected ``BlockingKernelClient``
        and the ``pathlib.Path`` of the kernel directory.

    Raises:
        RuntimeError: If the kernel process exits before its connection file
            has been completely written.
    """
    cwd = pathlib.Path(os.getcwd())
    kernel_dir = cwd / f'kernel.{id}'
    base_dir = cwd / 'kernel.base'

    # Cleanup potential leftovers
    shutil.rmtree(kernel_dir, ignore_errors=True)
    os.makedirs(kernel_dir)

    kernel_env = os.environ.copy()
    kernel_connection_file = kernel_dir / "kernel_connection_file.json"
    launch_kernel_script_path = pathlib.Path(__file__).parent.resolve() / "launch_kernel.py"

    kernel_venv_dir = kernel_dir / 'venv'
    kernel_venv_bindir, kernel_python_executable = create_derived_venv(base_dir, kernel_venv_dir)
    # Put the venv first on PATH so tools started from the kernel resolve to it
    kernel_env['PATH'] = str(kernel_venv_bindir) + os.pathsep + kernel_env['PATH']

    # start the kernel using the virtual env python executable
    kernel_process = subprocess.Popen(
        [
            kernel_python_executable,
            launch_kernel_script_path,
            "--IPKernelApp.connection_file",
            kernel_connection_file,
            "--matplotlib=inline",
            "--quiet",
        ],
        cwd=kernel_dir,
        env=kernel_env,
    )

    utils.store_pid(kernel_process.pid, "kernel")

    # Wait for kernel connection file to be written
    while True:
        try:
            with open(kernel_connection_file, 'r') as fp:
                json.load(fp)
        except (FileNotFoundError, json.JSONDecodeError):
            # Either file was not yet there or incomplete (then JSON parsing failed)
            exit_code = kernel_process.poll()
            if exit_code is not None:
                # A dead kernel will never finish the file; fail instead of spinning forever
                raise RuntimeError(
                    f'Kernel process exited with code {exit_code} before writing its connection file'
                )
            sleep(0.1)
        else:
            break

    # Client
    kc = BlockingKernelClient(connection_file=str(kernel_connection_file))
    kc.load_connection_file()
    kc.start_channels()
    kc.wait_for_ready()
    return kc, kernel_dir


if __name__ == "__main__":
    # The kernel id is expected as the first command line argument
    try:
        kernel_id = sys.argv[1]
    except IndexError:
        # logger.exception already attaches the active exception; passing it as a
        # positional argument without a placeholder breaks message formatting
        logger.exception('Missing kernel ID command line parameter')
    else:
        kc, kernel_dir = start_kernel(id=kernel_id)

        # make sure the dir with the virtualenv will be deleted after kernel termination
        atexit.register(shutil.rmtree, kernel_dir, ignore_errors=True)

        start_snakemq(kc)
2 changes: 1 addition & 1 deletion gpt_code_ui/kernel_program/launch_kernel.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
if __name__ == "__main__":
from ipykernel import kernelapp as app

app.launch_new_instance()
app.launch_new_instance()
38 changes: 19 additions & 19 deletions gpt_code_ui/kernel_program/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import time

import asyncio
import json
import threading

from queue import Queue
Expand Down Expand Up @@ -47,24 +46,26 @@
app = Flask(__name__)
CORS(app)


def start_kernel_manager():
    """Launch kernel_manager.py as a subprocess and record its pid.

    The process handle is stored in the module-level ``kernel_manager_process``.
    """
    global kernel_manager_process

    script_dir = pathlib.Path(__file__).parent.resolve()
    kernel_manager_script_path = os.path.join(script_dir, "kernel_manager.py")

    # The last argument will be used as part of the folder name for the workspace
    # and to create the venv inside. Can be anything, but using 'workspace' makes
    # file up-/download very simple
    command = [sys.executable, kernel_manager_script_path, 'workspace']
    kernel_manager_process = subprocess.Popen(command)

    utils.store_pid(kernel_manager_process.pid, "kernel_manager")

def cleanup_kernel_program():
    """Clean up by delegating to ``kernel_manager.cleanup_spawned_processes``."""
    kernel_manager.cleanup_spawned_processes()


async def start_snakemq():
global messaging

Expand All @@ -77,11 +78,11 @@ def on_recv(conn, ident, message):
if message["value"] == "ready":
logger.debug("Kernel is ready.")
result_queue.put({
"value":"Kernel is ready.",
"type": "message"
"value": "Kernel is ready.",
"type": "message_status"
})

elif message["type"] in ["message", "message_raw", "image/png", "image/jpeg"]:
elif message["type"] in ["message", "message_raw", "message_error", "image/png", "image/jpeg"]:
# TODO: 1:1 kernel <> channel mapping
logger.debug("%s of type %s" % (message["value"], message["type"]))

Expand All @@ -97,8 +98,9 @@ def send_queued_messages():
while True:
if send_queue.qsize() > 0:
message = send_queue.get()
utils.send_json(messaging,
{"type": "execute", "value": message["command"]},
utils.send_json(
messaging,
{"type": "execute", "value": message["command"]},
config.IDENT_KERNEL_MANAGER
)
time.sleep(0.1)
Expand All @@ -117,7 +119,7 @@ async def async_link_loop():

@app.route("/api", methods=["POST", "GET"])
def handle_request():

if request.method == "GET":
# Handle GET requests by sending everything that's in the receive_queue
results = [result_queue.get() for _ in range(result_queue.qsize())]
Expand All @@ -128,7 +130,8 @@ def handle_request():
send_queue.put(data)

return jsonify({"result": "success"})



@app.route("/restart", methods=["POST"])
def handle_restart():

Expand All @@ -152,9 +155,6 @@ async def main():
def run_flask_app():
app.run(host="0.0.0.0", port=APP_PORT)


if __name__ == "__main__":
asyncio.run(main())




Loading