Skip to content

Commit e332116

Browse files
committed
make virtual env creation much faster
We first create a base venv that is never deleted and can easily be re-used; all default packages are installed there. Then we create a second, kernel-specific venv without any additional packages and add a link (a .pth file) that points to the packages installed in the base venv. The base venv is intended to stay immutable: all newly installed packages end up in the kernel-specific venv. Those are lost on restart, whereas the packages inside the base venv are not.
1 parent 302464a commit e332116

File tree

1 file changed

+53
-29
lines changed

1 file changed

+53
-29
lines changed

gpt_code_ui/kernel_program/kernel_manager.py

Lines changed: 53 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import threading
1111
import time
1212
import traceback
13+
import venv
1314

1415
from time import sleep
1516
from jupyter_client import BlockingKernelClient
@@ -20,8 +21,6 @@
2021
import gpt_code_ui.kernel_program.utils as utils
2122
import gpt_code_ui.kernel_program.config as config
2223

23-
USE_SEPARATE_VENV = False
24-
2524
# Set up globals
2625
messaging = None
2726
logger = config.get_logger()
@@ -170,9 +169,58 @@ def flush_kernel_msgs(kc, tries=1, timeout=0.2):
170169
logger.debug(f"{e} [{type(e)}")
171170

172171

172+
# Version-pinned packages that are pip-installed into a freshly created venv
# when default packages are requested. Every entry must be a valid PEP 508
# requirement: multiple version clauses are separated by commas.
DEFAULT_PACKAGES = (
    "ipykernel>=6,<7",
    "numpy>=1.24,<1.25",
    "dateparser>=1.1,<1.2",
    "pandas>=1.5,<1.6",
    "geopandas>=0.13,<0.14",
    "tabulate>=0.9.0,<1.0",
    "PyPDF2>=3.0,<3.1",
    "pdfminer>=20191125,<20191200",
    "pdfplumber>=0.9,<0.10",
    "matplotlib>=3.7,<3.8",
    "openpyxl>=3.1.2,<4",
)


def create_venv(venv_dir: pathlib.Path, install_default_packages: bool) -> pathlib.Path:
    """Create a virtual environment in *venv_dir* if needed and locate its site-packages.

    If *venv_dir* is not an existing directory, a venv is created there with
    access to the system site-packages, with pip, and with upgraded core
    dependencies. An existing directory is re-used as is: neither the venv
    creation nor the package installation is repeated.

    Args:
        venv_dir: Directory of the virtual environment.
        install_default_packages: When the venv is newly created, also
            pip-install ``wheel`` and everything in ``DEFAULT_PACKAGES``.

    Returns:
        The "purelib" (site-packages) directory as reported by the venv's
        own Python interpreter.

    Raises:
        subprocess.CalledProcessError: If querying the venv interpreter for
            its library path exits with a non-zero status.
    """
    venv_bindir = venv_dir / 'bin'
    # the venv interpreter carries the same file name as the running interpreter
    venv_python_executable = venv_bindir / os.path.basename(sys.executable)

    if not os.path.isdir(venv_dir):
        # create virtual env inside venv_dir directory
        venv.create(venv_dir, system_site_packages=True, with_pip=True, upgrade_deps=True)

        if install_default_packages:
            # NOTE(review): the exit status of both pip calls is ignored, so a
            # failed install leaves a venv without the default packages that
            # is never repaired on later calls - confirm this is intended.
            # install wheel because some packages do not like being installed without
            subprocess.run([venv_python_executable, '-m', 'pip', 'install', 'wheel>=0.41,<1.0'])
            # install all default packages into the venv
            subprocess.run([venv_python_executable, '-m', 'pip', 'install', *DEFAULT_PACKAGES])

    # get the library path of this venv as we need it to refer to it from a derived venv
    raw_output = subprocess.check_output(
        [venv_python_executable, '-c', 'import sysconfig; print(sysconfig.get_paths()["purelib"])']
    )
    # only the first printed line is the path; strip() also drops a stray '\r'
    site_packages = raw_output.decode('utf-8').split('\n')[0].strip()

    return pathlib.Path(site_packages)
204+
205+
206+
def create_derived_venv(base_venv: pathlib.Path, venv_dir: pathlib.Path):
    """Set up a venv in *venv_dir* that also sees the packages of *base_venv*.

    Both environments are prepared through ``create_venv``: the base one with
    the default packages requested, the derived one without. A
    ``_base_packages.pth`` file naming the base venv's site-packages
    directory is then written into the derived venv's site-packages.

    Args:
        base_venv: Directory of the base virtual environment.
        venv_dir: Directory of the derived virtual environment.

    Returns:
        A ``(bin_directory, python_executable)`` tuple of paths inside
        *venv_dir*.
    """
    base_site_packages = create_venv(base_venv, install_default_packages=True)
    derived_site_packages = create_venv(venv_dir, install_default_packages=False)

    # link the derived venv to the base venv via a .pth file,
    # see https://stackoverflow.com/a/75545634
    link_file = derived_site_packages / '_base_packages.pth'
    with open(link_file, 'w') as handle:
        handle.write(f'{base_site_packages}\n')

    # the derived interpreter has the same file name as the running one
    interpreter_name = os.path.basename(sys.executable)
    derived_bindir = venv_dir / 'bin'
    return derived_bindir, derived_bindir / interpreter_name
218+
219+
173220
def start_kernel(id: str):
174221
cwd = pathlib.Path(os.getcwd())
175222
kernel_dir = cwd / f'kernel.{id}'
223+
base_dir = cwd / 'kernel.base'
176224

177225
# Cleanup potential leftovers
178226
shutil.rmtree(kernel_dir, ignore_errors=True)
@@ -182,33 +230,9 @@ def start_kernel(id: str):
182230
kernel_connection_file = kernel_dir / "kernel_connection_file.json"
183231
launch_kernel_script_path = pathlib.Path(__file__).parent.resolve() / "launch_kernel.py"
184232

185-
if not USE_SEPARATE_VENV:
186-
kernel_python_executable = sys.executable # TODO: here we could also pick up an already existing env by simply selecting the corresponding Python binary
187-
else:
188-
kernel_venv = kernel_dir / 'venv'
189-
kernel_venv_bindir = kernel_venv / 'bin'
190-
kernel_python_executable = kernel_venv_bindir / os.path.basename(sys.executable)
191-
kernel_env['PATH'] = str(kernel_venv_bindir) + os.pathsep + kernel_env['PATH']
192-
193-
# create virtual env inside kernel_venv directory
194-
subprocess.run([sys.executable, '-m', 'venv', kernel_venv, '--upgrade-deps', '--system-site-packages'])
195-
# install wheel because some packages do not like being installed without
196-
subprocess.run([kernel_python_executable, '-m', 'pip', 'install', 'wheel>=0.41,<1.0'])
197-
# install all default packages into the venv
198-
default_packages = [
199-
"ipykernel>=6,<7",
200-
"numpy>=1.24,<1.25",
201-
"dateparser>=1.1,<1.2",
202-
"pandas>=1.5,<1.6",
203-
"geopandas>=0.13,<0.14",
204-
"tabulate>=0.9.0<1.0",
205-
"PyPDF2>=3.0,<3.1",
206-
"pdfminer>=20191125,<20191200",
207-
"pdfplumber>=0.9,<0.10",
208-
"matplotlib>=3.7,<3.8",
209-
"openpyxl>=3.1.2,<4",
210-
]
211-
subprocess.run([kernel_python_executable, '-m', 'pip', 'install'] + default_packages)
233+
kernel_venv_dir = kernel_dir / 'venv'
234+
kernel_venv_bindir, kernel_python_executable = create_derived_venv(base_dir, kernel_venv_dir)
235+
kernel_env['PATH'] = str(kernel_venv_bindir) + os.pathsep + kernel_env['PATH']
212236

213237
# start the kernel using the virtual env python executable
214238
kernel_process = subprocess.Popen(

0 commit comments

Comments
 (0)