Skip to content

Commit e6e3c71

Browse files
committed
Updated readme, python packages transferred to requirements.txt
1 parent 1013327 commit e6e3c71

6 files changed

+165
-108
lines changed

Dockerfile_singleuser

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -157,43 +157,25 @@ RUN fix-permissions $CONDA_DIR && fix-permissions /home/$NB_USER
157157

158158
# update pip and install basic utilities
159159
RUN pip3 install --no-cache-dir --upgrade pip
160-
RUN pip3 install --no-cache-dir setuptools wheel virtualenv
161160

162-
# jupyterhub stuff
163-
RUN pip3 install --no-cache-dir ipywidgets jupyter jupyterhub jupyterlab jupyterlab-git importlib_metadata
164-
RUN pip3 install --no-cache-dir jupyterlab_widgets jupyter_contrib_core jupyter_contrib_nbextensions jupyter-server-proxy fastbook
165-
RUN pip3 install --no-cache-dir docker dockerspawner jupyterhub-firstuseauthenticator jupyterhub-systemdspawner jupyterhub-jwtauthenticator jupyterhub-client jupyterhub-kerberosauthenticator
166-
RUN pip3 install --no-cache-dir jupyterhub-nanowireauthenticator jupyterhub-ldapauthenticator jupyterhub-kubespawner jupyterhub-nativeauthenticator
161+
# install the rest of the packages including medcat
162+
RUN pip3 install --no-cache-dir -r ./requirements.txt
167163

168-
# extra packages for DB connections & utilities (charts, data formats, and other useful tools such as neo4j)
169-
RUN pip3 install --no-cache-dir pytesseract ipyparallel py7zr cython isort html2text jsoncsv simplejson detect wheel nltk keras bokeh seaborn matplotlib graphviz plotly tqdm
170-
RUN pip3 install --no-cache-dir pymssql mysql-connector-python cx-Oracle dataclasses numpy matplotlib pandas dill jsonpickle jsonext psycopg2 psycopg2-binary pyodbc openpyxl
171-
RUN pip3 install --no-cache-dir dvc flask GitPython elasticsearch opensearch-py neo4j eland --ignore-installed PyYAML
172-
RUN pip3 install --no-cache-dir opencv-python torchvision eland plotly
173-
174-
# XNAT
175-
RUN pip3 install --no-cache-dir xnat
176-
177-
# medcat & models
178-
RUN pip3 install --no-cache-dir -U spacy click torch thinc
164+
# install requirements for working with cogstack scripts
165+
RUN pip3 install --no-cache-dir -r notebooks/demo_working_with_cogstack/requirements.txt
179166

180167
# Get the spacy model
181168
#ARG SPACY_MODELS="en_core_web_sm en_core_web_md en_core_web_lg"
182169
#RUN for spacy_model in ${SPACY_MODELS}; do python3 -m spacy download $spacy_model; done
183170

184-
# install requirements for working with cogstack scripts
185-
# RUN pip3 install --no-cache-dir -r notebooks/demo_working_with_cogstack/requirements.txt
186-
187-
RUN pip3 install --no-cache-dir medcat==1.14.0
188-
189171
# clean up pip
190172
RUN pip3 cache purge
191173

192174
#######################################################################################################
193175

194176
# install R and other dependencies
195-
COPY ./scripts/r_kernel_install.sh /etc/jupyterhub/
196-
# RUN Rscript /etc/jupyterhub/r_kernel_install.sh
177+
COPY ./scripts/r_kernel_install.sh /srv/jupyterhub/
178+
RUN Rscript /srv/jupyterhub/r_kernel_install.sh
197179

198180
# create jupyterhub shared folder
199181
RUN mkdir -p /home/jovyan/scratch

Dockerfile_singleuser_gpu

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -187,43 +187,25 @@ RUN fix-permissions $CONDA_DIR && fix-permissions /home/$NB_USER
187187

188188
# update pip and install basic utilities
189189
RUN pip3 install --no-cache-dir --upgrade pip
190-
RUN pip3 install --no-cache-dir setuptools wheel virtualenv
191190

192-
# jupyterhub stuff
193-
RUN pip3 install --no-cache-dir ipywidgets jupyter jupyterhub jupyterlab jupyterlab-git importlib_metadata
194-
RUN pip3 install --no-cache-dir jupyterlab_widgets jupyter_contrib_core jupyter_contrib_nbextensions jupyter-server-proxy fastbook
195-
RUN pip3 install --no-cache-dir docker dockerspawner jupyterhub-firstuseauthenticator jupyterhub-systemdspawner jupyterhub-jwtauthenticator jupyterhub-client jupyterhub-kerberosauthenticator
196-
RUN pip3 install --no-cache-dir jupyterhub-nanowireauthenticator jupyterhub-ldapauthenticator jupyterhub-kubespawner jupyterhub-nativeauthenticator
191+
# install the rest of the packages including medcat
192+
RUN pip3 install --no-cache-dir -r ./requirements.txt
197193

198-
# extra packages for DB connections & utilities (charts, data formats, and other useful tools such as neo4j)
199-
RUN pip3 install --no-cache-dir pytesseract ipyparallel py7zr cython isort html2text jsoncsv simplejson detect wheel nltk keras bokeh seaborn matplotlib graphviz plotly tqdm
200-
RUN pip3 install --no-cache-dir pymssql mysql-connector-python cx-Oracle dataclasses numpy matplotlib pandas dill jsonpickle jsonext psycopg2 psycopg2-binary pyodbc openpyxl
201-
RUN pip3 install --no-cache-dir dvc flask GitPython elasticsearch opensearch-py neo4j eland --ignore-installed PyYAML
202-
RUN pip3 install --no-cache-dir opencv-python torchvision eland plotly
203-
204-
# XNAT
205-
RUN pip3 install --no-cache-dir xnat
206-
207-
# medcat & models
208-
RUN pip3 install --no-cache-dir -U spacy click torch thinc
194+
# install requirements for working with cogstack scripts
195+
RUN pip3 install --no-cache-dir -r notebooks/demo_working_with_cogstack/requirements.txt
209196

210197
# Get the spacy model
211198
#ARG SPACY_MODELS="en_core_web_sm en_core_web_md en_core_web_lg"
212199
#RUN for spacy_model in ${SPACY_MODELS}; do python3 -m spacy download $spacy_model; done
213200

214-
# install requirements for working with cogstack scripts
215-
# RUN pip3 install --no-cache-dir -r notebooks/demo_working_with_cogstack/requirements.txt
216-
217-
RUN pip3 install --no-cache-dir medcat==1.14.0
218-
219201
# clean up pip
220202
RUN pip3 cache purge
221203

222204
#######################################################################################################
223205

224206
# install R and other dependencies
225-
COPY ./scripts/r_kernel_install.sh /etc/jupyterhub/
226-
RUN Rscript /etc/jupyterhub/r_kernel_install.sh
207+
COPY ./scripts/r_kernel_install.sh /srv/jupyterhub/
208+
RUN Rscript /srv/jupyterhub/r_kernel_install.sh
227209

228210
# create jupyterhub shared folder
229211
RUN mkdir -p /home/jovyan/scratch

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,20 @@ Images are available for both x86/ARM architectures (post version 1.2.7):
2525

2626
Full and more in-depth knowledge on the configuration itself is available in the primary repository [official documentation](https://cogstack-nifi.readthedocs.io/en/latest/deploy/services.html#id12).
2727

28+
2829
# Usage & configuration
2930

3031
ENV variables are located in: [env/jupyter.env](./env/jupyter.env) and [env/general.env](./env/general.env).\
3132
Please check the ENV file for additional information, every variable is commented and described.
3233

34+
## Python packages installed
35+
36+
Full list found in [requirements.txt](./requirements.txt).
37+
38+
## Security
39+
40+
The certificates used are located in the `./security/` folder. They are taken from the security folder of the [CogStack-NiFi](https://github.com/CogStack/CogStack-NiFi) repository: [root-ca.key](https://raw.githubusercontent.com/CogStack/CogStack-NiFi/refs/heads/master/security/root-ca.key) and [root-ca.pem](https://raw.githubusercontent.com/CogStack/CogStack-NiFi/refs/heads/master/security/root-ca.pem). Read the [security section](https://cogstack-nifi.readthedocs.io/en/latest/security.html) for more information on how to generate them from the main NiFi repository.
41+
3342
## Setting up your own hub
3443

3544
There are two docker compose files:
@@ -50,7 +59,6 @@ Updating certificates and env settings from the main repo:
5059
- sometimes it is necessary to grab new certificates if the old ones expired (from the main Cogstack-NiFi repo)
5160
- from the main repo directory, execute `bash scripts/update_env_cert_from_nifi_repo.sh`
5261

53-
5462
## Access and account control
5563
To access JupyterHub on the host machine (e.g. localhost), open `https://localhost:8888` in a browser.
5664

config/jupyter_notebook_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
# Configuration file for jupyter-notebook.
23

34
c = get_config()

config/jupyterhub_config.py

Lines changed: 75 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2,33 +2,26 @@
22
# source : https://github.com/jupyterhub/jupyterhub-deploy-docker/blob/main/basic-example/jupyterhub_config.py
33

44
import os
5-
import pwd
6-
import subprocess
75
import sys
86
import docker
9-
import dockerspawner
7+
import dockerspawner
8+
import traceback
9+
#import traitlets
1010
from jupyterhub.auth import LocalAuthenticator
1111
from nativeauthenticator import NativeAuthenticator
12+
from traitlets.config import Config
1213

1314

1415
class LocalNativeAuthenticator(NativeAuthenticator, LocalAuthenticator):
1516
pass
1617

1718

18-
def pre_spawn_hook(spawner):
19-
username = str(spawner.user.name).lower()
20-
try:
21-
pwd.getpwnam(username)
22-
except KeyError:
23-
subprocess.check_call(["useradd", "-ms", "/bin/bash", username])
24-
25-
26-
c = get_config()
19+
c: Config = get_config()
2720

2821
# Spawn containers from this image
2922
# Either use the CoGstack one from the repo which is huge and contains all the stuff needed or,
3023
# use the default official one which is clean.
31-
c.DockerSpawner.image = os.getenv("DOCKER_NOTEBOOK_IMAGE", "cogstacksystems:jupyterhub/singleuser:latest")
24+
c.DockerSpawner.image = os.getenv("DOCKER_NOTEBOOK_IMAGE", "cogstacksystems:jupyterhub/singleuser:latest-amd64")
3225

3326
# JupyterHub requires a single-user instance of the Notebook server, so we
3427
# default to using the `start-singleuser.sh` script included in the
@@ -61,7 +54,7 @@ def pre_spawn_hook(spawner):
6154
# We follow the same convention.
6255
notebook_dir = os.environ.get("DOCKER_NOTEBOOK_DIR", "/home/jovyan/work")
6356
shared_content_dir = os.environ.get("DOCKER_SHARED_DIR", "/home/jovyan/scratch")
64-
57+
work_dir = os.environ.get("JUPYTER_WORK_DIR", "/lab/workspaces/auto-b/tree/" + str(notebook_dir.split("/")[-1]))
6558

6659
#c.DockerSpawner.notebook_dir = notebook_dir
6760
# Mount the real user's Docker volume on the host to the notebook user's
@@ -78,9 +71,9 @@ def pre_spawn_hook(spawner):
7871
if select_notebook_image_allowed == "true":
7972
# c.DockerSpawner.image_whitelist has been deprecated for allowed_images
8073
c.DockerSpawner.allowed_images = {
81-
'minimal': 'jupyterhub/singleuser:latest',
82-
'cogstack': 'cogstacksystems/jupyter-singleuser:latest',
83-
'cogstack-gpu': 'cogstacksystems/jupyter-singleuser-gpu:latest'
74+
"minimal": "jupyterhub/singleuser:latest-amd64",
75+
"cogstack": "cogstacksystems/jupyter-singleuser:latest-amd64",
76+
"cogstack-gpu": "cogstacksystem s/jupyter-singleuser-gpu:latest-amd64"
8477
}
8578
# https://github.com/jupyterhub/dockerspawner/issues/423
8679
c.DockerSpawner.remove = True
@@ -103,27 +96,71 @@ def pre_spawn_hook(spawner):
10396
"no_proxy": ",".join(list(filter(len, os.environ.get("no_proxy", "").split(",") + [hub_container_ip_or_name]))),
10497
}
10598

106-
os.environ['NO_PROXY'] = ''
107-
os.environ['no_proxy'] = ''
108-
os.environ['HTTP_PROXY'] = ''
109-
os.environ['HTTPS_PROXY'] = ''
110-
os.environ['http_proxy'] = ''
111-
os.environ['https_proxy'] = ''
99+
os.environ["NO_PROXY"] = ""
100+
os.environ["no_proxy"] = ""
101+
os.environ["HTTP_PROXY"] = ""
102+
os.environ["HTTPS_PROXY"] = ""
103+
os.environ["http_proxy"] = ""
104+
os.environ["https_proxy"] = ""
105+
106+
107+
"""
108+
def pre_spawn_hook(spawner):
109+
username = str(spawner.user.name).lower()
110+
try:
111+
pwd.getpwnam(username)
112+
except KeyError:
113+
subprocess.check_call(["useradd", "-ms", "/bin/bash", username])
114+
"""
115+
116+
117+
# Spawn single-user servers as Docker containers
118+
class DockerSpawner(dockerspawner.DockerSpawner):
119+
def start(self):
120+
# username is self.user.name
121+
self.volumes = {"jupyterhub-user-{}".format(self.user.name): notebook_dir}
122+
123+
if self.user.name not in whitelist:
124+
whitelist.add(self.user.name)
125+
with open(userlist_path, "a") as f:
126+
f.write("\n")
127+
f.write(self.user.name)
128+
129+
if self.user.name in list(team_map.keys()):
130+
for team in team_map[self.user.name]:
131+
team_dir_path = os.path.join(shared_content_dir, team)
132+
self.volumes["jupyterhub-team-{}".format(team)] = {
133+
"bind": team_dir_path,
134+
"mode": "rw", # or ro for read-only
135+
}
136+
137+
# this is a temporary fix, need to actually check permissions
138+
self.mem_limit = resource_allocation_user_ram_limit
139+
self.post_start_cmd = "chmod -R 777 " + shared_content_dir
140+
141+
return super().start()
112142

113143

114-
def post_spawn_hook(spawner):
144+
def pre_spawn_hook(spawner: DockerSpawner):
145+
#username = spawner.user.name
146+
#spawner.environment["GREETING"] = f"Hello {username}"
115147
try:
116-
with open("/etc/environment", "w+") as f:
148+
for key, value in ENV_PROXIES.items():
149+
spawner.environment[str(key)] = str(value)
150+
with open("/home/jovyan/test.txt", "w+") as f:
117151
for key, value in ENV_PROXIES.items():
118-
f.write(str(key) + "=" + str(value) + "\n")
119-
except KeyError:
120-
pass
152+
f.write("export" + " " + str(key) + "=" + str(value) + "\n")
153+
except Exception:
154+
traceback.print_exc()
121155

122156

123-
c.Spawner.post_spawn_hook = post_spawn_hook
157+
c.Spawner.default_url = work_dir
158+
c.Spawner.pre_spawn_hook = pre_spawn_hook
124159

125-
#c.Spawner.pre_spawn_hook = pre_spawn_hook
126160
#c.Spawner.ip = "127.0.0.1"
161+
162+
# This is buggy, setting the HTTP(s)_PROXY & NO_PROXY variables via pre/post
163+
# spawn hook is better
127164
#c.Spawner.environment = ENV_PROXIES
128165

129166
# AUTHENTICATION
@@ -139,7 +176,10 @@ def post_spawn_hook(spawner):
139176
c.LocalAuthenticator.create_system_users = True
140177
c.SystemdSpawner.dynamic_users = True
141178
c.PAMAuthenticator.admin_groups = {"wheel"}
142-
c.Authenticator.whitelist = whitelist = set()
179+
c.Authenticator.allowed_users = whitelist = set()
180+
181+
182+
#c.Authenticator.manage_groups = True
143183

144184
#c.Authenticator.allow_all = True
145185

@@ -163,32 +203,6 @@ def per_user_limit(role):
163203
return ram_limits.get(role)
164204

165205

166-
# Spawn single-user servers as Docker containers
167-
class DockerSpawner(dockerspawner.DockerSpawner):
168-
def start(self):
169-
# username is self.user.name
170-
self.volumes = {"jupyterhub-user-{}".format(self.user.name): notebook_dir}
171-
172-
if self.user.name not in whitelist:
173-
whitelist.add(self.user.name)
174-
with open(userlist_path , "a") as f:
175-
f.write("\n")
176-
f.write(self.user.name)
177-
178-
if self.user.name in list(team_map.keys()):
179-
for team in team_map[self.user.name]:
180-
team_dir_path = os.path.join(shared_content_dir, team)
181-
self.volumes["jupyterhub-team-{}".format(team)] = {
182-
"bind": team_dir_path,
183-
"mode": "rw", # or ro for read-only
184-
}
185-
186-
# this is a temporary fix, need to actually check permissions
187-
self.mem_limit = resource_allocation_user_ram_limit
188-
self.post_start_cmd = "chmod -R 777 " + shared_content_dir
189-
190-
return super().start()
191-
192206

193207
# Spawn single-user servers as Docker containers
194208
c.JupyterHub.spawner_class = DockerSpawner
@@ -252,11 +266,13 @@ def start(self):
252266
}
253267
c.DockerSpawner.environment.update(ENV_PROXIES)
254268

269+
# Alternative, use: "nativeauthenticator.NativeAuthenticator"
255270
#c.JupyterHub.authenticator_class = LocalNativeAuthenticator
256271

257272
c.FirstUseAuthenticator.create_users = True
258273
c.JupyterHub.authenticator_class = "firstuseauthenticator.FirstUseAuthenticator"
259-
# Alternative, use: "nativeauthenticator.NativeAuthenticator"
274+
275+
260276

261277
# User containers will access hub by container name on the Docker network
262278
c.JupyterHub.ip = "0.0.0.0"
@@ -327,7 +343,7 @@ def start(self):
327343
# Default: False
328344
# c.Application.show_config_json = False
329345

330-
# Let's start with the least privilege, especially on a single host having limited resources
346+
# Let"s start with the least privilege, especially on a single host having limited resources
331347
c.JupyterHub.allow_named_servers = False
332348

333349
# Timeout (in seconds) to wait for spawners to initialize

0 commit comments

Comments
 (0)