Skip to content

Commit 5bebeba

Browse files
authored
Add Docker build and static templates (#38)
Integrates the Docker build step into the same repo. To build the container, run `docker build -f docker/Dockerfile .` from the project's root directory. In addition, this PR adds functionality to enable the Hub menu on newer Dataproc images, even when not proxying through Dataproc Hub.
1 parent dbd81b6 commit 5bebeba

22 files changed

+1976
-20
lines changed

.github/workflows/linter.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
# Only run against changed files
3535
VALIDATE_ALL_CODEBASE: false
3636
# Don't lint tests
37-
FILTER_REGEX_EXCLUDE: tests/.*
37+
FILTER_REGEX_EXCLUDE: (tests/.*)|(docker/.*)|(static/.*)
3838
DEFAULT_BRANCH: master
3939
LINTER_RULES_PATH: .
4040
VALIDATE_PYTHON_FLAKE8: false

dataprocspawner/spawner.py

+50-19
Original file line numberDiff line numberDiff line change
@@ -13,36 +13,35 @@
1313
# limitations under the License.
1414
"""A custom Spawner that creates notebooks backed by Dataproc clusters."""
1515

16+
import asyncio
1617
import json
17-
import re
18+
import math
1819
import os
1920
import random
21+
import re
2022
import string
21-
import proto
22-
import yaml
23-
import math
24-
import asyncio
2523
from datetime import datetime as dt
2624
from types import SimpleNamespace
2725

28-
from google.protobuf.json_format import MessageToDict
26+
import proto
27+
import requests
28+
import yaml
29+
from async_generator import aclosing, async_generator, yield_
30+
from dataprocspawner.customize_cluster import (get_base_cluster_html_form,
31+
get_custom_cluster_html_form)
32+
from dataprocspawner.spawnable import DataprocHubServer
2933
from google.api_core import exceptions
30-
from google.cloud import storage, logging_v2
31-
from google.cloud.dataproc_v1beta2 import (
32-
ClusterControllerClient, Cluster, ClusterStatus)
34+
from google.cloud import logging_v2, storage
35+
from google.cloud.dataproc_v1beta2 import (Cluster, ClusterControllerClient,
36+
ClusterStatus)
37+
from google.cloud.dataproc_v1beta2.services.cluster_controller.transports import \
38+
ClusterControllerGrpcTransport
3339
from google.cloud.dataproc_v1beta2.types.shared import Component
34-
35-
from google.cloud.dataproc_v1beta2.services.cluster_controller.transports import ClusterControllerGrpcTransport
36-
from traitlets import List, Unicode, Dict, Bool
37-
38-
from dataprocspawner.spawnable import DataprocHubServer
39-
from dataprocspawner.customize_cluster import (
40-
get_base_cluster_html_form, get_custom_cluster_html_form)
41-
42-
from async_generator import async_generator, yield_, aclosing
43-
40+
from google.protobuf.json_format import MessageToDict
4441
from jupyterhub import orm
4542
from jupyterhub.spawner import Spawner
43+
from traitlets import Bool, Dict, List, Unicode
44+
4645

4746
def url_path_join(*pieces):
4847
"""Join components of url into a relative url.
@@ -329,6 +328,21 @@ def __init__(self, *args, **kwargs):
329328
self.component_gateway_url = None
330329
self.progressor = SimpleNamespace(bar=0, logging=set(), start='')
331330

331+
# Check if we have a notebooks proxy URL
332+
self.hub_host = ''
333+
if 'hub_host' in kwargs:
334+
self.hub_host = kwargs.get('hub_host')
335+
else:
336+
try:
337+
r = requests.get(
338+
'http://metadata.google.internal/computeMetadata/v1/instance/attributes/proxy-url',
339+
headers={'Metadata-Flavor': 'Google'})
340+
r.raise_for_status()
341+
self.hub_host = f'https://{r.text}/'
342+
self.log.info(f'Got proxy url {r.text} from metadata')
343+
except Exception as e: ## pylint: disable=broad-except
344+
self.log.info(f'Failed to get proxy url from metadata: {e}')
345+
332346
if mock:
333347
# Mock the API
334348
self.dataproc_client = kwargs.get('dataproc')
@@ -646,6 +660,21 @@ def get_env(self):
646660
self.log.debug(f'env is {env}')
647661
return env
648662

663+
def get_args(self):
664+
"""Set arguments to pass to Jupyterhub-singleuser on the cluster.
665+
666+
Note that we don't call the superclass method, as we don't want to
667+
set default args like port and ip.
668+
"""
669+
args = []
670+
671+
if self.debug:
672+
args.append('--debug')
673+
args.append('--NotebookApp.hub_activity_interval=0')
674+
args.append('--NotebookApp.hub_host={}'.format(self.hub_host))
675+
args.extend(self.args)
676+
return args
677+
649678
################################################################################
650679
# Custom Functions
651680
################################################################################
@@ -1265,6 +1294,8 @@ def _build_cluster_config(self, cluster_data=None):
12651294
['dataproc:jupyter.hub.args']) = self.args_str
12661295
(cluster_data['config']['software_config']['properties']
12671296
['dataproc:jupyter.hub.env']) = self.env_str
1297+
(cluster_data['config']['software_config']['properties']
1298+
['dataproc:jupyter.hub.menu.enabled']) = 'true'
12681299

12691300
if self.gcs_user_folder:
12701301
(cluster_data['config']['software_config']['properties']

docker/Dockerfile

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
FROM jupyterhub/jupyterhub:latest
2+
3+
# Install gcloud
4+
RUN apt-get update && apt-get install -y curl apt-transport-https ca-certificates gnupg \
5+
&& curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \
6+
&& echo "deb https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \
7+
&& apt-get update && apt-get install -y google-cloud-sdk
8+
9+
RUN apt-get update \
10+
&& apt-get remove -y python3-pycurl \
11+
&& apt-get install -y --no-install-recommends \
12+
libssl-dev \
13+
libcurl4-openssl-dev \
14+
python3-wheel \
15+
git \
16+
&& apt-get purge \
17+
&& apt-get clean -y
18+
19+
RUN pip install --upgrade pip \
20+
&& pip install --upgrade \
21+
psycopg2-binary \
22+
google-api-python-client \
23+
google-auth-oauthlib \
24+
google-api-python-client \
25+
oauth2client \
26+
google-auth \
27+
googleapis-common-protos \
28+
google-auth-httplib2
29+
30+
RUN apt-get update && apt-get install -y libpq-dev \
31+
&& apt-get install -y vim \
32+
&& apt-get install -y iproute2 \
33+
&& apt-get autoremove -y \
34+
&& apt-get clean -y
35+
36+
# Install Dataproc Spawner
37+
COPY setup.py /tmp/dataprocspawner/setup.py
38+
COPY dataprocspawner/ /tmp/dataprocspawner/dataprocspawner/
39+
COPY dataprochub/ /tmp/dataprocspawner/dataprochub/
40+
RUN pip install git+https://github.com/GoogleCloudPlatform/jupyterhub-gcp-proxies-authenticator
41+
RUN pip install /tmp/dataprocspawner/
42+
43+
# JupyterHub design
44+
COPY static/templates/ /etc/jupyterhub/templates/
45+
46+
COPY docker/jupyterhub_config.py .
47+
48+
EXPOSE 8080
49+
50+
LABEL "com.google.environment"="Dataproc Hub"
51+
52+
COPY docker/jupyterhub.sh /usr/local/bin/
53+
ENTRYPOINT ["bash", "/usr/local/bin/jupyterhub.sh"]

docker/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
To build, run `docker build -f docker/Dockerfile .` from the root project
2+
directory.

0 commit comments

Comments
 (0)