Skip to content

Commit 81356d1

Browse files
authored
Single user notebooks (#62)
* Add admin option to set personal authentication for all clusters spawned from a single Hub * Option also available at the property level for users if custom clusters are allowed * Option also available at the template level (YAML) and adapt the key to match the Hub user identity
1 parent 71b8b3c commit 81356d1

File tree

6 files changed

+117
-13
lines changed

6 files changed

+117
-13
lines changed

Diff for: dataprocspawner/spawner.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,13 @@ class DataprocSpawner(Spawner):
333333
Whether to show spawned single-user clusters in the list of Dataproc Notebooks.
334334
""")
335335

336+
force_single_user = Bool(
337+
False,
338+
config=True,
339+
help="""Whether a notebook on a cluster can only be accessed by the user who
340+
spawned it.
341+
""")
342+
336343
def __init__(self, *args, **kwargs):
337344
mock = kwargs.pop('_mock', False)
338345
super().__init__(*args, **kwargs)
@@ -950,7 +957,8 @@ async def get_cluster(self, clustername):
950957
################################################################################
951958

952959
def get_username(self, raw=False):
953-
return self.user.name if raw else re.sub(r'[^a-zA-Z0-9=]', '-', str(self.user.name))
960+
username, _ = (self.user.name.split('@') + [None])[:2]
961+
return username if raw else re.sub(r'[^a-zA-Z0-9=]', '-', str(username))
954962

955963
def clustername(self, cluster_name=None):
956964
""" JupyterHub provides a notebook per user, so the username is used to
@@ -1301,7 +1309,7 @@ def _apply_users_configs(self, cluster_data):
13011309
int(self.user_options.get('sec_worker_node_amount'))
13021310

13031311
autoscaling_policy = self.user_options.get('autoscaling_policy', '')
1304-
if autoscaling_policy != 'None':
1312+
if autoscaling_policy and autoscaling_policy != 'None':
13051313
cluster_data['config']['autoscaling_config'] = {
13061314
'policy_uri': (
13071315
f"""https://www.googleapis.com/compute/v1/projects/"""
@@ -1343,6 +1351,7 @@ def _build_cluster_config(self, cluster_data=None):
13431351
# but must be set in case there is no form.
13441352
cluster_data = cluster_data or {}
13451353
cluster_zone = self.zone
1354+
personal_auth_property = 'dataproc:dataproc.personal-auth.user'
13461355

13471356
# Sets the cluster definition with form data.
13481357
if self.user_options:
@@ -1463,6 +1472,15 @@ def _build_cluster_config(self, cluster_data=None):
14631472
else:
14641473
cluster_data['config']['software_config']['image_version'] = '1.4-debian9'
14651474

1475+
# A user can only set 'dataproc:dataproc.personal-auth.user' for themselves.
1476+
# Otherwise the CG URL is not accessible by either identity. So, if set,
1477+
# the personal auth property can only have the value of self.user.name. For
1478+
# that reason, there is no priority between yaml and user.
1479+
if (self.force_single_user or
1480+
personal_auth_property in cluster_data['config']['software_config']['properties']):
1481+
(cluster_data['config']['software_config']['properties']
1482+
[personal_auth_property]) = self.user.name
1483+
14661484
# Forces Component Gateway
14671485
cluster_data['config'].setdefault('endpoint_config', {})
14681486
cluster_data['config']['endpoint_config']['enable_http_port_access'] = True

Diff for: docker/jupyterhub.sh

+7
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ function set-environment-from-metadata {
104104
local machine_types_list
105105
local notebooks_examples_location
106106
local hub_allow_named_servers
107+
local force_single_user
107108

108109
notebooks_location=$( curl -s -f \
109110
${metadata_base_url}/instance/attributes/notebooks-location -H "Metadata-Flavor: Google" || echo )
@@ -123,6 +124,8 @@ function set-environment-from-metadata {
123124
${metadata_base_url}/instance/attributes/notebooks-examples-location -H "Metadata-Flavor: Google" || echo )
124125
hub_allow_named_servers=$( curl -s -f \
125126
${metadata_base_url}/instance/attributes/hub-allow-named-servers -H "Metadata-Flavor: Google" || echo )
127+
force_single_user=$( curl -s -f \
128+
${metadata_base_url}/instance/attributes/force-single-user -H "Metadata-Flavor: Google" || echo )
126129

127130
if [ -n "${notebooks_location}" ]; then
128131
export NOTEBOOKS_LOCATION="${notebooks_location}"
@@ -151,6 +154,10 @@ function set-environment-from-metadata {
151154
if [ -n "${hub_allow_named_servers}" ]; then
152155
export HUB_ALLOW_NAMED_SERVERS="${hub_allow_named_servers}"
153156
fi
157+
if [ -n "${force_single_user}" ]; then
158+
export FORCE_SINGLE_USER="${force_single_user}"
159+
fi
160+
154161
}
155162

156163
# 'set-region-and-zone-from-metadata'

Diff for: docker/jupyterhub_config.py

+1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def is_true(boolstring: str):
6767
c.DataprocSpawner.allow_custom_clusters = is_true(os.environ.get('DATAPROC_ALLOW_CUSTOM_CLUSTERS', ''))
6868
c.DataprocSpawner.allow_random_cluster_names = is_true(os.environ.get('ALLOW_RANDOM_CLUSTER_NAMES', ''))
6969
c.DataprocSpawner.show_spawned_clusters_in_notebooks_list = is_true(os.environ.get('SHOW_SPAWNED_CLUSTERS', ''))
70+
c.DataprocSpawner.force_single_user = is_true(os.environ.get('FORCE_SINGLE_USER', ''))
7071
c.DataprocSpawner.gcs_notebooks = os.environ.get('GCS_NOTEBOOKS', '')
7172
if not c.DataprocSpawner.gcs_notebooks:
7273
c.DataprocSpawner.gcs_notebooks = os.environ.get('NOTEBOOKS_LOCATION', '')

Diff for: examples/deploy_local.sh

+2
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,6 @@ docker run -it \
4040
-e PROJECT="${PROJECT}" \
4141
-e DATAPROC_CONFIGS="${CONFIGS_LOCATION}" \
4242
-e JUPYTERHUB_REGION="us-west1" \
43+
-e DATAPROC_ALLOW_CUSTOM_CLUSTERS=true \
44+
-e FORCE_SINGLE_USER=true \
4345
"${DOCKER_IMAGE}"

Diff for: tests/test_data/perso.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
clusterName: 'personal'
2+
config:
3+
softwareConfig:
4+
properties:
5+
dataproc:dataproc.personal-auth.user: '[email protected]'

Diff for: tests/test_spawner.py

+82-11
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def set_delay_done(self):
6161
self.op_done = True
6262

6363
class MockUser(mock.Mock):
64-
name = 'fake'
64+
name = 'fake@example.com'
6565
base_url = '/user/fake'
6666
server = Server()
6767

@@ -112,7 +112,7 @@ async def test_get_cluster_notebook_endpoint(*args, **kwargs):
112112

113113
mock_client.create_cluster.assert_called_once()
114114

115-
assert spawner.cluster_definition['cluster_name'] == 'dataprochub-fake'
115+
assert spawner.cluster_definition['cluster_name'] == f'dataprochub-{spawner.get_username()}'
116116
assert (spawner.cluster_definition['config']['gce_cluster_config']['zone_uri']) == (
117117
f'https://www.googleapis.com/compute/v1/projects/{spawner.project}/zones/{spawner.zone}')
118118

@@ -150,7 +150,7 @@ async def test_get_cluster_notebook_endpoint(*args, **kwargs):
150150
url = await spawner.start()
151151
mock_client.create_cluster.assert_called_once()
152152

153-
assert spawner.cluster_definition['cluster_name'] == 'dataprochub-fake-server1'
153+
assert spawner.cluster_definition['cluster_name'] == f'dataprochub-{spawner.get_username()}-server1'
154154

155155
@pytest.mark.asyncio
156156
async def test_start_existing_clustername(self, monkeypatch):
@@ -195,7 +195,7 @@ async def test_stop_normal(self):
195195
mock_client.delete_cluster.assert_called_once_with(
196196
project_id='test-stop',
197197
region=self.region,
198-
cluster_name='dataprochub-fake')
198+
cluster_name=f'dataprochub-{spawner.get_username()}')
199199

200200
@pytest.mark.asyncio
201201
async def test_stop_no_cluster(self):
@@ -374,13 +374,11 @@ def test_minimium_cluster_definition(self, monkeypatch):
374374

375375
def test_read_file(*args, **kwargs):
376376
config_string = open('./tests/test_data/minimum.yaml', 'r').read()
377-
print(config_string)
378377
return config_string
379378

380379
def test_read_file_preview(*args, **kwargs):
381380
config_string = open('./tests/test_data/minimum.yaml', 'r').read()
382381
config_string = config_string.replace('1.4.16', 'preview')
383-
print(config_string)
384382
return config_string
385383

386384
def test_read_file_2_0(*args, **kwargs):
@@ -435,7 +433,6 @@ def test_clustername(*args, **kwargs):
435433
monkeypatch.setattr(spawner, "read_gcs_file", test_read_file_preview)
436434

437435
config_built = spawner._build_cluster_config()
438-
print(config_built)
439436
assert Component['JUPYTER'].value in config_built['config']['software_config']['optional_components']
440437
assert Component['ANACONDA'].value not in config_built['config']['software_config']['optional_components']
441438

@@ -534,6 +531,7 @@ def test_clustername(*args, **kwargs):
534531
assert config_built['config']['software_config']['properties']['dataproc:jupyter.hub.env'] == 'test-env-str'
535532
assert config_built['config']['software_config']['properties']['dataproc:jupyter.hub.menu.enabled'] == 'true'
536533
assert 'dataproc:jupyter.hub.enabled' not in config_built['config']['software_config']['properties']
534+
assert 'dataproc:dataproc.personal-auth.user' not in config_built['config']['software_config']['properties']
537535

538536
def test_cluster_definition_overrides(self, monkeypatch):
539537
"""Check that config settings incompatible with JupyterHub are overwritten correctly."""
@@ -665,7 +663,7 @@ def test_clustername(*args, **kwargs):
665663
'KeyCamelCase': 'UlowUlow',
666664
'key_with_underscore': 'https://downloads.io/protected/files/enterprise-trial.tar.gz',
667665
'key_with_underscore_too': 'some_UPPER_and_UlowerU:1234',
668-
'session-user': MockUser.name
666+
'session-user': spawner.get_username()
669667
},
670668
'zone_uri': 'https://www.googleapis.com/compute/v1/projects/test-project/zones/test-form1-a'
671669
},
@@ -689,7 +687,7 @@ def test_clustername(*args, **kwargs):
689687
'dataproc:jupyter.hub.env': 'test-env-str',
690688
'dataproc:jupyter.hub.menu.enabled': 'true',
691689
'dataproc:jupyter.instance-tag.enabled': 'false',
692-
'dataproc:jupyter.notebook.gcs.dir': 'gs://users-notebooks/fake',
690+
'dataproc:jupyter.notebook.gcs.dir': f'gs://users-notebooks/{spawner.get_username()}',
693691
'key-with-dash:UPPER_UPPER': '4000',
694692
'key-with-dash-too:UlowUlowUlow': '85196m',
695693
'key:and.multiple.dots.lowUlowUlow': '13312m'
@@ -775,7 +773,7 @@ def test_clustername(*args, **kwargs):
775773
assert config_built['config']['gce_cluster_config']['metadata'] == {
776774
'm1': 'v1',
777775
'm2': 'v2',
778-
'session-user': MockUser.name
776+
'session-user': spawner.get_username()
779777
}
780778

781779
def test_uris(self, monkeypatch):
@@ -998,7 +996,6 @@ def test_read_file(*args, **kwargs):
998996

999997
# Now check that the config with resolved fields is correct as well
1000998
config_built = spawner._build_cluster_config()
1001-
print(config_built)
1002999

10031000
assert 'unknown_field_top_level' not in config_built
10041001
assert 'unknown_field_config_level' not in config_built['config']
@@ -1193,3 +1190,77 @@ def test_image_version(*args, **kwargs):
11931190

11941191
assert config_built['config']['software_config']['image_version'] == '1.5-debian10'
11951192
assert config_built['config']['master_config']['image_uri'] == 'projects/test-project/global/images/custom-image'
1193+
1194+
def test_unified_auth_flag(self, monkeypatch):
1195+
fake_creds = AnonymousCredentials()
1196+
mock_dataproc_client = mock.create_autospec(ClusterControllerClient(credentials=fake_creds))
1197+
mock_gcs_client = mock.create_autospec(storage.Client(credentials=fake_creds, project='project'))
1198+
mock_compute_client = mock.create_autospec(discovery.build('compute', 'v1',
1199+
credentials=fake_creds, cache_discovery=False))
1200+
spawner = DataprocSpawner(hub=Hub(), dataproc=mock_dataproc_client, gcs=mock_gcs_client,
1201+
user=MockUser(), _mock=True, gcs_notebooks=self.gcs_notebooks,
1202+
compute=mock_compute_client, project='test-project')
1203+
1204+
spawner.force_single_user = True
1205+
spawner.env_str = "test-env-str"
1206+
spawner.args_str = "test-args-str"
1207+
config_built = spawner._build_cluster_config()
1208+
assert (config_built['config']['software_config']['properties']
1209+
['dataproc:dataproc.personal-auth.user']) == spawner.user.name
1210+
1211+
def test_unified_auth_yaml(self, monkeypatch):
1212+
fake_creds = AnonymousCredentials()
1213+
mock_dataproc_client = mock.create_autospec(ClusterControllerClient(credentials=fake_creds))
1214+
mock_gcs_client = mock.create_autospec(storage.Client(credentials=fake_creds, project='project'))
1215+
mock_compute_client = mock.create_autospec(discovery.build('compute', 'v1',
1216+
credentials=fake_creds, cache_discovery=False))
1217+
spawner = DataprocSpawner(hub=Hub(), dataproc=mock_dataproc_client, gcs=mock_gcs_client,
1218+
user=MockUser(), _mock=True, gcs_notebooks=self.gcs_notebooks,
1219+
compute=mock_compute_client, project='test-project')
1220+
1221+
def test_read_file(*args, **kwargs):
1222+
config_string = open('./tests/test_data/perso.yaml', 'r').read()
1223+
return config_string
1224+
1225+
monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
1226+
spawner.env_str = "test-env-str"
1227+
spawner.args_str = "test-args-str"
1228+
spawner.user_options = {
1229+
'cluster_type': 'perso.yaml',
1230+
'cluster_zone': 'us-central-1'
1231+
}
1232+
1233+
config_built = spawner._build_cluster_config()
1234+
1235+
assert (config_built['config']['software_config']['properties']
1236+
['dataproc:dataproc.personal-auth.user']) == spawner.user.name
1237+
1238+
def test_unified_auth_user(self, monkeypatch):
1239+
fake_creds = AnonymousCredentials()
1240+
mock_dataproc_client = mock.create_autospec(ClusterControllerClient(credentials=fake_creds))
1241+
mock_gcs_client = mock.create_autospec(storage.Client(credentials=fake_creds, project='project'))
1242+
mock_compute_client = mock.create_autospec(discovery.build('compute', 'v1',
1243+
credentials=fake_creds, cache_discovery=False))
1244+
spawner = DataprocSpawner(hub=Hub(), dataproc=mock_dataproc_client, gcs=mock_gcs_client,
1245+
user=MockUser(), _mock=True, gcs_notebooks=self.gcs_notebooks,
1246+
compute=mock_compute_client, project='test-project')
1247+
1248+
def test_read_file(*args, **kwargs):
1249+
config_string = open('./tests/test_data/perso.yaml', 'r').read()
1250+
return config_string
1251+
1252+
monkeypatch.setattr(spawner, "read_gcs_file", test_read_file)
1253+
spawner.env_str = "test-env-str"
1254+
spawner.args_str = "test-args-str"
1255+
spawner.allow_custom_clusters = True
1256+
spawner.user_options = {
1257+
'cluster_type': 'perso.yaml',
1258+
'cluster_zone': 'us-central-1',
1259+
"cluster_props_prefix_0": "dataproc",
1260+
"cluster_props_key_0": "dataproc.personal-auth.user",
1261+
"cluster_props_val_0": "[email protected]"
1262+
}
1263+
config_built = spawner._build_cluster_config()
1264+
assert (config_built['config']['software_config']['properties']
1265+
['dataproc:dataproc.personal-auth.user']) == spawner.user.name
1266+

0 commit comments

Comments
 (0)