Update to newest Dataproc client library version (#74)
* Update to newest Dataproc client library version

* Switch to the v1 API, as the v1beta2 API is deprecated; all new
  features will be released in the v1 API going forward.
* Update handler test to account for
  jupyterhub/jupyterhub@5890064,
  which added new /hub/api handlers.
* Update linter action to v4 due to https://github.com/github/super-linter/issues/2255

Co-authored-by: Jerry Ding <[email protected]>
JerryLeiDing and Jerry Ding authored Jan 29, 2022
1 parent 8859c08 commit 6636e9f
Showing 7 changed files with 21 additions and 19 deletions.
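
To make the library change concrete, here is a minimal sketch of the import swap this commit performs, assuming google-cloud-dataproc>=3.0.0; the project, region, and cluster names are placeholders, and the regional endpoint shown is one common way to construct ClusterControllerClient, not necessarily how this repository configures it:

    # Before: from google.cloud.dataproc_v1beta2 import ClusterControllerClient
    from google.cloud.dataproc_v1 import ClusterControllerClient

    # Placeholder project/region/cluster values, for illustration only.
    client = ClusterControllerClient(
        client_options={'api_endpoint': 'us-central1-dataproc.googleapis.com:443'})
    cluster = client.get_cluster(
        project_id='my-project', region='us-central1', cluster_name='my-cluster')
    print(cluster.status.state)  # a ClusterStatus.State enum value in the v1 API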
2 changes: 1 addition & 1 deletion .github/workflows/linter.yml
@@ -32,7 +32,7 @@ jobs:

       # Run Linter against code base
       - name: Lint Code Base
-        uses: github/super-linter@v3
+        uses: github/super-linter@v4
         env:
           # Only run against changed files
           VALIDATE_ALL_CODEBASE: false
2 changes: 1 addition & 1 deletion dataprocspawner/spawnable.py
@@ -18,7 +18,7 @@
 import socket

 from google.api_core import exceptions
-from google.cloud.dataproc_v1beta2 import ClusterStatus
+from google.cloud.dataproc_v1 import ClusterStatus
 from jupyterhub.objects import Server
 from jupyterhub.utils import exponential_backoff
 from tornado import ioloop
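
Since ClusterStatus now comes from the v1 package, a small self-contained sketch of the kind of state check spawnable.py can make; the Cluster message is constructed in place purely to demonstrate the enum comparison:

    from google.cloud.dataproc_v1 import Cluster, ClusterStatus

    # Throwaway message for illustration; real code would get this from the API.
    cluster = Cluster(status=ClusterStatus(state=ClusterStatus.State.RUNNING))
    assert cluster.status.state == ClusterStatus.State.RUNNING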
16 changes: 8 additions & 8 deletions dataprocspawner/spawner.py
@@ -33,15 +33,15 @@
 from dataprocspawner.spawnable import DataprocHubServer
 from google.api_core import exceptions
 from google.cloud import logging_v2, storage
-from google.cloud.dataproc_v1beta2 import (
+from google.cloud.dataproc_v1 import (
     Cluster,
     ClusterControllerClient,
     ClusterStatus,
 )
-from google.cloud.dataproc_v1beta2.services.cluster_controller.transports import (
+from google.cloud.dataproc_v1.services.cluster_controller.transports import (
     ClusterControllerGrpcTransport,
 )
-from google.cloud.dataproc_v1beta2.types.shared import Component
+from google.cloud.dataproc_v1.types.shared import Component
 from google.protobuf.json_format import MessageToDict
 from googleapiclient import discovery
 from jupyterhub import orm
@@ -726,7 +726,7 @@ def get_args(self):
     if self.debug:
       args.append('--debug')
     args.append('--SingleUserNotebookApp.hub_activity_interval=0')
-    args.append('--SingleUserNotebookApp.hub_host={}'.format(self.hub_host))
+    args.append(f'--SingleUserNotebookApp.hub_host={self.hub_host}')
     args.extend(self.args)
     return args

@@ -996,9 +996,9 @@ def _split_gcs_path(self, path: str):
     gcs_prefix = 'gs://'
     if path.startswith(gcs_prefix):
       path = path[len(gcs_prefix):]
-    path = path.split('/')
-    bucket = path[0]
-    folder = '/'.join(path[1:])
+    path_components = path.split('/')
+    bucket = path_components[0]
+    folder = '/'.join(path_components[1:])
     if not folder.endswith('/'):
       folder += '/'
     return bucket, folder
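
The rename above only avoids reusing the path variable; the behavior is unchanged. A hypothetical call illustrating the contract (spawner stands in for a DataprocSpawner instance):

    # Hypothetical illustration of _split_gcs_path's contract.
    bucket, folder = spawner._split_gcs_path('gs://my-bucket/configs/clusters')
    # bucket == 'my-bucket', folder == 'configs/clusters/'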
@@ -1101,7 +1101,7 @@ def _image_version_supports_anaconda(self, image_version):
     try:
       major_version = int(parts[0].split('.')[0])
     except ValueError as e:
-      self.log.warning('Failed to parse image version "%s": %s' % (image_version, e))
+      self.log.warning(f'Failed to parse image version "{image_version}": {e}')
       # Something weird is going on with image version format, fail open
       return True
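
For reference, the surrounding method derives parts from the image version string; the indexing implies something like image_version.split('-'), though that line is outside the visible hunk. A sketch of the parse under that assumption:

    # Assumed derivation of `parts`; only the int() call is visible in the hunk.
    image_version = '1.5-debian10'      # hypothetical input
    parts = image_version.split('-')    # ['1.5', 'debian10']
    major_version = int(parts[0].split('.')[0])  # 1
    # A version like 'preview-debian10' raises ValueError here,
    # and the method fails open by returning True.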

2 changes: 1 addition & 1 deletion setup.py
@@ -37,7 +37,7 @@
     license='Apache 2.0',
     install_requires=[
         'tornado>=5.0',
-        'google-cloud-dataproc>=2.0.0',
+        'google-cloud-dataproc>=3.0.0',
         'google-cloud-storage>=1.25.0',
         'traitlets>=4.3.2',
         'google-cloud-core>=1.3.0',
4 changes: 3 additions & 1 deletion tests/test_data/handlers_expected.json
@@ -16,9 +16,11 @@
   ["/hub/token", "handlers.pages.TokenPageHandler"],
   ["/hub/error/(\\d+)", "handlers.pages.ProxyErrorHandler"],
   ["/hub/health$", "handlers.pages.HealthCheckHandler"],
+  ["/hub/api/health$", "handlers.pages.HealthCheckHandler"],
   ["/hub/login", "handlers.login.LoginHandler"],
   ["/hub/logout", "handlers.login.LogoutHandler"],
   ["/hub/metrics$", "handlers.metrics.MetricsHandler"],
+  ["/hub/api/metrics$", "handlers.metrics.MetricsHandler"],
   ["/hub/api/authorizations/cookie/([^/]+)(?:/([^/]+))?", "apihandlers.auth.CookieAPIHandler"],
   ["/hub/api/authorizations/token/([^/]+)", "apihandlers.auth.TokenAPIHandler"],
   ["/hub/api/authorizations/token", "apihandlers.auth.TokenAPIHandler"],
@@ -49,4 +51,4 @@
   ["/(user|services)/([^/]+)", "handlers.base.AddSlashHandler"],
   ["(?!/hub/).*", "handlers.base.PrefixRedirectHandler"],
   ["(.*)", "handlers.base.Template404"]
-]
\ No newline at end of file
+]
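
The two added routes match jupyterhub/jupyterhub@5890064, which registered the health and metrics handlers under /hub/api/ as well. A minimal stdlib probe, assuming a Hub listening on localhost:8000 (the URL is an assumption for illustration):

    import urllib.request

    # Hypothetical local Hub URL; prints 200 when the Hub is healthy.
    with urllib.request.urlopen('http://localhost:8000/hub/api/health') as resp:
        print(resp.status)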
4 changes: 2 additions & 2 deletions tests/test_dataprochub.py
@@ -29,9 +29,9 @@
 from dataprochub.app import DataprocHub, DataprocHubUserUrlHandler
 from dataprochub.proxy import RedirectProxy
 from google.auth.credentials import AnonymousCredentials
-from google.cloud.dataproc_v1beta2 import (
+from google.cloud.dataproc_v1 import (
     ClusterControllerClient, Cluster, ClusterStatus)
-from google.cloud.dataproc_v1beta2.types.shared import Component
+from google.cloud.dataproc_v1.types.shared import Component
 from google.longrunning import operations_pb2
 from google.cloud import storage, logging_v2
 from google.cloud.logging_v2.types import LogEntry
10 changes: 5 additions & 5 deletions tests/test_spawner.py
@@ -28,9 +28,9 @@
 import dataprocspawner
 from dataprocspawner import DataprocSpawner
 from google.auth.credentials import AnonymousCredentials
-from google.cloud.dataproc_v1beta2 import (
+from google.cloud.dataproc_v1 import (
     ClusterControllerClient, Cluster, ClusterStatus)
-from google.cloud.dataproc_v1beta2.types.shared import Component
+from google.cloud.dataproc_v1.types.shared import Component
 from google.longrunning import operations_pb2
 from google.cloud import storage, logging_v2
 from google.cloud.logging_v2.types import LogEntry
@@ -1003,12 +1003,12 @@ def test_read_file(*args, **kwargs):
     assert len(warnings) == 7
     expected_warnings = [
       'Removing unknown/bad value BAD_ENUM_VALUE for field consume_reservation_type.',
-      "Removing unknown field unknown_field for class <class 'google.cloud.dataproc_v1beta2.types.clusters.NodeInitializationAction'>",
+      "Removing unknown field unknown_field for class <class 'google.cloud.dataproc_v1.types.clusters.NodeInitializationAction'>",
       'Removing unknown/bad value UNKNOWN_COMPONENT_1 for field optional_components.',
       'Removing unknown/bad value UNKNOWN_COMPONENT_2 for field optional_components.',
       'Removing unknown/bad value UNKNOWN_COMPONENT_3 for field optional_components.',
-      "Removing unknown field unknown_field_config_level for class <class 'google.cloud.dataproc_v1beta2.types.clusters.ClusterConfig'>",
-      "Removing unknown field unknown_field_top_level for class <class 'google.cloud.dataproc_v1beta2.types.clusters.Cluster'>",
+      "Removing unknown field unknown_field_config_level for class <class 'google.cloud.dataproc_v1.types.clusters.ClusterConfig'>",
+      "Removing unknown field unknown_field_top_level for class <class 'google.cloud.dataproc_v1.types.clusters.Cluster'>",
     ]
     for w in expected_warnings:
       assert w in warnings, f'Expected message {w} in warnings {warnings}'
