Skip to content

Commit b850df0

Browse files
committed
Making CloudDNS optional for Pathways-enabled clusters.
1 parent d88b092 commit b850df0

File tree

5 files changed

+29
-9
lines changed

5 files changed

+29
-9
lines changed

README.md

+10-2
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,14 @@ all zones.
180180
--num-slices=4 --on-demand \
181181
--tpu-type=v5litepod-16
182182
```
183+
Please specify `--enable-clouddns` if you would like CloudDNS to be the
184+
DNS provider for the Pathways cluster. For example,
185+
```shell
186+
python3 xpk.py cluster create-pathways \
187+
--cluster xpk-pw-test-clouddns \
188+
--num-slices=4 --on-demand \
189+
--tpu-type=v5litepod-16 \
--enable-clouddns=True
190+
```
183191

184192
* Cluster Create can be called again with the same `--cluster name` to modify
185193
the number of slices or retry failed steps.
@@ -370,8 +378,8 @@ will fail the cluster creation process because Vertex AI Tensorboard is not supp
370378
--tpu-type=v5litepod-16 \
371379
--cluster xpk-pw-test
372380
```
373-
Executing the command above would provide the address of the proxy that the user job should connect to.
374-
Specify `JAX_PLATFORMS=proxy` and `JAX_BACKEND_TARGET=<proxy address from above>` and `import previewutilies` to establish this connection between the user's JAX code and the Pathways proxy. Execute Pathways workloads interactively on Vertex AI notebooks!
381+
Executing the command above provides the address of the proxy that the user job should connect to. Users need to use kubectl port-forwarding to establish a connection from the notebook/VM to the proxy.
382+
Specify `JAX_PLATFORMS=proxy` and `JAX_BACKEND_TARGET=<proxy address from above>` and `import pathwaysutils` to establish this connection between the user's JAX code and the Pathways proxy. Execute Pathways workloads interactively on Vertex AI notebooks!
375383
376384
### Set `max-restarts` for production jobs
377385

src/xpk/commands/cluster.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def cluster_create(args) -> None:
8888
xpk_exit(create_cluster_command_code)
8989

9090
# Update Pathways clusters with CloudDNS if not enabled already.
91-
if args.enable_pathways:
91+
if args.enable_pathways and args.enable_clouddns:
9292
update_cluster_command_code = update_cluster_with_clouddns_if_necessary(
9393
args
9494
)
@@ -468,10 +468,15 @@ def run_gke_cluster_create_command(
468468
command += (
469469
' --enable-ip-alias'
470470
f' --create-subnetwork name={args.cluster}-subnetwork'
471-
' --cluster-dns=clouddns'
472-
' --cluster-dns-scope=vpc'
473-
f' --cluster-dns-domain={args.cluster}-domain'
474471
)
472+
if args.enable_clouddns:
473+
# Enables CloudDNS as the default provider of the Pathways cluster,
474+
# useful for Pathways headless mode workloads.
475+
command += (
476+
' --cluster-dns=clouddns'
477+
' --cluster-dns-scope=vpc'
478+
f' --cluster-dns-domain={args.cluster}-domain'
479+
)
475480

476481
return_code = run_command_with_updates(command, 'GKE Cluster Create', args)
477482
if return_code != 0:

src/xpk/commands/workload.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -326,10 +326,9 @@ def workload_create(args) -> None:
326326

327327
if args.headless and not is_cluster_using_clouddns(args):
328328
xpk_print(
329-
'Please run xpk cluster create-pathways first, to upgrade and enable'
330-
' CloudDNS on your cluster.'
329+
'Cluster is not using CloudDNS, connect to the proxy server'
330+
' using kubectl port forwarding. '
331331
)
332-
xpk_exit(1)
333332

334333
set_cluster_command_code = set_cluster_command(args)
335334
if set_cluster_command_code != 0:

src/xpk/parser/cluster.py

+6
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,12 @@ def set_cluster_parser(cluster_parser):
168168
default=None,
169169
help='The tpu type to use, v5litepod-16, etc.',
170170
)
171+
cluster_create_pathways_optional_arguments.add_argument(
172+
'--enable-clouddns',
173+
type=bool,
174+
default=False,
175+
help='Enables CloudDNS on the Pathways cluster.',
176+
)
171177

172178
add_shared_cluster_create_required_arguments([
173179
cluster_create_required_arguments,

src/xpk/parser/workload.py

+2
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,8 @@ def add_shared_workload_create_optional_arguments(args_parsers):
521521
' headless mode. This arg can only be used in `xpk workload'
522522
' create-pathways`(preferred) or `xpk workload create'
523523
' --use-pathways.` (--use-pathways will be deprecated soon).'
524+
' Headless workloads may be created on clusters with/without '
525+
' CloudDNS.'
524526
),
525527
)
526528
custom_parser.add_argument(

0 commit comments

Comments
 (0)