Skip to content

Refactoring - Add gcloud context manager #405

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,19 @@ dev = [
]

[tool.setuptools]
packages = ["xpk", "xpk.parser", "xpk.core", "xpk.commands", "xpk.api", "xpk.templates", "xpk.utils", "xpk.core.blueprint", "xpk.core.remote_state", "xpk.core.workload_decorators"]
packages = [
"xpk",
"xpk.parser",
"xpk.core",
"xpk.commands",
"xpk.api",
"xpk.templates",
"xpk.utils",
"xpk.core.blueprint",
"xpk.core.remote_state",
"xpk.core.workload_decorators",
"xpk.core.gcloud"
]
package-dir = {"" = "src"}
package-data = {"xpk.api" = ["storage_crd.yaml"], "xpk.templates" = ["storage.yaml"]}

Expand Down
14 changes: 7 additions & 7 deletions src/xpk/commands/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,22 @@
limitations under the License.
"""

import re
from argparse import Namespace

from ..core.cluster import create_xpk_k8s_service_account
from ..core.commands import run_command_for_value
from ..core.gcloud_context import add_zone_and_project
from ..core.kueue import LOCAL_QUEUE_NAME
from ..utils.console import xpk_exit, xpk_print
from .common import set_cluster_command
from ..core.gcloud.context import GCloudContextManager
from ..core.kjob import (
AppProfileDefaults,
prepare_kjob,
Kueue_TAS_annotation,
get_gcsfuse_annotation,
prepare_kjob,
)
from ..core.kueue import LOCAL_QUEUE_NAME
from ..utils.console import xpk_exit, xpk_print
from .common import set_cluster_command
from .kind import set_local_cluster_command
import re


def batch(args: Namespace) -> None:
Expand All @@ -41,7 +41,7 @@ def batch(args: Namespace) -> None:
None
"""
if not args.kind_cluster:
add_zone_and_project(args)
GCloudContextManager.add_zone_and_project(args)
set_cluster_command_code = set_cluster_command(args)
else:
set_cluster_command_code = set_local_cluster_command(args)
Expand Down
35 changes: 14 additions & 21 deletions src/xpk/commands/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,14 @@
install_nccl_on_cluster,
set_jobset_on_cluster,
setup_k8s_env,
update_cluster_with_gcpfilestore_driver_if_necessary,
update_cluster_with_gcsfuse_driver_if_necessary,
update_cluster_with_workload_identity_if_necessary,
)
from ..core.cluster_private import authorize_private_cluster_access_if_necessary
from ..core.commands import run_command_for_value, run_command_with_updates
from ..core.config import VERTEX_TENSORBOARD_FEATURE_FLAG
from ..core.gcloud_context import (
add_zone_and_project,
get_gke_control_plane_version,
get_gke_server_config,
zone_to_region,
)
from ..core.gcloud.context import GCloudContextManager, GKEVersionManager
from ..core.kjob import apply_kjob_crds, prepare_kjob, verify_kjob_installed
from ..core.kueue import (
cluster_preheat_yml,
Expand Down Expand Up @@ -64,7 +60,6 @@
from ..utils.file import write_tmp_file
from . import cluster_gcluster
from .common import set_cluster_command
from ..core.cluster import update_cluster_with_gcpfilestore_driver_if_necessary


def cluster_create(args) -> None:
Expand All @@ -83,7 +78,7 @@ def cluster_create(args) -> None:
xpk_exit(return_code)

xpk_print(f'Starting cluster create for cluster {args.cluster}:', flush=True)
add_zone_and_project(args)
GCloudContextManager.add_zone_and_project(args)

if system.device_type in cluster_gcluster.supported_device_types:
xpk_print(
Expand All @@ -93,12 +88,10 @@ def cluster_create(args) -> None:
cluster_gcluster.cluster_create(args)
xpk_exit(0)

return_code, gke_server_config = get_gke_server_config(args)
if return_code != 0:
xpk_exit(return_code)
gke_server_config = GKEVersionManager(args)

return_code, gke_control_plane_version = get_gke_control_plane_version(
args, gke_server_config
return_code, gke_control_plane_version = (
gke_server_config.get_gke_control_plane_version()
)
if return_code != 0:
xpk_exit(return_code)
Expand Down Expand Up @@ -257,7 +250,7 @@ def cluster_create(args) -> None:
xpk_print(
'See your GKE Cluster here:'
# pylint: disable=line-too-long
f' https://console.cloud.google.com/kubernetes/clusters/details/{zone_to_region(args.zone)}/{args.cluster}/details?project={args.project}'
f' https://console.cloud.google.com/kubernetes/clusters/details/{GCloudContextManager.zone_to_region(args.zone)}/{args.cluster}/details?project={args.project}'
)
xpk_exit(0)

Expand All @@ -272,7 +265,7 @@ def cluster_delete(args) -> None:
0 if successful and 1 otherwise.
"""
xpk_print(f'Starting cluster delete for cluster: {args.cluster}', flush=True)
add_zone_and_project(args)
GCloudContextManager.add_zone_and_project(args)

if cluster_gcluster.created_by_gcluster(args):
xpk_print(f'Deleting {args.cluster} cluster using Cluster Toolkit...')
Expand Down Expand Up @@ -303,7 +296,7 @@ def cluster_cacheimage(args) -> None:
xpk_print(
f'Starting cluster cacheimage for cluster: {args.cluster}', flush=True
)
add_zone_and_project(args)
GCloudContextManager.add_zone_and_project(args)

get_cluster_credentials(args)
system, return_code = get_system_characteristics(args)
Expand Down Expand Up @@ -352,7 +345,7 @@ def cluster_describe(args) -> None:
0 if successful and 1 otherwise.
"""
xpk_print(f'Starting nodepool list for cluster: {args.cluster}', flush=True)
add_zone_and_project(args)
GCloudContextManager.add_zone_and_project(args)

get_cluster_credentials(args)

Expand Down Expand Up @@ -583,7 +576,7 @@ def cluster_list(args) -> None:
Returns:
0 if successful and 1 otherwise.
"""
add_zone_and_project(args)
GCloudContextManager.add_zone_and_project(args)
xpk_print(f'For project {args.project} and zone {args.zone}:', flush=True)
if run_gke_clusters_list_command(args):
xpk_exit(1)
Expand Down Expand Up @@ -675,7 +668,7 @@ def run_gke_cluster_delete_command(args) -> int:
command = (
'gcloud beta container clusters delete'
f' {args.cluster} --project={args.project}'
f' --region={zone_to_region(args.zone)} --quiet'
f' --region={GCloudContextManager.zone_to_region(args.zone)} --quiet'
)

return_code = run_command_with_updates(command, 'Cluster Delete', args)
Expand All @@ -701,7 +694,7 @@ def run_gke_clusters_list_command(args) -> int:
"""
command = (
'gcloud container clusters list'
f' --project={args.project} --region={zone_to_region(args.zone)}'
f' --project={args.project} --region={GCloudContextManager.zone_to_region(args.zone)}'
)
return_code = run_command_with_updates(command, 'Cluster List', args)
if return_code != 0:
Expand Down Expand Up @@ -748,7 +741,7 @@ def run_gke_cluster_create_command(
command = (
'gcloud beta container clusters create'
f' {args.cluster} --project={args.project}'
f' --region={zone_to_region(args.zone)}'
f' --region={GCloudContextManager.zone_to_region(args.zone)}'
f' --node-locations={args.zone}'
f' --cluster-version={gke_control_plane_version}'
f' --machine-type={machine_type}'
Expand Down
22 changes: 11 additions & 11 deletions src/xpk/commands/cluster_gcluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,26 @@

import os

from ..core.remote_state.remote_state_client import RemoteStateClient
from ..core.remote_state.fuse_remote_state import FuseStateClient
from ..core.blueprint.blueprint_generator import (
BlueprintGenerator,
BlueprintGeneratorOutput,
a3mega_device_type,
a3ultra_device_type,
supported_device_types,
)
from ..core.commands import run_command_for_value
from ..core.capacity import get_capacity_type
from ..core.cluster import get_cluster_credentials
from ..core.commands import run_command_for_value
from ..core.docker_manager import DockerManager
from ..core.gcloud_context import zone_to_region
from ..core.gcloud.context import GCloudContextManager
from ..core.gcluster_manager import GclusterManager
from ..core.kjob import apply_kjob_crds, prepare_kjob
from ..core.remote_state.fuse_remote_state import FuseStateClient
from ..core.remote_state.remote_state_client import RemoteStateClient
from ..utils.console import xpk_exit, xpk_print
from ..utils.file import ensure_directory_exists
from ..utils.network import all_IPs_cidr
from ..utils.objects import hash_string
from ..core.cluster import get_cluster_credentials
from ..core.kjob import apply_kjob_crds, prepare_kjob

blueprints_path = os.path.abspath('xpkclusters/blueprints')
gcluster_working_dir = os.path.abspath('xpkclusters/gcluster-out')
Expand All @@ -53,7 +53,7 @@ def cluster_create(args) -> None:
"""
check_gcloud_authenticated()
prepare_directories()
region = zone_to_region(args.zone)
region = GCloudContextManager.zone_to_region(args.zone)

# unique_name uses shortened hash string, so still name collision is possible
unique_name = get_unique_name(args.project, region, args.cluster)
Expand Down Expand Up @@ -110,7 +110,7 @@ def cluster_delete(args) -> None:
"""
check_gcloud_authenticated()
prepare_directories()
region = zone_to_region(args.zone)
region = GCloudContextManager.zone_to_region(args.zone)
unique_name = get_unique_name(args.project, region, args.cluster)
# prefix is to prevent name collisions for blueprints and also deployments by storing them in prefix directory. Ex.: blueprints/{prefix}/cluster_name_hash
prefix = get_prefix_path(args.project, region)
Expand Down Expand Up @@ -152,7 +152,7 @@ def cluster_delete(args) -> None:

def created_by_gcluster(args) -> bool:
prepare_directories()
region = zone_to_region(args.zone)
region = GCloudContextManager.zone_to_region(args.zone)
unique_name = get_unique_name(args.project, region, args.cluster)
prefix = get_prefix_path(args.project, region)
bpg = prepare_blueprint_generator()
Expand Down Expand Up @@ -237,7 +237,7 @@ def generate_blueprint(
blueprint_name=blueprint_name,
prefix=prefix,
cluster_name=args.cluster,
region=zone_to_region(args.zone),
region=GCloudContextManager.zone_to_region(args.zone),
project_id=args.project,
zone=args.zone,
auth_cidr=all_IPs_cidr,
Expand All @@ -254,7 +254,7 @@ def generate_blueprint(
blueprint_name=blueprint_name,
prefix=prefix,
cluster_name=args.cluster,
region=zone_to_region(args.zone),
region=GCloudContextManager.zone_to_region(args.zone),
project_id=args.project,
zone=args.zone,
auth_cidr=all_IPs_cidr,
Expand Down
5 changes: 2 additions & 3 deletions src/xpk/commands/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"""

from ..core.commands import run_command_with_updates_retry
from ..core.gcloud_context import zone_to_region
from ..core.gcloud.context import GCloudContextManager
from ..utils.console import xpk_print


Expand All @@ -30,8 +30,7 @@ def set_cluster_command(args) -> int:
"""
command = (
'gcloud container clusters get-credentials'
f' {args.cluster} --region={zone_to_region(args.zone)}'
f' --project={args.project} &&'
f' {args.cluster} --region={GCloudContextManager.zone_to_region(args.zone)} --project={args.project} &&'
' kubectl config view && kubectl config set-context --current'
' --namespace=default'
)
Expand Down
4 changes: 2 additions & 2 deletions src/xpk/commands/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from tabulate import tabulate

from ..core.commands import run_command_for_value
from ..core.gcloud_context import add_zone_and_project
from ..core.gcloud.context import GCloudContextManager
from ..core.kueue import verify_kueuectl
from ..utils.console import xpk_exit, xpk_print
from .common import set_cluster_command
Expand All @@ -36,7 +36,7 @@ def info(args: Namespace) -> None:
Returns:
None
"""
add_zone_and_project(args)
GCloudContextManager.add_zone_and_project(args)
set_cluster_command_code = set_cluster_command(args)
if set_cluster_command_code != 0:
xpk_exit(set_cluster_command_code)
Expand Down
15 changes: 8 additions & 7 deletions src/xpk/commands/inspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from ..core.cluster import get_cluster_credentials
from ..core.commands import run_command_for_value
from ..core.gcloud_context import add_zone_and_project, zone_to_region
from ..core.gcloud.context import GCloudContextManager
from ..core.kueue import CLUSTER_QUEUE_NAME, LOCAL_QUEUE_NAME
from ..core.resources import CLUSTER_METADATA_CONFIGMAP, CLUSTER_RESOURCES_CONFIGMAP
from ..utils.console import xpk_exit, xpk_print
Expand Down Expand Up @@ -120,7 +120,7 @@ def inspector(args) -> None:
final_return_code = 0
xpk_print(args)

add_zone_and_project(args)
GCloudContextManager.add_zone_and_project(args)
get_cluster_credentials(args)

inspector_file = write_tmp_file(
Expand All @@ -138,7 +138,8 @@ def inspector(args) -> None:
(
(
'gcloud beta container clusters list --project'
f' {args.project} --region {zone_to_region(args.zone)} | grep -e'
f' {args.project} --region'
f' {GCloudContextManager.zone_to_region(args.zone)} | grep -e'
f' NAME -e {args.cluster}'
),
'GKE: Cluster Details',
Expand All @@ -160,7 +161,7 @@ def inspector(args) -> None:
(
(
f'gcloud beta container node-pools list --cluster {args.cluster} '
f' --project={args.project} --region={zone_to_region(args.zone)}'
f' --project={args.project} --region={GCloudContextManager.zone_to_region(args.zone)}'
),
'GKE: Node pool Details',
),
Expand Down Expand Up @@ -309,19 +310,19 @@ def inspector(args) -> None:
workload_links = [(
f'Cloud Console for the workload {args.workload}',
# pylint: disable=line-too-long
f'https://console.cloud.google.com/kubernetes/service/{zone_to_region(args.zone)}/{args.cluster}/default/{args.workload}/details?project={args.project}',
f'https://console.cloud.google.com/kubernetes/service/{GCloudContextManager.zone_to_region(args.zone)}/{args.cluster}/default/{args.workload}/details?project={args.project}',
)]

links = [
(
'Cloud Console for the GKE Cluster',
# pylint: disable=line-too-long
f'https://console.cloud.google.com/kubernetes/clusters/details/{zone_to_region(args.zone)}/{args.cluster}/details?project={args.project}',
f'https://console.cloud.google.com/kubernetes/clusters/details/{GCloudContextManager.zone_to_region(args.zone)}/{args.cluster}/details?project={args.project}',
),
(
'Cloud Console for all workloads in GKE Cluster',
# pylint: disable=line-too-long
f'https://console.cloud.google.com/kubernetes/workload/overview?project={args.project}&pageState=((gke%2F{zone_to_region(args.zone)}%2F{args.cluster}))',
f'https://console.cloud.google.com/kubernetes/workload/overview?project={args.project}&pageState=((gke%2F{GCloudContextManager.zone_to_region(args.zone)}%2F{args.cluster}))',
),
(
'Cloud Console for IAM Permissions',
Expand Down
6 changes: 3 additions & 3 deletions src/xpk/commands/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from ruamel.yaml import YAML

from ..core.commands import run_command_for_value, run_command_with_updates
from ..core.gcloud_context import add_zone_and_project
from ..core.gcloud.context import GCloudContextManager
from ..core.kjob import AppProfileDefaults
from ..utils.console import xpk_exit, xpk_print
from .common import set_cluster_command
Expand Down Expand Up @@ -142,7 +142,7 @@ def job_list(args) -> None:
None
"""
if not args.kind_cluster:
add_zone_and_project(args)
GCloudContextManager.add_zone_and_project(args)
set_cluster_command_code = set_cluster_command(args)
msg = f'Listing jobs for project {args.project} and zone {args.zone}:'
else:
Expand Down Expand Up @@ -177,7 +177,7 @@ def job_cancel(args) -> None:
"""
xpk_print(f'Starting job cancel for job: {args.name}', flush=True)
if not args.kind_cluster:
add_zone_and_project(args)
GCloudContextManager.add_zone_and_project(args)
set_cluster_command_code = set_cluster_command(args)
else:
set_cluster_command_code = set_local_cluster_command(args)
Expand Down
Loading
Loading