Skip to content

Commit f2e17ca

Browse files
authored
minor changes for printing things (#22)
Co-authored-by: Atin Sood <[email protected]>
1 parent 2f74b63 commit f2e17ca

File tree

4 files changed

+26
-11
lines changed

4 files changed

+26
-11
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
dist/
2+
.python-version

src/codeflare_sdk/cluster/cluster.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,10 @@ def down(self, namespace='default'):
4343
oc.invoke("delete", ["AppWrapper", self.app_wrapper_name])
4444

4545
def status(self, print_to_console=True):
46-
cluster = _ray_cluster_status(self.config.name)
46+
cluster = _ray_cluster_status(self.config.name)
4747
if cluster:
48+
#overriding the number of gpus with requested
49+
cluster.worker_gpu = self.config.gpu
4850
if print_to_console:
4951
pretty_print.print_clusters([cluster])
5052
return cluster.status
@@ -92,6 +94,8 @@ def is_ready(self, print_to_console=True):
9294
status = CodeFlareClusterStatus.FAILED
9395

9496
if print_to_console:
97+
#overriding the number of gpus with requested
98+
cluster.worker_gpu = self.config.gpu
9599
pretty_print.print_clusters([cluster])
96100
return status, ready
97101

@@ -123,11 +127,16 @@ def _app_wrapper_status(name, namespace='default') -> Optional[AppWrapper]:
123127

124128
def _ray_cluster_status(name, namespace='default') -> Optional[RayCluster]:
125129
# FIXME should we check the appwrapper first
126-
with oc.project(namespace), oc.timeout(10*60):
127-
cluster = oc.selector(f'rayclusters/{name}').object()
128-
129-
if cluster:
130-
return _map_to_ray_cluster(cluster)
130+
cluster = None
131+
try:
132+
with oc.project(namespace), oc.timeout(10*60):
133+
cluster = oc.selector(f'rayclusters/{name}').object()
134+
135+
if cluster:
136+
return _map_to_ray_cluster(cluster)
137+
except:
138+
pass
139+
return cluster
131140

132141

133142
def _get_ray_clusters(namespace='default') -> List[RayCluster]:
@@ -161,14 +170,16 @@ def _map_to_ray_cluster(cluster) -> RayCluster:
161170
cluster_model = cluster.model
162171
return RayCluster(
163172
name=cluster.name(), status=RayClusterStatus(cluster_model.status.state.lower()),
173+
#for now we are not using autoscaling so same replicas is fine
164174
min_workers=cluster_model.spec.workerGroupSpecs[0].replicas,
165175
max_workers=cluster_model.spec.workerGroupSpecs[0].replicas,
166176
worker_mem_max=cluster_model.spec.workerGroupSpecs[
167177
0].template.spec.containers[0].resources.limits.memory,
168178
worker_mem_min=cluster_model.spec.workerGroupSpecs[
169179
0].template.spec.containers[0].resources.requests.memory,
170180
worker_cpu=cluster_model.spec.workerGroupSpecs[0].template.spec.containers[0].resources.limits.cpu,
171-
worker_gpu=0)
181+
worker_gpu=0, #hard to detect currently how many gpus, can override it with what the user asked for
182+
namespace=cluster.namespace())
172183

173184

174185
def _map_to_app_wrapper(cluster) -> AppWrapper:

src/codeflare_sdk/cluster/model.py

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class RayCluster:
3131
worker_mem_max: str
3232
worker_cpu: int
3333
worker_gpu: int
34+
namespace: str
3435

3536
@dataclass
3637
class AppWrapper:

src/codeflare_sdk/utils/pretty_print.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,11 @@ def print_app_wrappers_status(app_wrappers:List[AppWrapper]):
2222
name = app_wrapper.name
2323
status = app_wrapper.status.value
2424

25-
table = Table(box=None, title="[bold] :rocket: List of CodeFlare clusters in queue:rocket:")
25+
table = Table(box=box.ASCII_DOUBLE_HEAD, title="[bold] :rocket: List of CodeFlare clusters in queue:rocket:")
26+
table.add_row("") #empty row for spacing
2627
table.add_column("Name", style="cyan", no_wrap=True)
2728
table.add_column("Status", style="magenta")
28-
table.add_row("[bold underline]"+name,status)
29+
table.add_row(name,status)
2930
table.add_row("") #empty row for spacing
3031
console.print(Panel.fit(table))
3132

@@ -47,7 +48,7 @@ def print_clusters(clusters:List[RayCluster], verbose=True):
4748
maxcount = str(cluster.max_workers)
4849
memory = cluster.worker_mem_min+"~"+cluster.worker_mem_max
4950
cpu = str(cluster.worker_cpu)
50-
gpu = str(cluster.worker_mem_max)
51+
gpu = str(cluster.worker_gpu)
5152
#owned = bool(cluster["userOwned"])
5253
owned = True
5354

@@ -59,7 +60,8 @@ def print_clusters(clusters:List[RayCluster], verbose=True):
5960
table0.add_row("")
6061
table0.add_row("[bold underline]"+name,status)
6162
table0.add_row()
62-
table0.add_row(f"[bold]URI:[/bold] ray://{name}-head-svc:1001") #format that is used to generate the name of the service
63+
#fixme hardcoded to default for now
64+
table0.add_row(f"[bold]URI:[/bold] ray://{cluster.name}-head-svc.{cluster.namespace}.svc:10001") #format that is used to generate the name of the service
6365
table0.add_row()
6466
table0.add_row(f"[link={dashboard} blue underline]Dashboard:link:[/link]")
6567
table0.add_row("") #empty row for spacing

0 commit comments

Comments
 (0)