Skip to content

Commit 688bb4f

Browse files
committed
update ray scripts and name schemes
1 parent 0205e69 commit 688bb4f

File tree

2 files changed, with 46 additions and 32 deletions.

src/ibm_ray_config/modules/gen2/ray/vpc.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ class RayVPCConfig(ConfigBuilder):
162162
def __init__(self, base_config: Dict[str, Any]) -> None:
163163
super().__init__(base_config)
164164
self.region = self.get_region()
165-
self.default_vpc_name_scheme = f'vpc-in-{self.region}-{str(uuid4())[:5]}'
165+
self.default_vpc_name_scheme = f'vpc-{uuid4().hex[:5]}'
166166

167167
if base_config.get('available_node_types'):
168168
for available_node_type in self.base_config['available_node_types']:
@@ -189,7 +189,7 @@ def update_config(self, vpc_obj, zone_obj, subnet_id):
189189
node_config)
190190
else:
191191
self.base_config['available_node_types'] = {
192-
'ray_head_default': {'node_config': node_config}}
192+
self.DEFAULT_NODE_TYPE: {'node_config': node_config}}
193193

194194

195195
@update_decorator
@@ -243,11 +243,11 @@ def _create_vpc(self, ibm_vpc_client, resource_group, auto=False):
243243
def _create():
244244
return ibm_vpc_client.create_vpc(address_prefix_management='auto', classic_access=False,
245245
name=vpc_name, resource_group=resource_group).get_result()
246-
default_vpc_prefix = self.default_vpc_name_scheme.split('-in',1)[0]
246+
default_vpc_prefix = self.default_vpc_name_scheme.rsplit('-',1)[0]
247247
if auto:
248248
vpc_prefix = default_vpc_prefix
249249
else:
250-
print(f"VPC name is {self.default_vpc_name_scheme}")
250+
print(f"VPC name is: '{self.default_vpc_name_scheme}'")
251251
vpc_prefix = free_dialog(msg= f"Pick a custom name to replace: '{default_vpc_prefix}'(or Enter for default)",
252252
default=default_vpc_prefix,
253253
validate=validate_name)['answer']
@@ -410,6 +410,7 @@ def list_vpcs():
410410
# Create a new VPC
411411
if not vpc_name:
412412
resource_group_id = self._select_resource_group()
413+
print(color_msg(f"Using resource group id: {resource_group_id}",color=Color.LIGHTGREEN))
413414
resource_group = {'id': resource_group_id}
414415

415416
vpc_obj = self._create_vpc(ibm_vpc_client, resource_group)
@@ -504,6 +505,7 @@ def verify(self, base_config):
504505
@update_decorator
505506
def create_default(self):
506507
resource_group_id = self._select_resource_group(auto=True)
508+
print(color_msg(f"Using resource group id: {resource_group_id}",color=Color.LIGHTGREEN))
507509
resource_group = {'id': resource_group_id}
508510

509511
vpc_objects = self.ibm_vpc_client.list_vpcs().get_result()['vpcs']
@@ -513,7 +515,7 @@ def create_default(self):
513515
# TODO: validate existing
514516
print(f"\n\n\033[92mUsing existing VPC with default name {vpc_obj['name']} \033[0m")
515517
else:
516-
vpc_name = f"{self.default_vpc_name_scheme}-{str(uuid4())[:5]}"
518+
vpc_name = f"{self.default_vpc_name_scheme}"
517519
vpc_obj = self._create_vpc(self.ibm_vpc_client,
518520
resource_group, vpc_name, auto=True)
519521
if not vpc_obj:

src/ibm_ray_config/modules/utils.py

+39-27
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import ibm_cloud_sdk_core
1616

1717

18-
CACHE = {}
18+
CACHE = {} # used to store data that isn't stored in the cluster's config file, e.g. vpc_name.
1919
ARG_STATUS = Enum('STATUS', 'VALID INVALID MISSING') # variable possible status.
2020

2121
class MSG_STATUS(Enum):
@@ -167,7 +167,7 @@ def validate_exists(answers, current):
167167

168168

169169
def get_region_by_endpoint(endpoint):
170-
return re.search('//(.+?).iaas.cloud.ibm.com', endpoint).group(1)
170+
return re.search('//(.+?).iaas.cloud.ibm.com/v1', endpoint).group(1)
171171

172172

173173
def find_default(template_dict, objects, name=None, id=None, substring=False):
@@ -306,14 +306,11 @@ def _prompt_user(path, default_config_file, verify_func, request, default_msg):
306306
else:
307307
path = free_dialog(request)['answer']
308308

309-
if not verify_config:
310-
input_path = _prompt_user(input_path, '', _is_valid_input_file,
311-
"Provide a path to your existing config file, or leave blank to configure from template",
312-
'Using default input file\n')
313309
output_path = _prompt_user(output_path, os.getcwd(), _is_valid_output_dir,
314310
"Provide a custom path for your config file, or leave blank for default output location",
315-
'Using default output path\n')
316-
return input_path, output_path
311+
f"Using default output path: '{os.getcwd()}'\n")
312+
# currently not supporting input file from user.
313+
return '', output_path
317314

318315
def verify_iam_api_key(answers, apikey, iam_endpoint=None):
319316
"""Terminates the config tool if no IAM_API_KEY matching the provided value exists"""
@@ -383,21 +380,23 @@ def dump_cluster_folder(config, output_folder):
383380
# create the output_folder and scripts_folder if they don't exist
384381
if not os.path.isdir(output_folder):
385382
os.mkdir(output_folder)
386-
cluster_folder = os.path.join(output_folder, f"{config['cluster_name']}")
387-
scripts_folder = os.path.join(cluster_folder, f"scripts")
388-
os.makedirs(cluster_folder, exist_ok=True) # directory already exists
389-
os.makedirs(scripts_folder, exist_ok=True) # directory already exists
383+
cluster_folder_name = f"{config['cluster_name']}-at-"\
384+
f"{CACHE['vpc_name']}-in-{config['provider']['region']}"
385+
cluster_folder_path = os.path.join(output_folder, cluster_folder_name)
386+
scripts_folder_path = os.path.join(cluster_folder_path, 'scripts')
387+
os.makedirs(cluster_folder_path, exist_ok=True)
388+
os.makedirs(scripts_folder_path, exist_ok=True)
390389

391390
cluster_file_name = "config.yaml"
392-
cluster_file_path = os.path.join(cluster_folder, cluster_file_name)
391+
cluster_file_path = os.path.join(cluster_folder_path, cluster_file_name)
393392

394393
# get source path of ssh keys and extract their name
395394
original_private_key_path = os.path.expanduser(config['auth']['ssh_private_key'])
396395
original_public_key_path = original_private_key_path+'.pub'
397396
private_key_name = original_private_key_path.rsplit('/',1)[-1]
398397

399398
# update ssh key path to output folder
400-
new_private_key_path = os.path.join(cluster_folder, private_key_name)
399+
new_private_key_path = os.path.join(cluster_folder_path, private_key_name)
401400
new_public_key_path = new_private_key_path+'.pub'
402401
config['auth']['ssh_private_key'] = Path(new_private_key_path).name
403402

@@ -410,33 +409,46 @@ def dump_cluster_folder(config, output_folder):
410409
copy_or_move_file(original_private_key_path, new_private_key_path)
411410
copy_or_move_file(original_public_key_path, new_public_key_path)
412411

413-
write_script('create.sh',
414-
scripts_folder,
415-
[f"ray up -y {cluster_file_name}"])
412+
write_script('up.sh',
413+
scripts_folder_path,
414+
[f"ray up -y {cluster_file_name} $@"])
416415

417416
write_script('connect.sh',
418-
scripts_folder,
417+
scripts_folder_path,
419418
[f"ray dashboard --port 8265 --remote-port 8265 {cluster_file_name}"])
420419

421-
# kill tunnel created by ray dashboard by killing the PIDs involved
420+
write_script('tunnel.sh',
421+
scripts_folder_path,
422+
[f"ssh -i {private_key_name} -f -N -L $1:localhost:$1 root@$(ray get-head-ip {cluster_file_name})"])
423+
424+
# kill tunnel created by ray dashboard by killing all the PIDs involved with the port
422425
write_script('disconnect.sh',
423-
scripts_folder,
426+
scripts_folder_path,
424427
["lsof -i:8265 | awk 'NR>1 {print $2}' | sort -u | xargs kill"],
425428
run_from_cluster_dir = False)
426429

427-
write_script('terminate.sh',
428-
scripts_folder,
429-
[f"ray down -y {cluster_file_name}"])
430+
write_script('down.sh',
431+
scripts_folder_path,
432+
[f"ray down -y {cluster_file_name} $@"])
433+
434+
write_script('down-vpc.sh',
435+
scripts_folder_path,
436+
[f"ibm-ray-config -c {cluster_file_name}"])
430437

431438
write_script('stop.sh',
432-
scripts_folder,
433-
[f"ray stop -y {cluster_file_name}"])
439+
scripts_folder_path,
440+
[f"ray stop -y {cluster_file_name} $@"])
434441

435442
write_script('ray.sh',
436-
scripts_folder,
443+
scripts_folder_path,
437444
[f"ray $@"])
438445

439-
return cluster_folder
446+
write_script('submit.sh',
447+
scripts_folder_path,
448+
["pythonexec=$(realpath $1)\n",
449+
"ray submit "+cluster_file_name+" $pythonexec ${@:2}"])
450+
451+
return cluster_folder_path
440452

441453

442454
class Color(Enum):

0 commit comments

Comments
 (0)