Skip to content

Commit 67c0b52

Browse files
authored
Rename cortex cluster configure cmd to cortex cluster scale (#2040)
1 parent 755658d commit 67c0b52

File tree

22 files changed

+319
-357
lines changed

22 files changed

+319
-357
lines changed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ Add this to your bash profile (e.g. `~/.bash_profile`, `~/.profile` or `~/.bashr
188188

189189
```bash
190190
# set the default image for APIs
191-
export CORTEX_DEV_DEFAULT_PREDICTOR_IMAGE_REGISTRY="<account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs"
191+
export CORTEX_DEV_DEFAULT_IMAGE_REGISTRY="<account_id>.dkr.ecr.<region>.amazonaws.com/cortexlabs"
192192
193193
# redirect analytics and error reporting to our dev environment
194194
export CORTEX_TELEMETRY_SENTRY_DSN="https://[email protected]/1848098"

Makefile

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,18 +84,6 @@ cluster-info:
8484
@$(MAKE) cli
8585
@eval $$(python3 ./manager/cluster_config_env.py ./dev/config/cluster.yaml) && ./bin/cortex cluster info --config=./dev/config/cluster.yaml --configure-env="$$CORTEX_CLUSTER_NAME" --yes
8686

87-
cluster-configure:
88-
@$(MAKE) images-all
89-
@$(MAKE) cli
90-
@kill $(shell pgrep -f rerun) >/dev/null 2>&1 || true
91-
@eval $$(python3 ./manager/cluster_config_env.py ./dev/config/cluster.yaml) && ./bin/cortex cluster configure ./dev/config/cluster.yaml --configure-env="$$CORTEX_CLUSTER_NAME"
92-
93-
cluster-configure-y:
94-
@$(MAKE) images-all
95-
@$(MAKE) cli
96-
@kill $(shell pgrep -f rerun) >/dev/null 2>&1 || true
97-
@eval $$(python3 ./manager/cluster_config_env.py ./dev/config/cluster.yaml) && ./bin/cortex cluster configure ./dev/config/cluster.yaml --configure-env="$$CORTEX_CLUSTER_NAME" --yes
98-
9987
# stop the in-cluster operator
10088
operator-stop:
10189
@$(MAKE) kubectl

cli/cmd/cluster.go

Lines changed: 118 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,17 @@ import (
5151
)
5252

5353
var (
54-
_flagClusterUpEnv string
55-
_flagClusterInfoEnv string
56-
_flagClusterConfigureEnv string
57-
_flagClusterConfig string
58-
_flagClusterName string
59-
_flagClusterRegion string
60-
_flagClusterInfoDebug bool
61-
_flagClusterDisallowPrompt bool
62-
_flagClusterDownKeepVolumes bool
54+
_flagClusterUpEnv string
55+
_flagClusterInfoEnv string
56+
_flagClusterScaleNodeGroup string
57+
_flagClusterScaleMinInstances int64
58+
_flagClusterScaleMaxInstances int64
59+
_flagClusterConfig string
60+
_flagClusterName string
61+
_flagClusterRegion string
62+
_flagClusterInfoDebug bool
63+
_flagClusterDisallowPrompt bool
64+
_flagClusterDownKeepVolumes bool
6365
)
6466

6567
var _eksctlPrefixRegex = regexp.MustCompile(`^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} \[.+] {2}`)
@@ -79,10 +81,12 @@ func clusterInit() {
7981
_clusterInfoCmd.Flags().BoolVarP(&_flagClusterDisallowPrompt, "yes", "y", false, "skip prompts")
8082
_clusterCmd.AddCommand(_clusterInfoCmd)
8183

82-
_clusterConfigureCmd.Flags().SortFlags = false
83-
_clusterConfigureCmd.Flags().StringVarP(&_flagClusterConfigureEnv, "configure-env", "e", "", "name of environment to configure")
84-
_clusterConfigureCmd.Flags().BoolVarP(&_flagClusterDisallowPrompt, "yes", "y", false, "skip prompts")
85-
_clusterCmd.AddCommand(_clusterConfigureCmd)
84+
_clusterScaleCmd.Flags().SortFlags = false
85+
addClusterNameFlag(_clusterScaleCmd)
86+
addClusterRegionFlag(_clusterScaleCmd)
87+
addClusterScaleFlags(_clusterScaleCmd)
88+
_clusterScaleCmd.Flags().BoolVarP(&_flagClusterDisallowPrompt, "yes", "y", false, "skip prompts")
89+
_clusterCmd.AddCommand(_clusterScaleCmd)
8690

8791
_clusterDownCmd.Flags().SortFlags = false
8892
addClusterConfigFlag(_clusterDownCmd)
@@ -112,6 +116,13 @@ func addClusterRegionFlag(cmd *cobra.Command) {
112116
cmd.Flags().StringVarP(&_flagClusterRegion, "region", "r", "", "aws region of the cluster")
113117
}
114118

119+
// addClusterScaleFlags registers the flags used to scale a node group on cmd:
// --node-group (string, marked required), --min-instances and --max-instances
// (both int64). The parsed values are stored in the package-level
// _flagClusterScaleNodeGroup, _flagClusterScaleMinInstances and
// _flagClusterScaleMaxInstances variables.
func addClusterScaleFlags(cmd *cobra.Command) {
120+
cmd.Flags().StringVar(&_flagClusterScaleNodeGroup, "node-group", "", "name of the node group to scale")
121+
// NOTE(review): the value returned by MarkFlagRequired is discarded here.
cmd.MarkFlagRequired("node-group")
122+
// Both instance flags default to 0, so the stored value alone cannot tell an
// explicit 0 from an omitted flag.
cmd.Flags().Int64Var(&_flagClusterScaleMinInstances, "min-instances", 0, "minimum number of instances")
123+
cmd.Flags().Int64Var(&_flagClusterScaleMaxInstances, "max-instances", 0, "maximum number of instances")
124+
}
125+
115126
var _clusterCmd = &cobra.Command{
116127
Use: "cluster",
117128
Short: "manage cortex clusters (contains subcommands)",
@@ -193,7 +204,7 @@ var _clusterUpCmd = &cobra.Command{
193204
exit.Error(err)
194205
}
195206

196-
out, exitCode, err := runManagerWithClusterConfig("/root/install.sh", clusterConfig, awsClient, nil, nil)
207+
out, exitCode, err := runManagerWithClusterConfig("/root/install.sh", clusterConfig, awsClient, nil, nil, nil)
197208
if err != nil {
198209
exit.Error(err)
199210
}
@@ -288,20 +299,29 @@ var _clusterUpCmd = &cobra.Command{
288299
},
289300
}
290301

291-
var _clusterConfigureCmd = &cobra.Command{
292-
Use: "configure [CLUSTER_CONFIG_FILE]",
293-
Short: "update a cluster's configuration",
294-
Args: cobra.ExactArgs(1),
302+
var _clusterScaleCmd = &cobra.Command{
303+
Use: "scale [flags]",
304+
Short: "update the min/max instances for a nodegroup",
305+
Args: cobra.NoArgs,
295306
Run: func(cmd *cobra.Command, args []string) {
296307
telemetry.Event("cli.cluster.configure")
297308

298-
clusterConfigFile := args[0]
309+
var scaleMinIntances, scaleMaxInstances *int64
310+
if wasFlagProvided(cmd, "min-instances") {
311+
scaleMinIntances = pointer.Int64(_flagClusterScaleMinInstances)
312+
}
313+
if wasFlagProvided(cmd, "max-instances") {
314+
scaleMaxInstances = pointer.Int64(_flagClusterScaleMaxInstances)
315+
}
316+
if scaleMinIntances == nil && scaleMaxInstances == nil {
317+
exit.Error(ErrorSpecifyAtLeastOneFlag("--min-instances", "--max-instances"))
318+
}
299319

300320
if _, err := docker.GetDockerClient(); err != nil {
301321
exit.Error(err)
302322
}
303323

304-
accessConfig, err := getNewClusterAccessConfig(clusterConfigFile)
324+
accessConfig, err := getClusterAccessConfigWithCache()
305325
if err != nil {
306326
exit.Error(err)
307327
}
@@ -321,34 +341,25 @@ var _clusterConfigureCmd = &cobra.Command{
321341
exit.Error(err)
322342
}
323343

324-
cachedClusterConfig := refreshCachedClusterConfig(*awsClient, accessConfig)
325-
326-
clusterConfig, err := getConfigureClusterConfig(cachedClusterConfig, clusterConfigFile, _flagClusterDisallowPrompt)
344+
clusterConfig := refreshCachedClusterConfig(*awsClient, accessConfig)
345+
clusterConfig, err = updateNodeGroupScale(clusterConfig, _flagClusterScaleNodeGroup, scaleMinIntances, scaleMaxInstances, _flagClusterDisallowPrompt)
327346
if err != nil {
328347
exit.Error(err)
329348
}
330349

331-
out, exitCode, err := runManagerWithClusterConfig("/root/install.sh --update", clusterConfig, awsClient, nil, nil)
350+
out, exitCode, err := runManagerWithClusterConfig("/root/install.sh --update", &clusterConfig, awsClient, nil, nil, []string{
351+
"CORTEX_SCALING_NODEGROUP=" + _flagClusterScaleNodeGroup,
352+
"CORTEX_SCALING_MIN_INSTANCES=" + s.Int64(_flagClusterScaleMinInstances),
353+
"CORTEX_SCALING_MAX_INSTANCES=" + s.Int64(_flagClusterScaleMaxInstances),
354+
})
332355
if err != nil {
333356
exit.Error(err)
334357
}
335358
if exitCode == nil || *exitCode != 0 {
336359
helpStr := "\ndebugging tips (may or may not apply to this error):"
337360
helpStr += fmt.Sprintf("\n* if your cluster was unable to provision instances, additional error information may be found in the activity history of your cluster's autoscaling groups (select each autoscaling group and click the \"Activity\" or \"Activity History\" tab): https://console.aws.amazon.com/ec2/autoscaling/home?region=%s#AutoScalingGroups:", clusterConfig.Region)
338361
fmt.Println(helpStr)
339-
exit.Error(ErrorClusterConfigure(out + helpStr))
340-
}
341-
342-
if _flagClusterConfigureEnv != "" {
343-
loadBalancer, err := getAWSOperatorLoadBalancer(clusterConfig.ClusterName, awsClient)
344-
if err != nil {
345-
exit.Error(errors.Append(err, fmt.Sprintf("\n\nyou can attempt to resolve this issue and configure your cli environment by running `cortex cluster info --configure-env %s`", _flagClusterConfigureEnv)))
346-
}
347-
operatorEndpoint := "https://" + *loadBalancer.DNSName
348-
err = updateAWSCLIEnv(_flagClusterConfigureEnv, operatorEndpoint, _flagClusterDisallowPrompt)
349-
if err != nil {
350-
exit.Error(errors.Append(err, fmt.Sprintf("\n\nyou can attempt to resolve this issue and configure your cli environment by running `cortex cluster info --configure-env %s`", _flagClusterConfigureEnv)))
351-
}
362+
exit.Error(ErrorClusterScale(out + helpStr))
352363
}
353364
},
354365
}
@@ -659,7 +670,7 @@ func cmdInfo(awsClient *aws.Client, accessConfig *clusterconfig.AccessConfig, di
659670

660671
clusterConfig := refreshCachedClusterConfig(*awsClient, accessConfig)
661672

662-
out, exitCode, err := runManagerWithClusterConfig("/root/info.sh", &clusterConfig, awsClient, nil, nil)
673+
out, exitCode, err := runManagerWithClusterConfig("/root/info.sh", &clusterConfig, awsClient, nil, nil, nil)
663674
if err != nil {
664675
exit.Error(err)
665676
}
@@ -970,6 +981,75 @@ func refreshCachedClusterConfig(awsClient aws.Client, accessConfig *clusterconfi
970981
return *refreshedClusterConfig
971982
}
972983

984+
// updateNodeGroupScale sets the min/max instance counts of the node group
// named targetNg in clusterConfig and returns the resulting config.
//
// desiredMinReplicas and desiredMaxReplicas are optional: a nil pointer keeps
// the node group's current MinInstances / MaxInstances value.
//
// An error (with an empty clusterconfig.Config) is returned when the resulting
// min or max is negative, when min is greater than max, or when no non-nil
// node group is named targetNg; the not-found error is given the names of the
// node groups that were seen.
//
// Side effects: if the node group already has the requested values, a message
// is printed and exit.Ok() is called. Unless disallowPrompt is true, the user
// is asked to confirm via prompt.YesOrNo, and exit.Ok() is called when that
// returns false.
// NOTE(review): exit.Ok() presumably terminates the process with a success
// status and does not return — confirm against the exit package.
func updateNodeGroupScale(clusterConfig clusterconfig.Config, targetNg string, desiredMinReplicas, desiredMaxReplicas *int64, disallowPrompt bool) (clusterconfig.Config, error) {
985+
clusterName := clusterConfig.ClusterName
986+
region := clusterConfig.Region
987+
988+
ngFound := false
989+
// Names of the node groups visited; only used for the not-found error below.
availableNodeGroups := []string{}
990+
for idx, ng := range clusterConfig.NodeGroups {
991+
if ng == nil {
992+
continue
993+
}
994+
availableNodeGroups = append(availableNodeGroups, ng.Name)
995+
if ng.Name == targetNg {
996+
// Resolve the target values, falling back to the current ones for any
// bound the caller did not specify.
var minReplicas, maxReplicas int64
997+
if desiredMinReplicas == nil {
998+
minReplicas = ng.MinInstances
999+
} else {
1000+
minReplicas = *desiredMinReplicas
1001+
}
1002+
if desiredMaxReplicas == nil {
1003+
maxReplicas = ng.MaxInstances
1004+
} else {
1005+
maxReplicas = *desiredMaxReplicas
1006+
}
1007+
1008+
// Validate the resolved values, not just the ones passed in.
if minReplicas < 0 {
1009+
return clusterconfig.Config{}, ErrorMinInstancesLowerThan(0)
1010+
}
1011+
if maxReplicas < 0 {
1012+
return clusterconfig.Config{}, ErrorMaxInstancesLowerThan(0)
1013+
}
1014+
if minReplicas > maxReplicas {
1015+
return clusterconfig.Config{}, ErrorMinInstancesGreaterThanMaxInstances(minReplicas, maxReplicas)
1016+
}
1017+
1018+
// Nothing to change: report it and stop instead of returning a config.
if ng.MinInstances == minReplicas && ng.MaxInstances == maxReplicas {
1019+
fmt.Printf("the %s nodegroup in the %s cluster in %s already has min instances set to %d and max instances set to %d\n", ng.Name, clusterName, region, minReplicas, maxReplicas)
1020+
exit.Ok()
1021+
}
1022+
1023+
if !disallowPrompt {
1024+
// The three conditions below are mutually exclusive; together with the
// both-unchanged case handled above they cover every combination.
promptMessage := ""
1025+
if ng.MinInstances != minReplicas && ng.MaxInstances != maxReplicas {
1026+
promptMessage = fmt.Sprintf("your nodegroup named %s in your %s cluster in %s will update its %s from %d to %d and update its %s from %d to %d", ng.Name, clusterName, region, clusterconfig.MinInstancesKey, ng.MinInstances, minReplicas, clusterconfig.MaxInstancesKey, ng.MaxInstances, maxReplicas)
1027+
}
1028+
if ng.MinInstances == minReplicas && ng.MaxInstances != maxReplicas {
1029+
promptMessage = fmt.Sprintf("your nodegroup named %s in your %s cluster in %s will update its %s from %d to %d", ng.Name, clusterName, region, clusterconfig.MaxInstancesKey, ng.MaxInstances, maxReplicas)
1030+
}
1031+
if ng.MinInstances != minReplicas && ng.MaxInstances == maxReplicas {
1032+
promptMessage = fmt.Sprintf("your nodegroup named %s in your %s cluster in %s will update its %s from %d to %d", ng.Name, clusterName, region, clusterconfig.MinInstancesKey, ng.MinInstances, minReplicas)
1033+
}
1034+
if !prompt.YesOrNo(promptMessage, "", "") {
1035+
exit.Ok()
1036+
}
1037+
}
1038+
1039+
// NOTE(review): the elements of NodeGroups are compared against nil above, so
// they appear to be pointers; if so, these writes are also visible through
// the caller's copy of the config — confirm whether that is intended.
clusterConfig.NodeGroups[idx].MinInstances = minReplicas
1040+
clusterConfig.NodeGroups[idx].MaxInstances = maxReplicas
1041+
ngFound = true
1042+
// Only the first node group with a matching name is updated.
break
1043+
}
1044+
}
1045+
1046+
if !ngFound {
1047+
return clusterconfig.Config{}, ErrorNodeGroupNotFound(targetNg, clusterName, region, availableNodeGroups)
1048+
}
1049+
1050+
return clusterConfig, nil
1051+
}
1052+
9731053
func createS3BucketIfNotFound(awsClient *aws.Client, bucket string, tags map[string]string) error {
9741054
bucketFound, err := awsClient.DoesBucketExist(bucket)
9751055
if err != nil {

0 commit comments

Comments
 (0)