35
35
NUM_USERS_TO_CREATE = 5
36
36
NUM_USERS_TO_TEST = 3
37
37
38
+ MAX_QUARANTINED_STACKS = 5
39
+
40
+ AD_STACK_PREFIX = 'integ-tests-MultiUserInfraStack'
41
+
42
+ DO_NOT_DELETE_TAG_KEY = 'DO-NOT-DELETE'
43
+
38
44
39
45
def get_infra_stack_outputs (stack_name ):
40
46
cfn = boto3 .client ("cloudformation" )
@@ -117,7 +123,7 @@ def add_tag_to_stack(stack_name, key, value):
117
123
stack = cfn .Stack (stack_name )
118
124
add_tag = True
119
125
for tag in stack .tags :
120
- if tag .get ("Key" ) == "DO-NOT-DELETE" :
126
+ if tag .get ("Key" ) == DO_NOT_DELETE_TAG_KEY :
121
127
add_tag = False
122
128
break
123
129
if add_tag :
@@ -189,7 +195,7 @@ def _get_stack_parameters(directory_type, vpc_stack, keypair):
189
195
190
196
def _create_directory_stack (cfn_stacks_factory , request , directory_type , region , vpc_stack : CfnVpcStack ):
191
197
directory_stack_name = generate_stack_name (
192
- f"integ-tests-MultiUserInfraStack { directory_type } " , request .config .getoption ("stackname_suffix" )
198
+ f"{ AD_STACK_PREFIX } { directory_type } " , request .config .getoption ("stackname_suffix" )
193
199
)
194
200
195
201
if directory_type not in ("MicrosoftAD" , "SimpleAD" ):
@@ -203,7 +209,7 @@ def _create_directory_stack(cfn_stacks_factory, request, directory_type, region,
203
209
stack_parameters = _get_stack_parameters (directory_type , vpc_stack , request .config .getoption ("key_name" ))
204
210
tags = [{"Key" : "parallelcluster:integ-tests-ad-stack" , "Value" : directory_type }]
205
211
if request .config .getoption ("retain_ad_stack" ):
206
- tags .append ({"Key" : "DO-NOT-DELETE" , "Value" : "Retained for integration testing" })
212
+ tags .append ({"Key" : DO_NOT_DELETE_TAG_KEY , "Value" : "Retained for integration testing" })
207
213
208
214
directory_stack = CfnStack (
209
215
name = directory_stack_name ,
@@ -213,11 +219,30 @@ def _create_directory_stack(cfn_stacks_factory, request, directory_type, region,
213
219
capabilities = ["CAPABILITY_IAM" , "CAPABILITY_NAMED_IAM" , "CAPABILITY_AUTO_EXPAND" ],
214
220
tags = tags ,
215
221
)
216
- cfn_stacks_factory .create_stack (directory_stack )
222
+ try :
223
+ cfn_stacks_factory .create_stack (directory_stack , stack_is_under_test = True )
224
+ except BaseException as e :
225
+ logging .error ("Failed to create stack %s" , directory_stack_name )
226
+ # We want to retain the stack in case of failure in order to debug it.
227
+ # We retain a limited number of stacks to contain the costs.
228
+ n_retained_ad_stacks = get_retained_ad_stacks_count ()
229
+ if n_retained_ad_stacks < MAX_QUARANTINED_STACKS :
230
+ logging .warn ("Retaining failed stack %s to debug failure" , directory_stack_name )
231
+ add_tag_to_stack (directory_stack .name , DO_NOT_DELETE_TAG_KEY , "Retained to debug failure" )
232
+ else :
233
+ logging .warn ("Cannot retain failed stack %s for debugging because there are already %d retained (max: %d)" ,
234
+ directory_stack_name , n_retained_ad_stacks , MAX_QUARANTINED_STACKS )
235
+ raise e
217
236
logging .info ("Creation of stack %s complete" , directory_stack_name )
218
237
219
238
return directory_stack
220
239
240
def get_retained_ad_stacks_count():
    """
    Count the failed AD stacks that are currently retained for debugging.

    A stack is counted when all of the following hold:
      * its status is CREATE_FAILED;
      * its name contains AD_STACK_PREFIX;
      * it carries a tag whose key is DO_NOT_DELETE_TAG_KEY.

    The CloudFormation client is created with the default boto3 session, so the
    count refers to that session's region/credentials.

    :return: the number of matching stacks (0 when there are none).
    """
    cfn = boto3.client("cloudformation")

    # NOTE: list_stacks only returns stack summaries, which do not include tags,
    # so a tag filter applied to its output never matches. describe_stacks returns
    # both StackStatus and Tags; it is paginated, so walk every page.
    retained_count = 0
    for page in cfn.get_paginator("describe_stacks").paginate():
        for stack in page.get("Stacks", []):
            if stack.get("StackStatus") != "CREATE_FAILED":
                continue
            if AD_STACK_PREFIX not in stack.get("StackName", ""):
                continue
            # "Tags" may be missing or empty for untagged stacks.
            if any(tag.get("Key") == DO_NOT_DELETE_TAG_KEY for tag in stack.get("Tags") or []):
                retained_count += 1
    return retained_count
221
246
222
247
@retry (wait_fixed = seconds (20 ), stop_max_delay = seconds (700 ))
223
248
def _check_ssm_success (ssm_client , command_id , instance_id ):
@@ -243,7 +268,7 @@ def _directory_factory(
243
268
directory_stack_name = created_directory_stacks .get (region , {}).get ("directory" )
244
269
logging .info ("Using directory stack named %s created by another test" , directory_stack_name )
245
270
else :
246
- stack_prefix = f"integ-tests-MultiUserInfraStack { directory_type } "
271
+ stack_prefix = f"{ AD_STACK_PREFIX } { directory_type } "
247
272
directory_stack_name = find_stack_by_tag ("parallelcluster:integ-tests-ad-stack" , region , stack_prefix )
248
273
249
274
if not directory_stack_name :
@@ -257,7 +282,7 @@ def _directory_factory(
257
282
directory_stack_name = directory_stack .name
258
283
created_directory_stacks [region ]["directory" ] = directory_stack_name
259
284
if request .config .getoption ("retain_ad_stack" ):
260
- add_tag_to_stack (vpc_stack .name , "DO-NOT-DELETE" , "Retained for integration testing" )
285
+ add_tag_to_stack (vpc_stack .name , DO_NOT_DELETE_TAG_KEY , "Retained for integration testing" )
261
286
return directory_stack_name
262
287
263
288
yield _directory_factory
0 commit comments