@@ -3,6 +3,7 @@

 import itertools
 import torch
+import torch.nn as nn

 from fms.utils import tokenizers
 from fms.models import get_model
@@ -24,10 +25,13 @@
    "SHARE_GPT_DATASET_PATH", os.path.expanduser("~/share_gpt.json")
)

-common_model_paths = "ibm-granite/granite-3.2-8b-instruct"
-common_batch_sizes = [1]
-common_seq_lengths = [64]
-common_max_new_tokens = [128]
+common_model_paths = os.environ.get(
+    "MODEL_PATHS",
+    ["ibm-granite/granite-3.2-8b-instruct"],
+)
+common_batch_sizes = os.environ.get("BATCH_SIZES", [1, 2, 4, 8])
+common_seq_lengths = os.environ.get("SEQ_LENGTHS", [64, 2048])
+common_max_new_tokens = os.environ.get("MAX_NEW_TOKENS", [128])

output_dir = os.environ.get("OUTPUT_PATH", "/tmp/output")
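Note: `os.environ.get` returns the raw string whenever the variable is set, so an override such as `BATCH_SIZES=1,2` would flow through as the string "1,2" rather than a list of ints. A minimal sketch of one way to parse such overrides (the `_env_int_list` helper is hypothetical, not part of this patch):

import os

def _env_int_list(name, default):
    # Hypothetical helper: split a comma-separated override into ints,
    # falling back to the default list when the variable is unset.
    raw = os.environ.get(name)
    return [int(x) for x in raw.split(",")] if raw else default

common_batch_sizes = _env_int_list("BATCH_SIZES", [1, 2, 4, 8])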
@@ -196,17 +200,20 @@ def post_hook_fn(module, input, output):
    return layer_stack


def write_csv(l, path, metric):
+    print("saving file")
    with open(path, 'w') as f:
        f.write(f'{metric}\n')
-        if type(l) is list:
+        if not isinstance(l, float):
+            print("saving list")
            for t in l:
-                f.write(f"{t}\n")
+                f.write(f"{t}\n")
        else:
-            f.write(f"{l}\n")
+            print("saving float")
+            f.write(f"{l}\n")
    f.close()


def convert_tensor(output):
-    out_unique = set(output)
+    out_unique = set(list(itertools.chain.from_iterable(output)))
    keys = {key: value for key, value in zip(out_unique, range(len(out_unique)))}
    return torch.zeros(size=(len(output), len(keys)))
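Note: `itertools.chain.from_iterable` flattens exactly one level of nesting, which is what lets the patched `convert_tensor` collect the unique token ids across a batch of output sequences. A small illustrative example (the toy `output` value below is made up):

import itertools

output = [[3, 5, 3], [5, 7, 9]]                      # two generated sequences
flat = list(itertools.chain.from_iterable(output))   # [3, 5, 3, 5, 7, 9]
print(set(flat))                                     # {3, 5, 7, 9} -> one column per unique id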
@@ -256,9 +263,6 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
                                          tokenizer=tokenizer)

    absolute_differences = []
-    mean_diff_list = []
-    median_diff_list = []
-    abs_diff_list = []

    assert len(layer_stack_cuda) == len(layer_stack_cpu)
@@ -275,27 +279,26 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
            tensor_cpu_out = convert_tensor(cpu_output)
        else:
            tensor_cpu_out = cpu_output
-
+        print("tensor converted... get torch abs diff")
        abs_diff = torch.abs(tensor_cpu_out - tensor_cuda_out).flatten().tolist()
-        absolute_differences.extend(abs_diff)
-        if len(absolute_differences) == 0:
-            abs_diff = {"mean": float('nan'), "median": float('nan'), "q1": float('nan'), "q3": float('nan')}
-
-        abs_diff_tensor = torch.tensor(absolute_differences)
-        abs_diff_tensor = torch.nan_to_num(abs_diff_tensor, nan=0.0)
-        mean_diff = torch.mean(abs_diff_tensor).item()
-        median_diff = torch.median(abs_diff_tensor).item()
+        cos = nn.CosineSimilarity()
+        cos_sim = cos(tensor_cpu_out, tensor_cuda_out)
+
+        print("abs_diff and cos_sim calculated")
+        absolute_differences.append(abs_diff)
+        print("abs_diff appended")
        prefix = get_default_validation_prefix(model_id, max_new_token, batch_size, 0, 'float16')
+        layer_name = str(layer).replace('[', '').replace(']', '')

-        write_csv(abs_diff, os.path.join(output_dir, f"{prefix}--{layer}.abs_diff.csv"), "abs_diff")
-        write_csv(mean_diff, os.path.join(output_dir, f"{prefix}--{layer}.mean_diff.csv"), "mean_diff")
-        write_csv(median_diff, os.path.join(output_dir, f"{prefix}--{layer}.median_diff.csv"), "median_diff")
+        print("saving files")
+        write_csv(abs_diff, os.path.join(output_dir, f"{prefix}--{layer_name}.abs_diff.csv"), "abs_diff")
+        write_csv(cos_sim, os.path.join(output_dir, f"{prefix}--{layer_name}.cos_sim.csv"), "cos_sim")

    print(f"Completed {model_id} layers' metrics generation")


for model_id, batch_size, sequence_length, max_new_token in common_shapes:
    print("testing ", "model_id-", model_id, ", max_new_tokens-", max_new_token, ", batch_size-", batch_size, ", seq_length-", sequence_length)
-    abs_diff, mean_diff, median_diff = generate_layers_metrics(model_path=model_id, batch_size=batch_size, seq_length=sequence_length, max_new_tokens=max_new_token)
+    generate_layers_metrics(model_path=model_id, batch_size=batch_size, seq_length=sequence_length, max_new_tokens=max_new_token)
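Note on the new `cos_sim` metric: `nn.CosineSimilarity` expects two tensors of the same shape and reduces along `dim=1` by default, so it yields one similarity value per row rather than a single scalar. A minimal sketch of the comparison (toy tensors, not values from an actual run):

import torch
import torch.nn as nn

cpu_out = torch.randn(2, 8)       # stand-in for tensor_cpu_out
cuda_out = cpu_out + 0.01         # stand-in for tensor_cuda_out
cos = nn.CosineSimilarity(dim=1)
print(cos(cpu_out, cuda_out))     # tensor of 2 values, each close to 1.0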