Skip to content

Commit f78909d

Browse files
committed
Convert tensor method and save files
Signed-off-by: Flavia Beo <[email protected]>
1 parent c48e271 commit f78909d

File tree

1 file changed

+35
-25
lines changed

1 file changed

+35
-25
lines changed

scripts/generate_layers_metrics.py

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@
2929
common_seq_lengths = [64]
3030
common_max_new_tokens = [128]
3131

32-
output_dir = os.environ.get("OUTPUT_PATH", os.path.expanduser("~/tmp/output")
33-
)
32+
output_dir = os.environ.get("OUTPUT_PATH", "/tmp/output")
3433

3534
# Pass a custom list of model paths as a comma-separated string, e.g.: export FMS_TESTING_COMMON_MODEL_PATHS="/tmp/models/granite-3-8b-base,/tmp/models/granite-7b-base"
3635
if isinstance(common_model_paths, str):
@@ -199,10 +198,18 @@ def post_hook_fn(module, input, output):
199198
def write_csv(l, path, metric):
    """Write a single-column CSV file: a header line followed by the value(s).

    Args:
        l: Either a list of values (one row is written per element) or a
            single value (written as one row).
        path: Destination file path; opened in 'w' mode, so any existing
            content is replaced.
        metric: Column header written as the first line of the file.
    """
    # isinstance (rather than an exact type comparison) so that list
    # subclasses are also written one element per row instead of being
    # dumped as a single "[...]" line.
    rows = l if isinstance(l, list) else [l]
    # The with-statement closes the file on exit; no explicit close() needed.
    with open(path, 'w') as f:
        f.write(f'{metric}\n')
        f.writelines(f"{row}\n" for row in rows)
205207

208+
def convert_tensor(output):
    """Return a zero-filled 2-D tensor sized from a sequence of outputs.

    The result has one row per element of ``output`` and one column per
    distinct element of ``output``. The values held in ``output`` are not
    copied into the result; only the two counts are used.

    Args:
        output: A sized collection of hashable items (the caller passes the
            tuple produced by a layer).

    Returns:
        A ``torch.zeros`` tensor of shape ``(len(output), n_distinct)``.
    """
    # A set drops duplicates, so its size is the number of distinct elements.
    n_distinct = len(set(output))
    n_rows = len(output)
    return torch.zeros(size=(n_rows, n_distinct))
212+
206213
def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
207214
torch.manual_seed(42)
208215
os.environ["COMPILATION_MODE"] = "offline_decoder"
@@ -249,43 +256,46 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
249256
tokenizer=tokenizer)
250257

251258
absolute_differences = []
259+
mean_diff_list = []
260+
median_diff_list = []
261+
abs_diff_list = []
252262

253263
assert len(layer_stack_cuda) == len(layer_stack_cpu)
254264

255-
for layer, cuda_out in layer_stack_cuda:
265+
for layer, cuda_output in layer_stack_cuda:
256266
for cpu_layer, cpu_output in layer_stack_cpu:
257267
if cpu_layer == layer:
258268
print("CPU Layer {} GPU Layer {}".format(cpu_layer, layer))
259269

260-
if not type(cuda_out) is tuple:
261-
tensor_cpu_cuda_out = cuda_out.to(torch.device('cpu'))
270+
if not type(cuda_output) is tuple:
271+
tensor_cuda_out = cuda_output.to(torch.device('cpu'))
262272
else:
263-
cuda_out_unique = set(cuda_out)
264-
keys = {key: value for key, value in zip(cuda_out_unique, range(len(cuda_out_unique)))}
265-
tensor_cpu_cuda_out = torch.zeros(size=(len(cuda_out), len(keys)))
266-
267-
abs_diff = torch.abs(cpu_output - tensor_cpu_cuda_out).flatten().tolist()
268-
absolute_differences.extend(abs_diff)
273+
tensor_cuda_out = convert_tensor(cuda_output)
274+
if type(cpu_output) is tuple:
275+
tensor_cpu_out = convert_tensor(cpu_output)
276+
else:
277+
tensor_cpu_out = cpu_output
269278

279+
abs_diff = torch.abs(tensor_cpu_out - tensor_cuda_out).flatten().tolist()
280+
absolute_differences.extend(abs_diff)
270281
if len(absolute_differences) == 0:
271282
abs_diff = {"mean": float('nan'), "median": float('nan'), "q1": float('nan'), "q3": float('nan')}
272283

273-
abs_diff_tensor = torch.tensor(absolute_differences)
274-
abs_diff_tensor = torch.nan_to_num(abs_diff_tensor, nan=0.0)
275-
mean_diff = torch.mean(abs_diff_tensor).item()
276-
median_diff = torch.median(abs_diff_tensor).item()
284+
abs_diff_tensor = torch.tensor(absolute_differences)
285+
abs_diff_tensor = torch.nan_to_num(abs_diff_tensor, nan=0.0)
286+
mean_diff = torch.mean(abs_diff_tensor).item()
287+
median_diff = torch.median(abs_diff_tensor).item()
288+
289+
prefix = get_default_validation_prefix(model_id, max_new_token, batch_size, 0, 'float16')
277290

278-
return abs_diff, mean_diff, median_diff
291+
write_csv(abs_diff, os.path.join(output_dir, f"{prefix}--{layer}.abs_diff.csv"), "abs_diff")
292+
write_csv(mean_diff, os.path.join(output_dir, f"{prefix}--{layer}.mean_diff.csv"), "mean_diff")
293+
write_csv(median_diff, os.path.join(output_dir, f"{prefix}--{layer}.median_diff.csv"), "median_diff")
294+
295+
print(f"Completed {model_id} layers' metrics generation")
279296

280297
# Run the per-layer metric generation for every configured shape.
# generate_layers_metrics writes its per-layer CSV files itself and, in the
# visible part of its body, no longer returns the metrics, so the call is made
# for its side effects only; unpacking its result would raise TypeError.
for model_id, batch_size, sequence_length, max_new_token in common_shapes:
    print("testing ", "model_id-", model_id, ", max_new_tokens-", max_new_token, ", batch_size-",batch_size, ", seq_length-",sequence_length)
    generate_layers_metrics(model_path=model_id, batch_size=batch_size, seq_length=sequence_length, max_new_tokens=max_new_token)
291301

0 commit comments

Comments
 (0)