-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
51 lines (45 loc) · 2.43 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from collections import deque
from typing import Union
import numpy as np
import torch
from torch import nn
from model import ActorCritic
def saveToFile(policy: ActorCritic, entropy_losses, pg_losses, value_losses, clip_fractions, approx_kl_divs, loss, num_timesteps, clip_range_loss, clip_range_vf, n_updates, game) -> None:
    """Append one block of training statistics to the per-game text log.

    The report is appended to ``log_dir_{game}/output_log_{game}.txt``. The
    file is opened in append mode; this function does not create the
    directory.

    Args:
        policy: Object exposing ``ep_info_buffer``, an iterable of mappings
            with ``'r'`` and ``'l'`` entries (reported as mean episode reward
            and mean episode length). When it also has a ``log_std``
            attribute, the mean of ``exp(log_std)`` is reported as ``Std``.
        entropy_losses: Values whose ``np.mean`` is reported as entropy loss.
        pg_losses: Values whose ``np.mean`` is reported as policy loss.
        value_losses: Values whose ``np.mean`` is reported as value loss.
        clip_fractions: Values whose ``np.mean`` is reported as clip fraction.
        approx_kl_divs: Values whose ``np.mean`` is reported as approximate
            KL divergence.
        loss: Written as-is after the ``Loss:`` label.
        num_timesteps: Written in the header line of the report.
        clip_range_loss: Written as-is after the ``Clip range:`` label.
        clip_range_vf: Written after ``Clip range vf:``; the line is skipped
            when this is ``None``.
        n_updates: Written as-is after the ``Model updates:`` label.
        game: Name interpolated into both the directory and the file name.
    """
    log_path = f"log_dir_{game}/output_log_{game}.txt"
    with open(log_path, "a") as log:
        # Header and rollout statistics.
        log.write(f"============= TIMESTEP N. {num_timesteps} =============\n")
        episode_rewards = [info['r'] for info in policy.ep_info_buffer]
        log.write(f"Mean episode reward (rollout): {compute_mean(episode_rewards)}\n")
        episode_lengths = [info['l'] for info in policy.ep_info_buffer]
        log.write(f"Mean episode lenght (rollout): {compute_mean(episode_lengths)}\n")
        log.write("============================================\n")

        # Per-update optimisation statistics, averaged over the collected values.
        log.write(f"Entropy loss: {np.mean(entropy_losses)}\n")
        log.write(f"Policy loss: {np.mean(pg_losses)}\n")
        log.write(f"Value loss: {np.mean(value_losses)}\n")
        log.write(f"Clip fraction: {np.mean(clip_fractions)}\n")
        log.write(f"Approximate KL Divergence: {np.mean(approx_kl_divs)}\n")
        log.write(f"Loss: {loss}\n")

        # Only policies that carry a log_std attribute get a Std line.
        has_log_std = hasattr(policy, "log_std")
        if has_log_std:
            mean_std = torch.exp(policy.log_std).mean().item()
            log.write(f"Std: {mean_std}\n")

        log.write(f"Clip range: {clip_range_loss}\n")
        if clip_range_vf is not None:
            log.write(f"Clip range vf: {clip_range_vf}\n")

        # Footer.
        log.write(f"Model updates: {n_updates}\n")
        log.write("======================================================\n")
def compute_mean(arr: Union[np.ndarray, list, deque]) -> float:
    """Return the mean of ``arr`` as a float, or NaN when ``arr`` is empty.

    Args:
        arr: Array, list or deque of numeric values.

    Returns:
        ``np.nan`` if ``arr`` has length zero, otherwise ``float(np.mean(arr))``.
    """
    # Explicit length check: truthiness is ambiguous for multi-element ndarrays.
    if len(arr) == 0:
        return np.nan
    return float(np.mean(arr))
def init_weights(module: nn.Module, gain: float = 1) -> None:
    """Initialise a layer with orthogonal weights and zero biases.

    Only ``nn.Linear`` and ``nn.Conv2d`` modules are modified; any other
    module is left untouched.

    Args:
        module: Module to initialise in place.
        gain: Scaling factor forwarded to ``nn.init.orthogonal_``.
    """
    # Guard clause: nothing to do for layer types we do not initialise.
    if not isinstance(module, (nn.Linear, nn.Conv2d)):
        return

    nn.init.orthogonal_(module.weight, gain=gain)

    # Layers built with bias=False have no bias tensor to reset.
    bias = module.bias
    if bias is not None:
        bias.data.zero_()
def retrieveOptimizer(model_path: str) -> str:
    """Derive the optimizer checkpoint path from a model checkpoint path.

    The path is split on underscores. In the second field, everything from
    the first ``-`` onward is replaced by ``-OPTIMIZER``. The fourth field is
    dropped, the remaining fields are re-joined with underscores, and
    ``.pt`` is appended.

    Args:
        model_path: Underscore-separated path with at least four fields.

    Returns:
        The rewritten path ending in ``.pt``.

    Raises:
        IndexError: If ``model_path`` has fewer than four underscore-separated
            fields.
    """
    fields = model_path.split('_')

    # Keep only the text before the first '-' in the second field, then re-tag it.
    algorithm = fields[1].partition('-')[0]
    fields[1] = algorithm + "-OPTIMIZER"

    # The fourth field is not part of the optimizer file name.
    del fields[3]

    rebuilt = '_'.join(fields)
    return rebuilt + ".pt"