Commit ed656a1

Authored by Vincent Moens
[BugFix] Fix missing min/max alpha clamps in losses (#2684)
1 parent f672c70 commit ed656a1

File tree

4 files changed: +5 -5 lines changed

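All four changes are the same one-line fix: the log-alpha clamp previously ran only when min_log_alpha was set, so a loss configured with only max_log_alpha silently skipped the clamp and alpha could grow past its intended cap. A minimal standalone sketch of the before/after behavior (plain torch, not TorchRL code; note that Tensor.clamp_ accepts None for either bound):

import torch

log_alpha = torch.tensor([5.0])
min_log_alpha, max_log_alpha = None, 2.0  # only an upper bound is configured

# Old guard: skipped entirely because min_log_alpha is None, so
# alpha = exp(5.0) ~ 148.4 escapes the intended cap of exp(2.0) ~ 7.39.
if min_log_alpha is not None:
    log_alpha.data.clamp_(min_log_alpha, max_log_alpha)

# Fixed guard: the clamp runs if either bound is set; clamp_ treats a
# None bound as unconstrained on that side.
if min_log_alpha is not None or max_log_alpha is not None:
    log_alpha.data.clamp_(min_log_alpha, max_log_alpha)

print(log_alpha.exp())  # tensor([7.3891]): alpha now respects max_log_alpha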

torchrl/objectives/cql.py

Lines changed: 1 addition & 1 deletion
@@ -892,7 +892,7 @@ def alpha_loss(self, tensordict: TensorDictBase) -> Tensor:
 
     @property
     def _alpha(self):
-        if self.min_log_alpha is not None:
+        if self.min_log_alpha is not None or self.max_log_alpha is not None:
             self.log_alpha.data.clamp_(self.min_log_alpha, self.max_log_alpha)
         alpha = self.log_alpha.data.exp()
         return alpha

torchrl/objectives/crossq.py

Lines changed: 1 addition & 1 deletion
@@ -677,7 +677,7 @@ def alpha_loss(self, log_prob: Tensor) -> Tensor:
 
     @property
     def _alpha(self):
-        if self.min_log_alpha is not None:
+        if self.min_log_alpha is not None or self.max_log_alpha is not None:
             self.log_alpha.data.clamp_(self.min_log_alpha, self.max_log_alpha)
         with torch.no_grad():
             alpha = self.log_alpha.exp()

torchrl/objectives/decision_transformer.py

Lines changed: 1 addition & 1 deletion
@@ -171,7 +171,7 @@ def _forward_value_estimator_keys(self, **kwargs):
 
     @property
    def alpha(self):
-        if self.min_log_alpha is not None:
+        if self.min_log_alpha is not None or self.max_log_alpha is not None:
             self.log_alpha.data.clamp_(self.min_log_alpha, self.max_log_alpha)
         with torch.no_grad():
             alpha = self.log_alpha.exp()

torchrl/objectives/sac.py

Lines changed: 2 additions & 2 deletions
@@ -846,7 +846,7 @@ def _alpha_loss(self, log_prob: Tensor) -> Tensor:
 
     @property
     def _alpha(self):
-        if self.min_log_alpha is not None:
+        if self.min_log_alpha is not None or self.max_log_alpha is not None:
             self.log_alpha.data.clamp_(self.min_log_alpha, self.max_log_alpha)
         with torch.no_grad():
             alpha = self.log_alpha.exp()
@@ -1374,7 +1374,7 @@ def _alpha_loss(self, log_prob: Tensor) -> Tensor:
 
     @property
     def _alpha(self):
-        if self.min_log_alpha is not None:
+        if self.min_log_alpha is not None or self.max_log_alpha is not None:
             self.log_alpha.data = self.log_alpha.data.clamp(
                 self.min_log_alpha, self.max_log_alpha
             )
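The second sac.py hunk uses the out-of-place clamp and reassigns log_alpha.data rather than clamping in place; the guard fix is the same. A quick standalone check (plain torch, not TorchRL code) that the two spellings agree:

import torch

a = torch.tensor([3.0])
b = a.clone()

a.data.clamp_(None, 2.0)          # in-place, as in cql.py / crossq.py / first sac.py hunk
b.data = b.data.clamp(None, 2.0)  # out-of-place reassignment, as in the second sac.py hunk

assert torch.equal(a, b)  # both yield tensor([2.])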
