@@ -391,20 +391,61 @@ class ContinuousBox(Box):
     _low: torch.Tensor
     _high: torch.Tensor
     device: torch.device | None = None
+    _batch_size: torch.Size | None = None
+
+    @property
+    def batch_size(self):
+        return self._batch_size
+
+    @batch_size.setter
+    def batch_size(self, value: torch.Size | tuple):
+        # Check batch size is compatible with low and high
+        value = _remove_neg_shapes(value)
+        if self._batch_size is None:
+            if value != self._low.shape[: len(value)]:
+                raise ValueError(
+                    f"Batch size {value} is not compatible with low and high {self._low.shape}"
+                )
+        if value is None:
+            self._batch_size = None
+            self._low = self.low.clone()
+            self._high = self.high.clone()
+            return
+        # Remove batch size from low and high
+        if value:
+            # Check that low and high have a single value
+            td_low_high = TensorDict(
+                low=self.low, high=self.high, batch_size=value
+            ).flatten()
+            td_low_high0 = td_low_high[0]
+            if torch.allclose(
+                td_low_high0["low"], td_low_high["low"]
+            ) and torch.allclose(td_low_high0["high"], td_low_high["high"]):
+                self._low = td_low_high0["low"].clone()
+                self._high = td_low_high0["high"].clone()
+                self._batch_size = torch.Size(value)
+        else:
+            self._low = self.low.clone()
+            self._high = self.high.clone()
+            self._batch_size = torch.Size(value)
 
     # We store the tensors on CPU to avoid overloading CUDA with tensors that are rarely used.
     @property
     def low(self):
         low = self._low
         if self.device is not None and low.device != self.device:
             low = low.to(self.device)
+        if self._batch_size:
+            low = low.expand((*self._batch_size, *low.shape)).clone()
         return low
 
     @property
     def high(self):
         high = self._high
         if self.device is not None and high.device != self.device:
             high = high.to(self.device)
+        if self._batch_size:
+            high = high.expand((*self._batch_size, *high.shape)).clone()
         return high
 
     def unbind(self, dim: int = 0):
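
The setter above deduplicates storage when the bounds are constant along the batch
dimensions: it flattens the batch dims, checks that every element equals the first,
and keeps only that single element; the low/high properties then re-expand on access.
A minimal standalone sketch of that storage trick (plain torch, illustrative names,
not the torchrl class itself):

import torch

low = torch.zeros(1024, 3)  # 1024 identical rows of shape (3,)
batch_size = torch.Size([1024])

# Collapse the batch dims and check that low holds a single repeated value
flat = low.flatten(0, len(batch_size) - 1)
if torch.allclose(flat[0], flat):
    stored = flat[0].clone()  # keep one (3,) row instead of 1024

# Reading back: expand rebuilds the batched tensor and clone materializes it
restored = stored.expand((*batch_size, *stored.shape)).clone()
assert torch.equal(restored, low)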
@@ -417,15 +458,30 @@ def unbind(self, dim: int = 0):
     def low(self, value):
         self.device = value.device
         self._low = value
+        if self._batch_size is not None:
+            if value.shape[: len(self._batch_size)] != self._batch_size:
+                raise ValueError(
+                    f"Batch size {value.shape[:len(self._batch_size)]} is not compatible with low and high {self._batch_size}"
+                )
+        if self._batch_size:
+            self._low = self._low.flatten(0, len(self._batch_size) - 1)[0].clone()
 
     @high.setter
     def high(self, value):
         self.device = value.device
         self._high = value
+        if self._batch_size is not None:
+            if value.shape[: len(self._batch_size)] != self._batch_size:
+                raise ValueError(
+                    f"Batch size {value.shape[:len(self._batch_size)]} is not compatible with low and high {self._batch_size}"
+                )
+        if self._batch_size:
+            self._high = self._high.flatten(0, len(self._batch_size) - 1)[0].clone()
 
     def __post_init__(self):
         self.low = self.low.clone()
         self.high = self.high.clone()
+        self._batch_size = None
 
     def __iter__(self):
         yield self.low
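
Once a batch size is registered, the low/high setters above validate the leading
dims of any incoming value and immediately re-collapse it to a single element. A
sketch of that round-trip, under the same assumptions as the previous snippet:

import torch

batch_size = torch.Size([4, 5])
value = torch.full((4, 5, 3), -1.0)  # new bound, constant over the batch dims

# Shape check from the setter: leading dims must match the registered batch size
assert value.shape[: len(batch_size)] == batch_size

# Collapse: flatten the batch dims and keep only the first element
stored = value.flatten(0, len(batch_size) - 1)[0].clone()
print(stored.shape)  # torch.Size([3]) -- one element stored instead of 20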
@@ -2366,6 +2422,10 @@ def __init__(
         )
         self.encode = self._encode_eager
 
+    def _register_batch_size(self, batch_size: torch.Size | tuple):
+        # Register batch size in the space to decrease the memory footprint of the specs
+        self.space.batch_size = batch_size
+
     def index(
         self, index: INDEX_TYPING, tensor_to_index: torch.Tensor | TensorDictBase
     ) -> torch.Tensor | TensorDictBase:
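
A hypothetical end-to-end use of the helper added above, assuming a Bounded spec
whose bounds are constant across the batch dims (the private method name comes
from this diff; the constructor call is an assumption about the torchrl API):

import torch
from torchrl.data import Bounded

# Scalar bounds broadcast to the full shape, so low/high repeat over the batch dim
spec = Bounded(low=-1.0, high=1.0, shape=(1024, 3), dtype=torch.float32)
spec._register_batch_size((1024,))  # storage shrinks to one (3,) low/high pair
assert spec.space.low.shape == torch.Size([1024, 3])  # reads still look batched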
@@ -5191,6 +5251,8 @@ def set(self, name: str, spec: TensorSpec) -> Composite:
                 f"{self.ndim} dimensions should match but got spec.shape={spec.shape} and "
                 f"Composite.shape={self.shape}."
             )
+        if isinstance(spec, Bounded):
+            spec._register_batch_size(self.shape)
         self._specs[name] = spec
         return self
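
With the hook above, setting a Bounded spec inside a batched Composite registers
the container's batch dims on the child automatically. A sketch, assuming
__setitem__ routes through Composite.set as in current torchrl:

import torch
from torchrl.data import Bounded, Composite

comp = Composite(shape=(32,))
comp["obs"] = Bounded(low=0.0, high=1.0, shape=(32, 64), dtype=torch.float32)
# set() ran spec._register_batch_size((32,)): storage is (64,), reads are (32, 64)
assert comp["obs"].space.low.shape == torch.Size([32, 64])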