Fix for offloading when using TorchAO >= 0.7.0 (#3332)

a-r-r-o-w · XuehaiPan · web-flow · commit f0b030554cbc · 2025-01-13T16:54:28.000+01:00
* fix

* update

* fix

* apply suggestions from review

Co-Authored-By: Benjamin Bossan <BenjaminBossan@users.noreply.github.com>

Co-Authored-By: Xuehai Pan <XuehaiPan@pku.edu.cn>

* make style

---------

Co-authored-by: Xuehai Pan <XuehaiPan@pku.edu.cn>
diff --git a/src/accelerate/utils/modeling.py b/src/accelerate/utils/modeling.py
@@ -14,6 +14,7 @@
 
 import contextlib
 import gc
+import importlib
 import inspect
 import json
 import logging
@@ -43,7 +44,7 @@
 from .memory import clear_device_cache, get_xpu_available_memory
 from .offload import load_offloaded_weight, offload_weight, save_offload_index
 from .tqdm import is_tqdm_available, tqdm
-from .versions import is_torch_version
+from .versions import compare_versions, is_torch_version
 
 
 if is_npu_available(check_device=False):
@@ -350,17 +351,19 @@ def set_module_tensor_to_device(
             elif param_cls.__name__ in ["QTensor", "QBitsTensor"]:
                 new_value = torch.nn.Parameter(new_value, requires_grad=old_value.requires_grad).to(device)
             elif param_cls.__name__ in ["AffineQuantizedTensor"]:
-                new_value = torch.nn.Parameter(
-                    param_cls(
-                        new_value.layout_tensor,
-                        new_value.block_size,
-                        new_value.shape,
-                        new_value.quant_min,
-                        new_value.quant_max,
-                        new_value.zero_point_domain,
-                    ),
-                    requires_grad=old_value.requires_grad,
-                ).to(device)
+                if importlib.util.find_spec("torchao") is not None and compare_versions("torchao", ">=", "0.7.0"):
+                    # TorchAO v0.7.0 made layout_tensor an internal private variable and exposed tensor_impl
+                    args = (new_value.tensor_impl,)
+                else:
+                    args = (new_value.layout_tensor,)
+                args += (
+                    new_value.block_size,
+                    new_value.shape,
+                    new_value.quant_min,
+                    new_value.quant_max,
+                    new_value.zero_point_domain,
+                )
+                new_value = torch.nn.Parameter(param_cls(*args), requires_grad=old_value.requires_grad).to(device)
             else:
                 new_value = param_cls(new_value, requires_grad=old_value.requires_grad).to(device)