"""
Extension points in ``nn.Module`` for ``load_state_dict`` and tensor subclasses
===============================================================================
**Author:** `Mikayla Gawarecki <https://github.com/mikaylagawarecki>`_

This recipe introduces a new utility function ``torch.utils.swap_tensors``
as well as two new extension points where it has been integrated in
``nn.Module``:

* ``nn.Module.to()`` and related methods
* ``nn.Module.load_state_dict()``

.. note::
    This recipe requires PyTorch 2.3.0 or later.
"""

###############################################################################
# ``torch.utils.swap_tensors``
# ----------------------------
# ``torch.utils.swap_tensors`` (hereafter referred to as ``swap_tensors``) is a
# utility function that takes in two Python tensors and swaps them.

import torch
import torch.nn as nn
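
# Create two small tensors to swap (setup sketched here; the values are chosen
# purely for illustration).
t1 = torch.arange(2)
t2 = torch.arange(3)
print(f"Before swapping, t1: {t1}, t2: {t2}")
torch.utils.swap_tensors(t1, t2)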
print (f"After swapping, t1: { t1 } , t2: { t2 } " )
################################################################################
# More specifically, ``swap_tensors`` swaps the Python ``__class__``, ``__dict__``
# and ``__slots__`` of the two tensors, as well as their associated ``at::Tensor``.
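#
# As a quick illustration (a sketch, not part of the original example set), an
# attribute stored in one tensor's ``__dict__`` travels with the swap, and
# metadata such as the shape is exchanged as well:

a = torch.ones(2)
b = torch.zeros(3)
a.tag = "from_a"  # arbitrary attribute stored in a's __dict__
torch.utils.swap_tensors(a, b)
print(f"b.tag: {b.tag}")  # the attribute travelled with the swap
print(f"a.shape: {a.shape}, b.shape: {b.shape}")  # metadata was exchanged too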

################################################################################
# Application to ``nn.Module``
# ----------------------------
# This utility is pertinent to ``nn.Module`` when a Python object outside
# of the module holds a reference to parameters of the module. If an ``nn.Module``
# modifies any of its parameters out of place, the object holding references to
# the parameters will not see the change. A classic example of this is the
# optimizer, which holds a reference to the parameters of the ``nn.Module``.
# This leads to a silent correctness issue where the ``optimizer.step()`` will
# run without error but the weights of the ``nn.Module`` will not be updated.

mod = torch.nn.Linear(1, 2, bias=False)
optimizer = torch.optim.SGD(mod.parameters())
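print(f"weight in mod: {mod.weight}")
# Replace the weight out of place (illustrative sketch): the optimizer below
# still holds a reference to the old parameter object.
mod.weight = torch.nn.Parameter(mod.weight.detach().clone() * 2)
print(f"weight in mod: {mod.weight}")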
print(f"weight in optimizer: {optimizer.param_groups[0]['params']}")

################################################################################
# ``nn.Module.to()`` and related methods
# --------------------------------------
# This includes methods that change the device of the module (such as ``nn.Module.cpu()``),
# methods that change the ``dtype`` of the module (such as ``nn.Module.float()``)
# as well as methods that allow the module to be materialized
# (such as ``nn.Module.to_empty()``).
#
# At first glance, it might be non-intuitive that these methods are able to
# modify the parameters of the module in-place. The existing approach has been
# to use a nasty hack dating back from the first days of PyTorch.
#
# Notably, the existing approach does not work in these cases:
#
# * when using ``__torch_dispatch__`` subclasses
# * when ``param`` and ``new_param`` do not have the same Python ``type()``
# * for tensors with special C++ representations (such as sparse tensors and ``XLA`` tensors)
#
# In the following part of this recipe, we will define a toy ``__torch_dispatch__``
# subclass ``MyQuantizedLinearWeight`` that represents quantized linear weights.
# This subclass will be used for illustration purposes throughout the rest of
# the tutorial. For brevity, we omit most of the ``__torch_dispatch__``
# implementation.

aten = torch.ops.aten


class MyQuantizedLinearWeight(torch.Tensor):
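    # A minimal sketch of the wrapper subclass (the recipe deliberately omits
    # most of the real implementation). ``elem`` holds the underlying payload
    # and ``scale`` is a made-up quantization scale used only for illustration.
    @staticmethod
    def __new__(cls, elem, scale):
        return torch.Tensor._make_wrapper_subclass(
            cls, elem.shape, dtype=elem.dtype, layout=elem.layout,
            device=elem.device, requires_grad=elem.requires_grad)

    def __init__(self, elem, scale):
        self.elem = elem
        self.scale = scale

    def __repr__(self):
        return f"MyQuantizedLinearWeight(elem={self.elem}, scale={self.scale})"

    @classmethod
    def __torch_dispatch__(cls, func, types, args, kwargs):
        # Handle just enough ops (detach and dtype/device copies) for the
        # examples below; anything else falls through to the error.
        if func in (aten.detach.default, aten._to_copy.default):
            new_elem = func(args[0].elem, *args[1:], **(kwargs or {}))
            return cls(new_elem, args[0].scale)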
        raise NotImplementedError(f"Unsupported function {func}")
#################################################################################
# Let us create an ``nn.Linear`` layer of ``dtype`` ``torch.float32`` where the weight is
# a ``MyQuantizedLinearWeight`` and try to convert it to ``torch.bfloat16``.
# Observe that the weight's ``dtype`` changes as expected. However, the ``dtype``
# of the subclass' payload (``elem``) does not change.

m = nn.Linear(3, 5, dtype=torch.float32)
m.weight = torch.nn.Parameter(MyQuantizedLinearWeight(m.weight, 0.5))
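# Convert and inspect the dtypes (continuation sketched for illustration): as
# described above, the wrapper's ``dtype`` changes while the payload ``elem``
# keeps its original ``dtype``.
m.bfloat16()
print(f"m.weight.dtype: {m.weight.dtype}")
print(f"m.weight.elem.dtype: {m.weight.elem.dtype}")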
print(f"m.bias.dtype: {m.bias.dtype}")

################################################################################
# To this end, we introduce a global config
# ``torch.__future__.set_swap_module_params_on_conversion`` that will use
# ``swap_tensors`` to swap the parameters of the module while preserving
# references in place of ``.data`` setting. When this config is set,
# ``swap_tensors`` will be used during the conversion, which ensures that
# the ``dtype`` of the payload is properly converted.

torch.__future__.set_swap_module_params_on_conversion(True)
m = nn.Linear(3, 5, dtype=torch.float32)
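# With the flag enabled, the same conversion is sketched again: this time the
# payload ``elem`` is converted as well.
m.weight = torch.nn.Parameter(MyQuantizedLinearWeight(m.weight, 0.5))
m.bfloat16()
print(f"m.weight.dtype: {m.weight.dtype}")
print(f"m.weight.elem.dtype: {m.weight.elem.dtype}")
print(f"m.bias.dtype: {m.bias.dtype}")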
################################################################################
# ``nn.Module.load_state_dict()``
# --------------------------------
# Depending on the value of the ``assign`` keyword argument passed
# to ``load_state_dict()``, there are two ways to load the ``state_dict``:
#
# * ``assign=False``: preserves the properties of ``module.param`` and only takes the values
#   from ``state_dict['param_name']``
# * ``assign=True``: preserves the properties and values of ``state_dict['param_name']``.
#
#
# Previously, these were implemented with in-place ``copy_`` and ``__setattr__`` respectively.
# With the existing implementation, each approach had its own limitations -- ``assign=False``
# imposes the constraint that the type of the parameter in the ``state_dict`` must
# be the same as the type of the parameter in the module, while ``assign=True`` imposes
# the constraint that anything that holds references to the module's parameters must
# be initialized after ``nn.Module.load_state_dict()``.
#
# Now, we address both constraints by adding a ``swap_tensors`` path to ``load_state_dict()``
# and introducing a new extension point ``torch.Tensor.module_load(self, other, assign=False)``.
# When the ``swap_tensors`` path is enabled via the ``__future__`` mentioned above,
# we can use a ``__torch_function__`` handler for ``module_load`` to apply a
# custom transformation to the value in the ``state_dict``. The result of this
# transformation will be swapped with the parameter in the module.
#
# In the following example, we will use the ``MyQuantizedLinearWeight`` subclass
# defined above to illustrate how we can use these features to apply a
# custom quantization scheme to the weights of a linear layer when
# loading the ``state_dict``.
#
# Recall that the ``__torch_function__`` handler for ``module_load`` will be
# invoked if either ``self`` or ``other`` (in this case ``param`` or
# ``state_dict[param_key]``) are ``MyQuantizedLinearWeight`` subclasses.
#
# Assume that we expect the ``state_dict`` to contain plain tensors and the
# module to contain ``MyQuantizedLinearWeight`` parameters where we want the
# tensors in the ``state_dict`` to be transformed into the subclass. Then we
# can define a ``__torch_function__`` handler for ``torch.Tensor.module_load``
# as such:

@classmethod
def custom_torch_function(cls, func, types, args=(), kwargs=None):
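    # A sketch of one possible handler body (it assumes the subclass stores its
    # payload in ``elem`` and its scale in ``scale``): when ``module_load`` is
    # called with a ``MyQuantizedLinearWeight`` parameter and a plain tensor from
    # the ``state_dict``, wrap the incoming value in the subclass, reusing the
    # parameter's scale; defer to the default behavior for every other function.
    kwargs = {} if kwargs is None else kwargs
    if func is torch.Tensor.module_load:
        dest, src = args[0], args[1]  # dest: module param, src: state_dict value
        return MyQuantizedLinearWeight(src, dest.scale)
    with torch._C.DisableTorchFunctionSubclass():
        return func(*args, **kwargs)
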
MyQuantizedLinearWeight.__torch_function__ = custom_torch_function
#################################################################################
# First, let us create a skeleton of a model on the meta device to avoid
# materializing storages. We convert all weights in the modules to
# ``MyQuantizedLinearWeight`` subclasses while leaving biases intact.

def fn(m):
    if isinstance(m, nn.Linear):
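        # Sketched body (illustrative): wrap the weight in the quantized
        # subclass, leaving the bias untouched.
        m.weight = torch.nn.Parameter(MyQuantizedLinearWeight(m.weight, 0.5))


# Build the skeleton on the meta device so no real storage is allocated
# (a single linear layer is assumed here for illustration).
with torch.device("meta"):
    m = nn.Linear(3, 5)
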
m.apply(fn)
#################################################################################
# We can then load the ``state_dict``. Observe that we use ``assign=True`` because
# for biases, we want to preserve the properties of the tensor in the ``state_dict``
# (for example, we do not want the bias to be on the ``meta`` device after loading).

torch.__future__.set_swap_module_params_on_conversion(True)
print(f"Before: id(weight)={id(m.weight)}, id(bias)={id(m.bias)}")
print(f"m.state_dict() after load_state_dict():\n {m.state_dict()}")
#################################################################################
# The above is a toy example of how we can use the new extension point in
# ``nn.Module.load_state_dict()``. One can also imagine alternate scenarios, such
# as when we have tensor subclasses in the ``state_dict`` and plain ``nn.Parameters``/
# tensors in the module, or when both are tensor subclasses. Based on the use
# case, we can define the ``__torch_function__`` handler for ``module_load``
# to apply the transforms as needed.
#
# Conclusion
# ----------
# In this recipe, we learned about ``swap_tensors``, the importance
# of preserving references for parameters in ``nn.Module``, as well as how to
# use the two new extension points that are gated by
# ``torch.__future__.set_swap_module_params_on_conversion``.