Commit e0e4f1a

Revert "[functorch] linearize (pytorch#94173)"
This reverts commit b6b9e1e. Reverted pytorch#94173 on behalf of https://github.com/kshitij12345 because it broke the lint runner.
1 parent: b6b9e1e · commit: e0e4f1a

File tree: 5 files changed (+3, -220 lines)

Diff for: docs/source/func.api.rst (-1 line)

@@ -16,7 +16,6 @@ Function Transforms
     grad_and_value
     vjp
     jvp
-    linearize
     jacrev
     jacfwd
     hessian
Diff for: test/functorch/test_eager_transforms.py (+2, -104 lines)

@@ -20,9 +20,8 @@
 import unittest
 import warnings
 import math
-from torch.testing._internal.common_device_type import instantiate_device_type_tests, onlyCPU, dtypes, onlyCUDA
+from torch.testing._internal.common_device_type import instantiate_device_type_tests, onlyCPU
 from torch.testing._internal.common_dtype import get_all_fp_dtypes
-from torch.testing import make_tensor
 from torch._subclasses.fake_tensor import FakeTensorMode
 from functools import partial
 from functorch.experimental import replace_all_batch_norm_modules_
@@ -41,7 +40,7 @@
 from torch._ops import PyOperator
 from torch._functorch.utils import enable_single_level_autograd_function
 import torch.autograd.forward_ad as fwAD
-from torch.func import functional_call, stack_module_state, linearize
+from torch.func import functional_call, stack_module_state
 
 # NB: numpy is a testing dependency!
 import numpy as np
@@ -2501,102 +2500,6 @@ def push_jvp(dummy, x):
         vmap(vmap(push_jvp, (0, None)))(dummy, x)
 
 
-class TestLinearize(TestCase):
-    @dtypes(torch.float)
-    def test_linearize_basic(self, device, dtype):
-        x_p = make_tensor((3, 1), device=device, dtype=dtype)
-        x_t = make_tensor((3, 1), device=device, dtype=dtype)
-
-        def fn(x):
-            return x.cos()
-
-        actual_output, jvp_fn = linearize(fn, x_p)
-        actual_jvp = jvp_fn(x_t)
-        expected_output, expected_jvp = jvp(fn, (x_p,), (x_t,))
-        self.assertEqual(actual_output, expected_output)
-        self.assertEqual(actual_jvp, expected_jvp)
-
-    @dtypes(torch.float)
-    def test_linearize_return(self, device, dtype):
-        x_p = make_tensor((3, 1), device=device, dtype=dtype)
-        x_t = make_tensor((3, 1), device=device, dtype=dtype)
-
-        def fn(x):
-            return (x.cos(), x.sum())
-
-        actual_output, jvp_fn = linearize(fn, x_p)
-        actual_jvp = jvp_fn(x_t)
-        expected_output, expected_jvp = jvp(fn, (x_p,), (x_t,))
-        self.assertEqual(actual_output, expected_output)
-        self.assertEqual(actual_jvp, expected_jvp)
-
-    @dtypes(torch.float)
-    def test_linearize_composition(self, device, dtype):
-        x_p = make_tensor((3, 1), device=device, dtype=dtype)
-        x_t = make_tensor((3, 3, 1), device=device, dtype=dtype)
-
-        def fn(x):
-            return (x.cos(), x.sum())
-
-        _, jvp_fn = linearize(fn, x_p)
-        actual_batched_jvp = vmap(jvp_fn)(x_t)
-
-        def jvp_fn(x_t):
-            return jvp(fn, (x_p,), (x_t,))[1]
-        expected_batched_jvp = vmap(jvp_fn)(x_t)
-
-        self.assertEqual(actual_batched_jvp, expected_batched_jvp)
-
-    @dtypes(torch.float)
-    def test_linearize_nested_input_nested_output(self, device, dtype):
-        x_p = make_tensor((3, 1), device=device, dtype=dtype)
-        x_t = make_tensor((3, 1), device=device, dtype=dtype)
-        y_p = make_tensor((3, 1), device=device, dtype=dtype)
-        y_t = make_tensor((3, 1), device=device, dtype=dtype)
-        z_p = make_tensor((3, 1), device=device, dtype=dtype)
-        z_t = make_tensor((3, 1), device=device, dtype=dtype)
-
-        def fn(arg):
-            x = arg['x']
-            y = arg['yz'][0]
-            z = arg['yz'][1]
-
-            return {'a': x.sum(), 'b': {'c': y + z, 'd': (x * z, y.exp())}}
-
-        inp_p = {'x': x_p, 'yz': (y_p, z_p)}
-        inp_t = {'x': x_t, 'yz': (y_t, z_t)}
-        actual_output, jvp_fn = linearize(fn, inp_p)
-        actual_jvp = jvp_fn(inp_t)
-
-        expected_output, expected_jvp = jvp(fn, (inp_p,), (inp_t,))
-
-        self.assertEqual(actual_output, expected_output)
-        self.assertEqual(actual_jvp, expected_jvp)
-
-    @onlyCUDA
-    def test_linearize_errors(self):
-        dtype = torch.float
-        device = torch.device('cpu')
-        x_p = make_tensor((3, 1), device=device, dtype=dtype)
-        x_t = make_tensor((3, 1), device=device, dtype=dtype)
-
-        def fn(x):
-            return x.sin()
-
-        _, jvp_fn = linearize(fn, x_p)
-
-        with self.assertRaisesRegex(RuntimeError, "to have the same argspec as the primals"):
-            jvp_fn((x_t, x_t))
-
-        with self.assertRaisesRegex(RuntimeError, "in flattened pytree doesn't match the shape"):
-            jvp_fn(x_t.unsqueeze(0))
-
-        with self.assertRaisesRegex(RuntimeError, "in flattened pytree doesn't match the dtype"):
-            jvp_fn(x_t.to(torch.double))
-
-        with self.assertRaisesRegex(RuntimeError, "in flattened pytree doesn't match the device"):
-            jvp_fn(x_t.to(torch.device('cuda')))
-
 # The tests here follow the cases in [Forward Grad View/inplace]
 # https://github.com/pytorch/pytorch/blob/master/torch/csrc/autograd/autograd_meta.cpp#L18-L43
 class TestVmapJvpInplaceView(TestCase):
@@ -4549,11 +4452,6 @@ def test_functional_call_multiple_dicts(self):
     globals(),
     only_for=only_for,
 )
-instantiate_device_type_tests(
-    TestLinearize,
-    globals(),
-    only_for=only_for,
-)
 instantiate_device_type_tests(
     TestVmapJvpInplaceView,
     globals(),
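
The removed TestLinearize suite pins down the intended contract: linearize(fn, x_p) returns fn(x_p) together with a jvp_fn such that jvp_fn(x_t) matches jvp(fn, (x_p,), (x_t,))[1]. Below is a minimal sketch of that reference computation using only torch.func.jvp and torch.func.vmap, both of which are unaffected by this revert; the function and shapes are illustrative, not taken from the PR:

import torch
from torch.func import jvp, vmap

def fn(x):
    return x.cos()

x_p = torch.randn(3, 1)  # primal: the point the tests linearize around
x_t = torch.randn(3, 1)  # a single tangent

# Reference values the reverted tests compare linearize() against.
expected_output, expected_jvp = jvp(fn, (x_p,), (x_t,))

# For many tangents at one primal (cf. test_linearize_composition above),
# the same reference is obtained by vmapping jvp over a batch of tangents.
x_ts = torch.randn(3, 3, 1)
expected_batched_jvp = vmap(lambda t: jvp(fn, (x_p,), (t,))[1])(x_ts)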

Diff for: torch/_functorch/eager_transforms.py (+1, -112 lines)

@@ -8,9 +8,7 @@
 import torch
 from functools import partial, wraps
 import contextlib
-from torch.utils._pytree import tree_flatten, tree_unflatten, tree_map, tree_map_only
-from torch.fx.experimental import const_fold
-from torch.fx.experimental.proxy_tensor import make_fx
+from torch.utils._pytree import tree_flatten, tree_unflatten, tree_map
 from .pytree_hacks import tree_map_, treespec_pprint
 import torch.autograd.forward_ad as fwAD
 
@@ -1602,112 +1600,3 @@ def wrapped(*args, **kwargs):
         finally:
             _func_decrement_nesting()
     return wrapped
-
-@exposed_in("torch.func")
-def linearize(func: Callable, *primals) -> Tuple[Any, Callable]:
-    '''
-    Returns the value of ``func`` at ``primals`` and linear approximation
-    at ``primals``.
-
-    Args:
-        func (Callable): A Python function that takes one or more arguments.
-        primals (Tensors): Positional arguments to ``func`` that must all be
-            Tensors. These are the values at which the function is linearly approximated.
-
-    Returns:
-        Returns a ``(output, jvp_fn)`` tuple containing the output of ``func``
-        applied to ``primals`` and a function that computes the jvp of
-        ``func`` evaluated at ``primals``.
-
-    linearize is useful if jvp is to be computed multiple times at ``primals``. However,
-    to achieve this, linearize saves intermediate computation and has higher memory requirements
-    than directly applying `jvp`. So, if all the ``tangents`` are known, it may be more efficient
-    to compute vmap(jvp) instead of using linearize.
-
-    .. note::
-        linearize evaluates ``func`` twice. Please file an issue for an implementation
-        with a single evaluation.
-
-    Example::
-        >>> import torch
-        >>> from torch.func import linearize
-        >>> def fn(x):
-        ...     return x.sin()
-        ...
-        >>> output, jvp_fn = linearize(fn, torch.zeros(3, 3))
-        >>> jvp_fn(torch.ones(3, 3))
-        tensor([[1., 1., 1.],
-                [1., 1., 1.],
-                [1., 1., 1.]])
-        >>>
-
-    '''
-    # Note: We evaluate `fn` twice.
-    # Once for returning the output and once more while
-    # tracing the graph.
-    # If this becomes a bottleneck, we should update
-    # make_fx such that it also returns the output.
-
-    output = func(*primals)
-    _, output_spec = tree_flatten(output)
-
-    flat_primals, primals_argspec = tree_flatten(primals)
-
-    # tangents for tracing
-    flat_tangents = tuple(p.new_empty(()).expand_as(p) for p in flat_primals)
-
-    # function to trace
-    def trace_fn(flat_tangents):
-        with fwAD.dual_level():
-            flat_duals = tuple(fwAD.make_dual(p, t) for p, t in zip(flat_primals, flat_tangents))
-            duals = tree_unflatten(flat_duals, primals_argspec)
-            output = func(*duals)
-            tangents = tree_map_only(torch.Tensor, lambda t: fwAD.unpack_dual(t)[1], output)
-
-        return tangents
-
-    jvp_graph = make_fx(trace_fn)(flat_tangents)
-    const_folded_jvp_graph = const_fold.split_const_subgraphs(jvp_graph)
-
-    # Hold only the meta-data regarding the primals.
-    flat_primals_shape = tuple(p.shape for p in flat_primals)
-    flat_primals_device = tuple(p.device for p in flat_primals)
-    flat_primals_dtype = tuple(p.dtype for p in flat_primals)
-
-    def forward_ad_checks(flat_tangents):
-        for idx, t in enumerate(flat_tangents):
-            if t.shape != flat_primals_shape[idx]:
-                msg = (f"tangent:{idx} with shape {t.shape} in flattened "
-                       f"pytree doesn't match the shape {flat_primals_shape[idx]} "
-                       "of the corresponding primal.")
-                raise RuntimeError(msg)
-
-            if t.device != flat_primals_device[idx]:
-                msg = (f"tangent:{idx} with device {t.device} in flattened "
-                       f"pytree doesn't match the device {flat_primals_device[idx]} "
-                       "of the corresponding primal.")
-                raise RuntimeError(msg)
-
-            if t.dtype != flat_primals_dtype[idx]:
-                msg = (f"tangent:{idx} with dtype {t.dtype} in flattened "
-                       f"pytree doesn't match the dtype {flat_primals_dtype[idx]} "
-                       "of the corresponding primal.")
-                raise RuntimeError(msg)
-
-    # jvp_fn : callable to return
-    # It takes care of checking the argspec of tangents,
-    # calling the folded fx graph and unflattening fx graph output
-    def jvp_fn(*tangents):
-        flat_tangents, tangent_argspec = tree_flatten(tangents)
-        if tangent_argspec != primals_argspec:
-            raise RuntimeError(f"Expected the tangents {tangent_argspec} to have "
-                               f"the same argspec as the primals {primals_argspec}")
-
-        forward_ad_checks(flat_tangents)
-
-        flat_output = const_folded_jvp_graph(*flat_tangents)
-        # const folded graph can return flat output,
-        # so transform output.
-        return tree_unflatten(flat_output, output_spec)
-
-    return output, jvp_fn
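
The core of the reverted implementation is trace_fn, which evaluates func under forward-mode AD (primals and tangents packed into dual tensors) and is then traced with make_fx so the resulting jvp graph can be replayed for fresh tangents. Below is a stripped-down sketch of just that forward-mode step, using the public torch.autograd.forward_ad API; fn, the shapes, and the variable names are placeholders rather than code from this commit:

import torch
import torch.autograd.forward_ad as fwAD

def fn(x):
    return x.sin()

primal = torch.randn(3, 3)
tangent = torch.ones(3, 3)

with fwAD.dual_level():
    # Pair the primal with its tangent as a dual tensor, run the function,
    # and read the jvp back off the output dual; this mirrors trace_fn above.
    dual_input = fwAD.make_dual(primal, tangent)
    dual_output = fn(dual_input)
    out_primal, out_tangent = fwAD.unpack_dual(dual_output)

# out_tangent equals cos(primal) * tangent, i.e. the jvp of sin at `primal`.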

Diff for: torch/func/__init__.py (-1 line)

@@ -7,7 +7,6 @@
     jacfwd,
     hessian,
     functionalize,
-    linearize
 )
 from torch._functorch.functional_call import functional_call, stack_module_state
 from torch._functorch.batch_norm_replacement import replace_all_batch_norm_modules_

Diff for: torch/fx/experimental/const_fold.py (-2 lines)

@@ -6,8 +6,6 @@
 from torch.fx.passes.split_module import split_module
 
 
-__all__ = ['FoldedGraphModule', 'get_unique_attr_name_in_module', 'split_const_subgraphs']
-
 class FoldedGraphModule(torch.fx.GraphModule):
     """
     FoldedGraphModule is a GraphModule which also contains another