
Commit ea87e03

Add same mode, C-code, and FFT support
1 parent 1e61660 commit ea87e03
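For context, a minimal usage sketch of the API surface this commit extends (illustrative, not part of the commit; the import path follows the file touched below, and the exact public re-export may differ):

import numpy as np

import pytensor
from pytensor.tensor import matrix
from pytensor.tensor.signal.conv import convolve2d

data = matrix("data")
kernel = matrix("kernel")

# New in this commit: mode="same" is now supported, and method= selects
# "direct", "fft", or "auto" for the underlying computation.
out = convolve2d(data, kernel, mode="same", method="fft")
fn = pytensor.function([data, kernel], out)
result = fn(np.ones((7, 5), dtype=data.dtype), np.ones((3, 3), dtype=kernel.dtype))
# `result` has the same shape as `data`, i.e. (7, 5).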

2 files changed (+134 / -30 lines)

pytensor/tensor/signal/conv.py

Lines changed: 112 additions & 25 deletions
@@ -3,7 +3,7 @@
 
 import numpy as np
 from numpy import convolve as numpy_convolve
-from scipy.signal import convolve2d as scipy_convolve2d
+from scipy.signal import convolve as scipy_convolve
 
 from pytensor.gradient import DisconnectedType
 from pytensor.graph import Apply, Constant
@@ -246,25 +246,92 @@ def convolve1d(
 
 
 class Convolve2d(AbstractConvolveNd, Op):
-    __props__ = ()
+    __props__ = ("method",)
     ndim = 2
 
+    def __init__(self, method: Literal["direct", "fft", "auto"] = "auto"):
+        self.method = method
+
     def perform(self, node, inputs, outputs):
         in1, in2, full_mode = inputs
 
-        # if all(inpt.dtype.kind in ['f', 'c'] for inpt in inputs):
-        #     outputs[0][0] = scipy_convolve(in1, in2, mode=self.mode, method='fft')
-        #
-        # else:
-        # TODO: Why is .item() needed???
-        outputs[0][0] = scipy_convolve2d(
-            in1,
-            in2,
-            mode="full" if full_mode.item() else "valid",
-        )
+        # TODO: Why is .item() needed?
+        mode: Literal["full", "valid", "same"] = "full" if full_mode.item() else "valid"
+        outputs[0][0] = scipy_convolve(in1, in2, mode=mode, method=self.method)
+
+    def c_code_cache_version(self):
+        return (1,)
+
+    def c_code(self, node, name, inputs, outputs, sub):
+        in1, in2, full_mode = inputs
+        [out] = outputs
+
+        # For now, only the direct/correlation-based implementation is provided in C.
+        # FFT-based convolution would require us to link to a vendored FFT library. Scipy uses
+        # pypocketfft for that, but I'm not sure if we can easily call into that from here.
+        code = f"""
+        {{
+            if (PyArray_NDIM({in1}) != 2 || PyArray_NDIM({in2}) != 2) {{
+                PyErr_SetString(PyExc_ValueError, "Convolve2d C code expects 2D arrays.");
+                {sub["fail"]};
+            }}
 
+            npy_intp k0 = PyArray_DIM({in2}, 0);
+            npy_intp k1 = PyArray_DIM({in2}, 1);
 
-blockwise_convolve_2d = Blockwise(Convolve2d())
+            if (k0 == 0 || k1 == 0) {{
+                PyErr_SetString(PyExc_ValueError, "Convolve2d: second input (kernel) cannot be empty.");
+                {sub["fail"]};
+            }}
+
+            npy_intp dims[2] = {{k0, k1}};
+            npy_intp strides[2];
+            strides[0] = -PyArray_STRIDES({in2})[0];
+            strides[1] = -PyArray_STRIDES({in2})[1];
+
+            char* data = (char*)PyArray_DATA({in2})
+                + (k0 - 1) * PyArray_STRIDES({in2})[0]
+                + (k1 - 1) * PyArray_STRIDES({in2})[1];
+
+            PyArrayObject* in2_flipped_view = (PyArrayObject*)PyArray_NewFromDescr(
+                Py_TYPE({in2}),
+                PyArray_DESCR({in2}),
+                2,
+                dims,
+                strides,
+                data,
+                (PyArray_FLAGS({in2}) & ~NPY_ARRAY_WRITEABLE),
+                NULL
+            );
+
+            if (!in2_flipped_view) {{
+                PyErr_SetString(PyExc_RuntimeError, "Failed to create flipped kernel view for Convolve2d.");
+                {sub["fail"]};
+            }}
+
+            Py_INCREF({in2});
+            if (PyArray_SetBaseObject(in2_flipped_view, (PyObject*){in2}) < 0) {{
+                Py_DECREF({in2});
+                Py_DECREF(in2_flipped_view);
+                in2_flipped_view = NULL;
+                PyErr_SetString(PyExc_RuntimeError, "Failed to set base object for flipped kernel view in Convolve2d.");
+                {sub["fail"]};
+            }}
+
+            PyArray_UpdateFlags(in2_flipped_view, (NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS));
+
+            int mode_int = {full_mode} ? 2 : 0;
+
+            Py_XDECREF({out});
+            {out} = (PyArrayObject*)PyArray_Correlate2((PyObject*){in1}, (PyObject*)in2_flipped_view, mode_int);
+            Py_XDECREF(in2_flipped_view);
+
+            if (!{out}) {{
+                {sub["fail"]};
+            }}
+        }}
+        """
+        return code
 
 
 def convolve2d(
@@ -273,6 +340,7 @@ def convolve2d(
     mode: Literal["full", "valid", "same"] = "full",
     boundary: Literal["fill", "wrap", "symm"] = "fill",
     fillvalue: float | int = 0,
+    method: Literal["direct", "fft", "auto"] = "auto",
 ) -> TensorVariable:
     """Convolve two two-dimensional arrays.
 
@@ -296,6 +364,10 @@
         - 'symm': Symmetrically reflects the input arrays.
     fillvalue : float or int, optional
         The value to use for padding when boundary is 'fill'. Default is 0.
+    method : str, one of 'direct', 'fft', or 'auto'
+        Computation method to use. 'direct' uses direct convolution, 'fft' uses FFT-based convolution,
+        and 'auto' lets the implementation choose the best method at runtime.
+
     Returns
     -------
     out: tensor_variable
@@ -304,29 +376,44 @@
     """
     in1 = as_tensor_variable(in1)
     in2 = as_tensor_variable(in2)
+    ndim = max(in1.type.ndim, in2.type.ndim)
+
+    def _pad_input(input_tensor, pad_width):
+        if boundary == "fill":
+            return pad(
+                input_tensor,
+                pad_width=pad_width,
+                mode="constant",
+                constant_values=fillvalue,
+            )
+        if boundary == "wrap":
+            return pad(input_tensor, pad_width=pad_width, mode="wrap")
+        if boundary == "symm":
+            return pad(input_tensor, pad_width=pad_width, mode="symmetric")
+        raise ValueError(f"Unsupported boundary mode: {boundary}")
 
     if mode == "same":
-        raise NotImplementedError("same mode not implemented for convolve2d")
+        # Same mode is implemented as "valid" with a padded input.
+        pad_width = zeros((ndim, 2), dtype="int64")
+        pad_width = pad_width[-2, 0].set(in2.shape[-2] // 2)
+        pad_width = pad_width[-2, 1].set((in2.shape[-2] - 1) // 2)
+        pad_width = pad_width[-1, 0].set(in2.shape[-1] // 2)
+        pad_width = pad_width[-1, 1].set((in2.shape[-1] - 1) // 2)
+        in1 = _pad_input(in1, pad_width)
+        mode = "valid"
 
     if mode != "valid" and (boundary != "fill" or fillvalue != 0):
         # We use a valid convolution on an appropriately padded kernel
         *_, k, l = in2.shape
-        ndim = max(in1.type.ndim, in2.type.ndim)
 
         pad_width = zeros((ndim, 2), dtype="int64")
         pad_width = pad_width[-2, :].set(k - 1)
         pad_width = pad_width[-1, :].set(l - 1)
-        if boundary == "fill":
-            in1 = pad(
-                in1, pad_width=pad_width, mode="constant", constant_values=fillvalue
-            )
-        elif boundary == "wrap":
-            in1 = pad(in1, pad_width=pad_width, mode="wrap")
-
-        elif boundary == "symm":
-            in1 = pad(in1, pad_width=pad_width, mode="symmetric")
+        in1 = _pad_input(in1, pad_width)
 
         mode = "valid"
 
     full_mode = as_scalar(np.bool_(mode == "full"))
-    return type_cast(TensorVariable, blockwise_convolve_2d(in1, in2, full_mode))
+    return type_cast(
+        TensorVariable, Blockwise(Convolve2d(method=method))(in1, in2, full_mode)
+    )

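The new mode="same" branch in convolve2d above reduces "same" to a "valid" convolution on a padded input: the last two axes are padded by kernel_size // 2 before and (kernel_size - 1) // 2 after. A quick standalone check of that identity with scipy (illustrative only, not part of the commit):

import numpy as np
from scipy.signal import convolve

rng = np.random.default_rng(0)
data = rng.normal(size=(7, 5))
kernel = rng.normal(size=(3, 2))

# Pad each axis by (k // 2) before and ((k - 1) // 2) after, then a "valid"
# convolution reproduces scipy's "same" output (zero / "fill" boundary).
pad_width = [(k // 2, (k - 1) // 2) for k in kernel.shape]
same_via_valid = convolve(np.pad(data, pad_width), kernel, mode="valid")

np.testing.assert_allclose(same_via_valid, convolve(data, kernel, mode="same"))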
tests/tensor/signal/test_conv.py

Lines changed: 22 additions & 5 deletions
@@ -102,12 +102,13 @@ def test_convolve1d_valid_grad(static_shape):
             "local_useless_unbatched_blockwise",
         ),
     )
-    grad_out.dprint()
+
     [conv_node] = [
         node
        for node in io_toposort([larger, smaller], [grad_out])
        if isinstance(node.op, Convolve1d)
     ]
+
     full_mode = conv_node.inputs[-1]
     # If shape is static we get constant mode == "valid", otherwise it depends on the input shapes
     # ignoring E712 because np.True_ and np.False_ need to be compared with `==` to produce a valid boolean
@@ -148,7 +149,7 @@ def test_convolve1d_grad_benchmark_c(convolve_mode, benchmark):
 @pytest.mark.parametrize(
     "data_shape", [(3, 3), (5, 5), (8, 8)], ids=lambda x: f"data_shape={x}"
 )
-@pytest.mark.parametrize("mode", ["full", "valid", "same"][:-1])
+@pytest.mark.parametrize("mode", ["full", "valid", "same"])
 @pytest.mark.parametrize(
     "boundary, boundary_kwargs",
     [
@@ -181,13 +182,29 @@ def test_convolve2d(kernel_shape, data_shape, mode, boundary, boundary_kwargs):
     utt.verify_grad(lambda k: op(data_val, k).sum(), [kernel_val])
 
 
-def test_batched_1d_agrees_with_diagonal_2d():
+def test_convolve2d_fft():
+    data = matrix("data")
+    kernel = matrix("kernel")
+    out_fft = convolve2d(data, kernel, mode="same", method="fft")
+    out_direct = convolve2d(data, kernel, mode="same", method="direct")
+
+    rng = np.random.default_rng()
+    data_val = rng.normal(size=(7, 5)).astype(config.floatX)
+    kernel_val = rng.normal(size=(3, 2)).astype(config.floatX)
+
+    fn = function([data, kernel], [out_fft, out_direct])
+    fft_res, direct_res = fn(data_val, kernel_val)
+    np.testing.assert_allclose(fft_res, direct_res)
+
+
+@pytest.mark.parametrize("mode", ["full", "valid", "same"])
+def test_batched_1d_agrees_with_2d_row_filter(mode):
     data = matrix("data")
     kernel_1d = vector("kernel_1d")
     kernel_2d = expand_dims(kernel_1d, 0)
 
-    output_1d = convolve1d(data, kernel_1d, mode="valid")
-    output_2d = convolve2d(data, kernel_2d, mode="valid")
+    output_1d = convolve1d(data, kernel_1d, mode=mode)
+    output_2d = convolve2d(data, kernel_2d, mode=mode)
 
     grad_1d = grad(output_1d.sum(), kernel_1d).ravel()
     grad_2d = grad(output_1d.sum(), kernel_1d).ravel()
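The C implementation added in conv.py builds a negative-stride view that flips the kernel along both axes and feeds it to a correlation routine. The identity it relies on, that convolution equals correlation with a flipped kernel, can be checked independently with scipy (illustrative only, not part of the commit):

import numpy as np
from scipy.signal import convolve, correlate

rng = np.random.default_rng(0)
data = rng.normal(size=(6, 4))
kernel = rng.normal(size=(3, 3))

# Flipping the kernel along every axis turns correlation into convolution,
# which is what the flipped negative-stride kernel view achieves without a copy.
np.testing.assert_allclose(
    convolve(data, kernel, mode="full"),
    correlate(data, kernel[::-1, ::-1], mode="full"),
)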

0 commit comments
