NWChemEx · ryanmrichard · Feb 2, 2026 · Jan 28, 2026 · Jan 29, 2026 · Feb 2, 2026
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -117,48 +117,48 @@ if("${BUILD_TESTING}")
 
     include(get_catch2)
     include(cmake/mpi_test.cmake)
-    cxx_mpi_test(
+    cmaize_add_tests(
         unit_test_scf
         SOURCE_DIR "${CXX_TEST_DIR}/unit_tests"
         INCLUDE_DIRS "${CMAKE_CURRENT_LIST_DIR}/src/scf"
         DEPENDS Catch2 scf
     )
-    python_mpi_test(
-        unit_test_scf
-        "${PYTHON_TEST_DIR}/unit_tests/run_unit_tests.py"
-        SUBMODULES simde chemist pluginplay parallelzone tensorwrapper
-    )
+    # python_mpi_test(
+    #     unit_test_scf
+    #     "${PYTHON_TEST_DIR}/unit_tests/run_unit_tests.py"
+    #     SUBMODULES simde chemist pluginplay parallelzone tensorwrapper
+    # )
 
     if("${INTEGRATION_TESTING}")
         include(get_nwchemex)
-        cxx_mpi_test(
+        cmaize_add_tests(
             integration_test_scf
             SOURCE_DIR "${CXX_TEST_DIR}/integration_tests"
             INCLUDE_DIRS "${CMAKE_CURRENT_LIST_DIR}/src/scf"
             DEPENDS Catch2 nwchemex scf
         )
 
-        python_mpi_test(
-            integration_test_scf
-            "${PYTHON_TEST_DIR}/integration_tests/run_integration_tests.py"
-            SUBMODULES nwchemex chemcache integrals simde chemist pluginplay
-                       parallelzone friendzone tensorwrapper nux
-        )
+        # python_mpi_test(
+        #     integration_test_scf
+        #     "${PYTHON_TEST_DIR}/integration_tests/run_integration_tests.py"
+        #     SUBMODULES nwchemex chemcache integrals simde chemist pluginplay
+        #                parallelzone friendzone tensorwrapper nux
+        # )
 
         # Workaround for the NWX being pure python
-        if("${BUILD_PYBIND11_PYBINDINGS}")
-            get_test_property(
-                py_integration_test_scf
-                ENVIRONMENT py_path
-            )
-            set(
-                py_path "${py_path}:${nwchemex_SOURCE_DIR}/src/python"
-            )
-            set_tests_properties(
-                py_integration_test_scf
-                PROPERTIES ENVIRONMENT "${py_path}"
-            )
-        endif()
+        # if("${BUILD_PYBIND11_PYBINDINGS}")
+        #     get_test_property(
+        #         py_integration_test_scf
+        #         ENVIRONMENT py_path
+        #     )
+        #     set(
+        #         py_path "${py_path}:${nwchemex_SOURCE_DIR}/src/python"
+        #     )
+        #     set_tests_properties(
+        #         py_integration_test_scf
+        #         PROPERTIES ENVIRONMENT "${py_path}"
+        #     )
+        # endif()
     endif()
 endif()
 

diff --git a/src/scf/driver/scf_loop.cpp b/src/scf/driver/scf_loop.cpp
@@ -22,20 +22,23 @@ namespace {
 
 // Comparison between convergence metrics based on floating point type
 struct Kernel {
-    using rv_t = parallelzone::runtime::RuntimeView;
+    double m_tol;
 
-    explicit Kernel(rv_t rv) : m_rv(rv) {}
+    explicit Kernel(double tol) : m_tol(tol) {}
 
     template<typename FloatType>
-    auto run(const tensorwrapper::buffer::BufferBase& a, double tol) {
-        tensorwrapper::allocator::Eigen<FloatType> allocator(m_rv);
-        const auto& eigen_a = allocator.rebind(a);
-        return tensorwrapper::types::fabs(eigen_a.get_data(0)) < FloatType(tol);
+    auto operator()(const std::span<FloatType>& a) {
+        using tensorwrapper::types::fabs;
+        return fabs(a[0]) < std::decay_t<FloatType>(m_tol);
     }
-
-    rv_t m_rv;
 };
 
+auto check_tolerance(const tensorwrapper::buffer::BufferBase& v, double tol) {
+    Kernel kernel(tol);
+    const auto& buffer = tensorwrapper::buffer::make_contiguous(v);
+    return tensorwrapper::buffer::visit_contiguous_buffer(kernel, buffer);
+}
+
 // Pull out nuclear-nuclear interaction term, if there is one.
 struct GrabNuclear : chemist::qm_operator::OperatorVisitor {
     using V_nn_type = simde::type::V_nn_type;
@@ -47,6 +50,34 @@ struct GrabNuclear : chemist::qm_operator::OperatorVisitor {
     const V_nn_type* m_pv = nullptr;
 };
 
+struct ChangeTypeVisitor {
+    double m_val;
+    explicit ChangeTypeVisitor(double val) : m_val(val) {}
+
+    template<typename FloatType>
+    void operator()(std::span<FloatType> out) {
+        if constexpr(std::is_const_v<FloatType>) {
+            throw std::runtime_error(
+              "ChangeTypeVisitor: Cannot write to const buffer");
+        } else {
+            out[0] = std::decay_t<FloatType>(m_val);
+        }
+    }
+};
+
+// corr_type is only non-const so underlying type has correct cv-qualifiers
+auto convert_e_nuclear(simde::type::tensor& corr_type,
+                       const simde::type::tensor& e_nuc) {
+    using tensorwrapper::buffer::make_contiguous;
+    const auto& nuc_buffer = make_contiguous(e_nuc.buffer());
+    auto val = wtf::fp::float_cast<double>(nuc_buffer.get_elem({}));
+    tensorwrapper::shape::Smooth shape{};
+    ChangeTypeVisitor visitor(val);
+    auto temp_buffer = make_contiguous(corr_type.buffer(), shape);
+    tensorwrapper::buffer::visit_contiguous_buffer(visitor, temp_buffer);
+    return simde::type::tensor(shape, std::move(temp_buffer));
+}
+
 const auto desc = R"(
 )";
 
@@ -76,8 +107,6 @@ using pt = simde::Optimize<egy_pt<WfType>, WfType>;
 template<typename WfType>
 using update_pt = simde::UpdateGuess<WfType>;
 
-using tensorwrapper::utilities::floating_point_dispatch;
-
 MODULE_CTOR(SCFLoop) {
     using wf_type = simde::type::rscf_wf;
     description(desc);
@@ -243,10 +272,9 @@ MODULE_RUN(SCFLoop) {
             logger.log("  dG = " + grad_norm.to_string());
 
             // Check for convergence
-            Kernel k(get_runtime());
-            auto e_conv = floating_point_dispatch(k, de.buffer(), e_tol);
-            auto g_conv = floating_point_dispatch(k, grad_norm.buffer(), g_tol);
-            auto dp_conv = floating_point_dispatch(k, dp_norm.buffer(), dp_tol);
+            auto e_conv  = check_tolerance(de.buffer(), e_tol);
+            auto g_conv  = check_tolerance(grad_norm.buffer(), g_tol);
+            auto dp_conv = check_tolerance(dp_norm.buffer(), dp_tol);
             if(e_conv && g_conv && dp_conv) converged = true;
 
             // If using DIIS and not converged, extrapolate new Fock matrix
@@ -271,17 +299,9 @@ MODULE_RUN(SCFLoop) {
 
     tensor_t e_total;
 
-    // e_nuclear is a double. This hack converts it to udouble (if needed)
-    tensorwrapper::allocator::Eigen<double> dalloc(get_runtime());
-    using tensorwrapper::types::udouble;
-    tensorwrapper::allocator::Eigen<udouble> ualloc(get_runtime());
+    // This is a hack because WTF doesn't do auto-conversions yet
+    e_nuclear = convert_e_nuclear(e_old, e_nuclear);
 
-    if(ualloc.can_rebind(e_old.buffer())) {
-        tensor_t temp(e_old);
-        auto val = dalloc.rebind(e_nuclear.buffer()).get_data(0);
-        ualloc.rebind(temp.buffer()).set_data(0, val);
-        e_nuclear = temp;
-    }
     e_total("") = e_old("") + e_nuclear("");
 
     auto rv = results();

diff --git a/src/scf/eigen_solver/eigen_generalized.cpp b/src/scf/eigen_solver/eigen_generalized.cpp
@@ -22,25 +22,38 @@ namespace scf::eigen_solver {
 
 namespace {
 struct Kernel {
+    std::size_t m_n_rows;
+    std::size_t m_n_cols;
+
+    using tensor_t = simde::type::tensor;
+    using return_t = std::pair<tensor_t, tensor_t>;
+
+    Kernel(std::size_t n_rows, std::size_t n_cols) :
+      m_n_rows(n_rows), m_n_cols(n_cols) {}
+
+    template<typename FloatType0, typename FloatType1>
+    return_t operator()(const std::span<FloatType0>& A,
+                        const std::span<FloatType1>& B) {
+        throw std::runtime_error(
+          "EigenGeneralized Kernel: Mixed float types not supported");
+    }
+
     template<typename FloatType>
-    auto run(const tensorwrapper::buffer::BufferBase& A,
-             const tensorwrapper::buffer::BufferBase& B,
-             parallelzone::runtime::RuntimeView& rv) {
+    return_t operator()(const std::span<FloatType>& A,
+                        const std::span<FloatType>& B) {
+        using clean_t = std::decay_t<FloatType>;
         // Convert to Eigen buffers
-        tensorwrapper::allocator::Eigen<FloatType> allocator(rv);
-        const auto& eigen_A = allocator.rebind(A);
-        const auto& eigen_B = allocator.rebind(B);
 
         // Wrap the tensors in Eigen::Map objects to avoid copy
-        const auto* pA      = eigen_A.get_immutable_data();
-        const auto* pB      = eigen_B.get_immutable_data();
-        const auto& shape_A = eigen_A.layout().shape().as_smooth();
-        auto rows           = shape_A.extent(0);
-        auto cols           = shape_A.extent(1);
+        const auto* pA = A.data();
+        const auto* pB = B.data();
+        auto rows      = m_n_rows;
+        auto cols      = m_n_cols;
 
         constexpr auto rmajor = Eigen::RowMajor;
         constexpr auto edynam = Eigen::Dynamic;
-        using matrix_type = Eigen::Matrix<FloatType, edynam, edynam, rmajor>;
+        using clean_type      = std::decay_t<FloatType>;
+        using matrix_type = Eigen::Matrix<clean_type, edynam, edynam, rmajor>;
         using map_type    = Eigen::Map<const matrix_type>;
 
         map_type A_map(pA, rows, cols);
@@ -57,13 +70,15 @@ struct Kernel {
         tensorwrapper::layout::Physical vector_layout(vector_shape);
         tensorwrapper::layout::Physical matrix_layout(matrix_shape);
 
-        auto pvalues_buffer  = allocator.allocate(vector_layout);
-        auto pvectors_buffer = allocator.allocate(matrix_layout);
+        using tensorwrapper::buffer::make_contiguous;
+
+        auto pvalues_buffer  = make_contiguous<clean_t>(vector_shape);
+        auto pvectors_buffer = make_contiguous<clean_t>(matrix_shape);
 
         for(decltype(rows) i = 0; i < rows; ++i) {
-            pvalues_buffer->set_elem({i}, eigen_values(i));
+            pvalues_buffer.set_elem({i}, eigen_values(i));
             for(decltype(cols) j = 0; j < cols; ++j) {
-                pvectors_buffer->set_elem({i, j}, eigen_vectors(i, j));
+                pvectors_buffer.set_elem({i, j}, eigen_vectors(i, j));
             }
         }
 
@@ -91,13 +106,13 @@ MODULE_CTOR(EigenGeneralized) {
 MODULE_RUN(EigenGeneralized) {
     auto&& [A, B] = pt::unwrap_inputs(inputs);
 
-    using tensorwrapper::utilities::floating_point_dispatch;
-
-    auto r = get_runtime();
-    Kernel k;
-    const auto& A_buffer   = A.buffer();
-    const auto& B_buffer   = B.buffer();
-    auto [values, vectors] = floating_point_dispatch(k, A_buffer, B_buffer, r);
+    using tensorwrapper::buffer::make_contiguous;
+    const auto& A_buffer = make_contiguous(A.buffer());
+    const auto& B_buffer = make_contiguous(B.buffer());
+    const auto& A_shape  = A_buffer.shape();
+    Kernel k(A_shape.extent(0), A_shape.extent(1));
+    using tensorwrapper::buffer::visit_contiguous_buffer;
+    auto [values, vectors] = visit_contiguous_buffer(k, A_buffer, B_buffer);
 
     auto rv = results();
     return pt::wrap_results(rv, values, vectors);

diff --git a/src/scf/matrix_builder/density_matrix.cpp b/src/scf/matrix_builder/density_matrix.cpp
@@ -24,32 +24,32 @@ const auto desc = R"(
 )";
 
 struct Kernel {
+    Kernel(std::size_t n_aos, std::size_t n_occ) :
+      m_n_aos(n_aos), m_n_occ(n_occ) {}
+    std::size_t m_n_aos;
+    std::size_t m_n_occ;
+
     template<typename FloatType>
-    auto run(const tensorwrapper::buffer::BufferBase& c, std::size_t n_aos,
-             std::size_t n_occ) {
+    auto operator()(const std::span<FloatType>& c) {
         constexpr auto rmajor = Eigen::RowMajor;
         constexpr auto edynam = Eigen::Dynamic;
-        using allocator_type  = tensorwrapper::allocator::Eigen<FloatType>;
-        using tensor_type    = Eigen::Matrix<FloatType, edynam, edynam, rmajor>;
+        using clean_type      = std::decay_t<FloatType>;
+        using tensor_type = Eigen::Matrix<clean_type, edynam, edynam, rmajor>;
         using const_map_type = Eigen::Map<const tensor_type>;
-        auto rv              = c.allocator().runtime();
-        allocator_type alloc(rv);
 
-        tensorwrapper::shape::Smooth p_shape{n_aos, n_aos};
-        tensorwrapper::layout::Physical l(p_shape);
-        auto pp_buffer = alloc.allocate(l);
+        tensorwrapper::shape::Smooth p_shape{m_n_aos, m_n_aos};
+        auto pp_buffer =
+          tensorwrapper::buffer::make_contiguous<clean_type>(p_shape);
 
         // Step 2: Grab the orbitals in the ensemble
-        auto& c_buffer = alloc.rebind(c);
-
-        const_map_type c_eigen(c_buffer.get_immutable_data(), n_aos, n_aos);
-        auto slice = c_eigen.block(0, 0, n_aos, n_occ);
+        const_map_type c_eigen(c.data(), m_n_aos, m_n_aos);
+        auto slice = c_eigen.block(0, 0, m_n_aos, m_n_occ);
 
         // Step 3: CC_dagger
         tensor_type p_eigen = slice * slice.transpose();
-        for(std::size_t i = 0; i < n_aos; ++i) {
-            for(std::size_t j = 0; j < n_aos; ++j) {
-                pp_buffer->set_elem({i, j}, p_eigen(i, j));
+        for(std::size_t i = 0; i < m_n_aos; ++i) {
+            for(std::size_t j = 0; j < m_n_aos; ++j) {
+                pp_buffer.set_elem({i, j}, p_eigen(i, j));
             }
         }
         return simde::type::tensor(p_shape, std::move(pp_buffer));
@@ -96,10 +96,11 @@ MODULE_RUN(DensityMatrix) {
               "contiguous slice of the orbitals.");
 
     // TODO: The need to dispatch like this goes away when TW supports slicing
-    using tensorwrapper::utilities::floating_point_dispatch;
-    Kernel k;
-    auto p = floating_point_dispatch(k, c.buffer(), n_aos, participants.size());
-    auto rv = results();
+    using tensorwrapper::buffer::visit_contiguous_buffer;
+    Kernel k(n_aos, participants.size());
+    const auto& c_buffer = tensorwrapper::buffer::make_contiguous(c.buffer());
+    auto p               = visit_contiguous_buffer(k, c_buffer);
+    auto rv              = results();
     return pt::wrap_results(rv, p);
 }
 

diff --git a/src/scf/xc/aos_on_grid.cpp b/src/scf/xc/aos_on_grid.cpp
@@ -40,10 +40,9 @@ MODULE_RUN(AOsOnGrid) {
     auto n_aos                   = ao_basis.n_aos();
     using float_type             = double;
 
-    tensorwrapper::allocator::Eigen<float_type> allocator(get_runtime());
     tensorwrapper::shape::Smooth matrix_shape{n_aos, n_points};
-    tensorwrapper::layout::Physical layout(matrix_shape);
-    auto buffer = allocator.allocate(layout);
+    auto buffer =
+      tensorwrapper::buffer::make_contiguous<float_type>(matrix_shape);
 
     std::vector<std::size_t> idx{0, 0};
     for(const auto& atomic_basis : ao_basis) {
@@ -59,7 +58,7 @@ MODULE_RUN(AOsOnGrid) {
                     auto norm = std::sqrt(std::pow(2.0 * exponent / M_PI, 1.5));
                     ao_value += norm * val;
                 }
-                buffer->set_elem(idx, ao_value);
+                buffer.set_elem(idx, ao_value);
             }
             idx[0] += shell_i.size();
             idx[1] = 0;

diff --git a/src/scf/xc/gau2grid.cpp b/src/scf/xc/gau2grid.cpp
@@ -58,11 +58,8 @@ MODULE_RUN(Gau2Grid) {
     using float_type    = double;
     auto flattened_grid = flatten_grid<float_type>(grid);
 
-    tensorwrapper::allocator::Eigen<float_type> allocator(get_runtime());
     tensorwrapper::shape::Smooth matrix_shape{ao_basis.n_aos(), n_points};
-    tensorwrapper::layout::Physical layout(matrix_shape);
-    auto matrix_buffer = allocator.allocate(layout);
-    auto matrix_data   = matrix_buffer->get_mutable_data();
+    std::vector<float_type> matrix_data(matrix_shape.size());
 
     std::size_t ao_i = 0;
     for(const auto& atomic_basis : ao_basis) {
@@ -89,7 +86,7 @@ MODULE_RUN(Gau2Grid) {
             auto order   = is_pure ? GG_SPHERICAL_CCA : GG_CARTESIAN_CCA;
 
             auto offset       = ao_i * n_points;
-            auto shell_i_data = matrix_data + offset;
+            auto shell_i_data = matrix_data.data() + offset;
             gg_collocation(static_cast<int>(L), static_cast<int>(n_points),
                            flattened_grid.data(), 3,
                            static_cast<int>(n_primitives), coefficients.data(),
@@ -99,7 +96,8 @@ MODULE_RUN(Gau2Grid) {
             ao_i += n_aos;
         }
     }
-
+    tensorwrapper::buffer::Contiguous matrix_buffer(std::move(matrix_data),
+                                                    matrix_shape);
     simde::type::tensor collocation_matrix(matrix_shape,
                                            std::move(matrix_buffer));