Merge pull request #2225 from spinkney/feature/ginv

t4c1 · web-flow · commit d932ecfe761a · 2020-12-21T09:39:12.000+01:00
Adding Generalized Inverse
diff --git a/stan/math/prim/fun.hpp b/stan/math/prim/fun.hpp
@@ -102,6 +102,7 @@
 #include <stan/math/prim/fun/fmod.hpp>
 #include <stan/math/prim/fun/gamma_p.hpp>
 #include <stan/math/prim/fun/gamma_q.hpp>
+#include <stan/math/prim/fun/generalized_inverse.hpp>
 #include <stan/math/prim/fun/get.hpp>
 #include <stan/math/prim/fun/get_base1.hpp>
 #include <stan/math/prim/fun/get_base1_lhs.hpp>
diff --git a/stan/math/prim/fun/generalized_inverse.hpp b/stan/math/prim/fun/generalized_inverse.hpp
@@ -0,0 +1,54 @@
+#ifndef STAN_MATH_PRIM_FUN_GENERALIZED_INVERSE_HPP
+#define STAN_MATH_PRIM_FUN_GENERALIZED_INVERSE_HPP
+
+#include <stan/math/prim/meta.hpp>
+#include <stan/math/prim/err.hpp>
+#include <stan/math/prim/fun/Eigen.hpp>
+#include <stan/math/prim/fun/to_ref.hpp>
+#include <stan/math/prim/fun/inverse.hpp>
+
+namespace stan {
+namespace math {
+
+/**
+ * Returns the Moore-Penrose generalized inverse of the specified matrix.
+ *
+ * The method is based on the Cholesky computation of the transform as specified
+ *  in
+ *
+ * <ul><li> Courrieu, Pierre. 2008.  Fast Computation of Moore-Penrose Inverse
+ Matrices.
+ * <i>arXiv</i> <b>0804.4809</b> </li></ul>
+ *
+ * @tparam EigMat type of the matrix (must be derived from `Eigen::MatrixBase`)
+ *
+ * @param G specified matrix
+ * @return Generalized inverse of the matrix (an empty matrix if the specified
+ * matrix has size zero).
+ */
+template <typename EigMat, require_eigen_t<EigMat>* = nullptr,
+          require_not_vt_var<EigMat>* = nullptr>
+inline Eigen::Matrix<value_type_t<EigMat>, EigMat::ColsAtCompileTime,
+                     EigMat::RowsAtCompileTime>
+generalized_inverse(const EigMat& G) {
+  using value_t = value_type_t<EigMat>;
+
+  if (G.size() == 0)
+    return {};
+
+  if (G.rows() == G.cols())
+    return inverse(G);
+
+  const auto& G_ref = to_ref(G);
+
+  if (G.rows() < G.cols()) {
+    return (G_ref * G_ref.transpose()).ldlt().solve(G_ref).transpose();
+  } else {
+    return (G_ref.transpose() * G_ref).ldlt().solve(G_ref.transpose());
+  }
+}
+
+}  // namespace math
+}  // namespace stan
+
+#endif
diff --git a/stan/math/rev/fun.hpp b/stan/math/rev/fun.hpp
@@ -62,6 +62,7 @@
 #include <stan/math/rev/fun/from_var_value.hpp>
 #include <stan/math/rev/fun/gamma_p.hpp>
 #include <stan/math/rev/fun/gamma_q.hpp>
+#include <stan/math/rev/fun/generalized_inverse.hpp>
 #include <stan/math/rev/fun/gp_periodic_cov.hpp>
 #include <stan/math/rev/fun/grad.hpp>
 #include <stan/math/rev/fun/grad_inc_beta.hpp>
diff --git a/stan/math/rev/fun/generalized_inverse.hpp b/stan/math/rev/fun/generalized_inverse.hpp
@@ -0,0 +1,93 @@
+#ifndef STAN_MATH_REV_FUN_GENERALIZED_INVERSE_HPP
+#define STAN_MATH_REV_FUN_GENERALIZED_INVERSE_HPP
+
+#include <stan/math/rev/core.hpp>
+#include <stan/math/prim/err.hpp>
+#include <stan/math/prim/fun/Eigen.hpp>
+#include <stan/math/rev/fun/inverse.hpp>
+
+namespace stan {
+namespace math {
+
+namespace internal {
+/*
+ * Reverse mode specialization of calculating the generalized inverse of a
+ * matrix.
+ * <ul><li> Golub, G.H. and Pereyra, V. The Differentiation of Pseudo-Inverses
+ * and Nonlinear Least Squares Problems Whose Variables Separate. <i>SIAM
+ * Journal on Numerical Analysis</i>, Vol. 10, No. 2 (Apr., 1973), pp.
+ * 413-432</li></ul>
+ */
+template <typename T1, typename T2>
+inline auto generalized_inverse_lambda(T1& G_arena, T2& inv_G) {
+  return [G_arena, inv_G]() mutable {
+    G_arena.adj()
+        += -(inv_G.val_op().transpose() * inv_G.adj_op()
+             * inv_G.val_op().transpose())
+           + (-G_arena.val_op() * inv_G.val_op()
+              + Eigen::MatrixXd::Identity(G_arena.rows(), inv_G.cols()))
+                 * inv_G.adj_op().transpose() * inv_G.val_op()
+                 * inv_G.val_op().transpose()
+           + inv_G.val_op().transpose() * inv_G.val_op()
+                 * inv_G.adj_op().transpose()
+                 * (-inv_G.val_op() * G_arena.val_op()
+                    + Eigen::MatrixXd::Identity(inv_G.rows(), G_arena.cols()));
+  };
+}
+}  // namespace internal
+
+/*
+ * Reverse mode specialization of calculating the generalized inverse of a
+ * matrix.
+ *
+ * @param G specified matrix
+ * @return Generalized inverse of the matrix (an empty matrix if the specified
+ * matrix has size zero).
+ *
+ * @note For the derivatives of this function to exist the matrix must be
+ * of constant rank.
+ * Reverse mode differentiation algorithm reference:
+ *
+ * <ul><li> Golub, G.H. and Pereyra, V. The Differentiation of Pseudo-Inverses
+ * and Nonlinear Least Squares Problems Whose Variables Separate. <i>SIAM
+ * Journal on Numerical Analysis</i>, Vol. 10, No. 2 (Apr., 1973), pp.
+ * 413-432</li></ul>
+ *
+ * Equation 4.12 in the paper
+ *
+ *  See also
+ *  http://mathoverflow.net/questions/25778/analytical-formula-for-numerical-derivative-of-the-matrix-pseudo-inverse
+ *
+ */
+template <typename VarMat, require_rev_matrix_t<VarMat>* = nullptr>
+inline auto generalized_inverse(const VarMat& G) {
+  using value_t = value_type_t<VarMat>;
+  using ret_type = promote_var_matrix_t<VarMat, VarMat>;
+
+  if (G.size() == 0)
+    return ret_type(G);
+
+  if (G.rows() == G.cols())
+    return ret_type(inverse(G));
+
+  if (G.rows() < G.cols()) {
+    arena_t<VarMat> G_arena(G);
+    arena_t<ret_type> inv_G((G_arena.val_op() * G_arena.val_op().transpose())
+                                .ldlt()
+                                .solve(G_arena.val_op())
+                                .transpose());
+    reverse_pass_callback(internal::generalized_inverse_lambda(G_arena, inv_G));
+    return ret_type(inv_G);
+  } else {
+    arena_t<VarMat> G_arena(G);
+    arena_t<ret_type> inv_G((G_arena.val_op().transpose() * G_arena.val_op())
+                                .ldlt()
+                                .solve(G_arena.val_op().transpose()));
+    reverse_pass_callback(internal::generalized_inverse_lambda(G_arena, inv_G));
+    return ret_type(inv_G);
+  }
+}
+
+}  // namespace math
+}  // namespace stan
+#endif
diff --git a/test/unit/math/mix/fun/generalized_inverse_test.cpp b/test/unit/math/mix/fun/generalized_inverse_test.cpp
@@ -0,0 +1,54 @@
+#include <test/unit/math/test_ad.hpp>
+#include <vector>
+#include <gtest/gtest.h>
+
+TEST(mathMixMatFun, ad_tests) {
+  using stan::test::expect_ad;
+  using stan::test::expect_ad_matvar;
+
+  auto f = [](const auto& G) { return stan::math::generalized_inverse(G); };
+
+  Eigen::MatrixXd t(0, 0);
+  expect_ad(f, t);
+  expect_ad_matvar(f, t);
+
+  Eigen::MatrixXd u(1, 1);
+  u << 2;
+  expect_ad(f, u);
+  expect_ad_matvar(f, u);
+
+  Eigen::MatrixXd v(2, 3);
+  v << 1, 3, 5, 2, 4, 6;
+  expect_ad(f, v);
+  expect_ad_matvar(f, v);
+
+  v << 1.9, 1.3, 2.5, 0.4, 1.7, 0.1;
+  expect_ad(f, v);
+  expect_ad_matvar(f, v);
+
+  Eigen::MatrixXd s(2, 4);
+  s << 3.4, 2, 5, 1.2, 2, 1, 3.2, 3.1;
+  expect_ad(f, s);
+  expect_ad_matvar(f, s);
+
+  // issues around zero require looser tolerances for hessians
+  stan::test::ad_tolerances tols;
+  tols.hessian_hessian_ = 2.0;
+  tols.hessian_fvar_hessian_ = 2.0;
+
+  Eigen::MatrixXd w(3, 4);
+  w << 2, 3, 5, 7, 11, 13, 17, 19, 23, 25, 27, 29;
+  expect_ad(tols, f, w);
+  expect_ad_matvar(f, w);
+
+  Eigen::MatrixXd z(2, 2);
+  z << 1, 2, 5, std::numeric_limits<double>::quiet_NaN();
+  EXPECT_NO_THROW(stan::math::generalized_inverse(z));
+
+  // autodiff throws, so following fails (throw behavior must match to pass)
+
+  Eigen::MatrixXd a(2, 2);
+  a << 1.9, 0.3, 0.3, std::numeric_limits<double>::infinity();
+  expect_ad(f, a);
+  expect_ad_matvar(f, a);
+}
diff --git a/test/unit/math/prim/fun/generalized_inverse_test.cpp b/test/unit/math/prim/fun/generalized_inverse_test.cpp
@@ -0,0 +1,47 @@
+#include <stan/math/prim.hpp>
+#include <test/unit/util.hpp>
+#include <gtest/gtest.h>
+
+TEST(MathMatrixPrim, Zero) {
+  stan::math::matrix_d m0(0, 0);
+  stan::math::matrix_d ginv = stan::math::generalized_inverse(m0);
+  EXPECT_EQ(0, ginv.rows());
+  EXPECT_EQ(0, ginv.cols());
+}
+
+TEST(MathMatrixPrim, Singular) {
+  using stan::math::generalized_inverse;
+
+  stan::math::matrix_d m1(2, 3);
+  m1 << 1, 2, 1, 2, 4, 2;
+
+  stan::math::matrix_d m2 = m1 * generalized_inverse(m1) * m1;
+  EXPECT_MATRIX_NEAR(m1, m2, 1e-9);
+}
+
+TEST(MathMatrixPrim, Equal1) {
+  using stan::math::generalized_inverse;
+
+  stan::math::matrix_d m1(2, 3);
+  m1 << 1, 3, 5, 2, 4, 6;
+
+  stan::math::matrix_d m2(2, 2);
+  m2 << 1, 0, 0, 1;
+
+  stan::math::matrix_d m3 = m1 * generalized_inverse(m1);
+  EXPECT_MATRIX_NEAR(m2, m3, 1e-9);
+}
+
+TEST(MathMatrixPrim, Equal2) {
+  using stan::math::generalized_inverse;
+
+  stan::math::matrix_d m1(3, 2);
+  m1 << 1, 2, 2, 4, 1, 2;
+
+  stan::math::matrix_d m2(3, 3);
+  m2 << 1.0 / 6.0, 1.0 / 3.0, 1.0 / 6.0, 1.0 / 3.0, 2.0 / 3.0, 1.0 / 3.0,
+      1.0 / 6.0, 1.0 / 3.0, 1.0 / 6.0;
+
+  stan::math::matrix_d m3 = m1 * generalized_inverse(m1);
+  EXPECT_MATRIX_NEAR(m2, m3, 1e-9);
+}