From e8342fae8b3672e1bcc715ab276897117f09f857 Mon Sep 17 00:00:00 2001
From: Riccardo Rossi <rrossi@cimne.upc.edu>
Date: Tue, 1 Aug 2017 11:21:07 +0200
Subject: [PATCH 01/12] initial commit to use parallel version of amgcl schur
 complement solver - not yet working

---
 .../add_trilinos_linear_solvers_to_python.cpp |   6 +
 .../external_includes/mpi_amgcl_ns_solver.h   | 380 ++++++++++++++++++
 .../trilinos_linear_solver_factory.py         |   2 +
 3 files changed, 388 insertions(+)
 create mode 100644 applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h
diff --git a/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp b/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp
index b21656b3d2fb..dd10f7bb4bb8 100644
--- a/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp
+++ b/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp
@@ -74,6 +74,7 @@
 #include "external_includes/ml_solver.h"
 
 #include "external_includes/amgcl_solver.h"
+#include "external_includes/mpi_amgcl_ns_solver.h"
 
 namespace Kratos
 {
@@ -135,6 +136,11 @@ void  AddLinearSolvers()
     .def("SetIntParameter", &AmgclMPISolverType::SetIntParameter)
     ;
     
+    typedef AmgclMPI_NS_Solver<TrilinosSparseSpaceType, TrilinosLocalSpaceType > AmgclMPI_NS_SolverType;
+    class_<AmgclMPI_NS_SolverType, bases<TrilinosLinearSolverType>, boost::noncopyable >
+    ("AmgclMPI_NS_Solver",init<Parameters>()) 
+    ;
+    
     enum_<AztecScalingType>("AztecScalingType")
     .value("NoScaling", NoScaling)
     .value("LeftScaling", LeftScaling)
diff --git a/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h b/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h
new file mode 100644
index 000000000000..5f6e6cca3262
--- /dev/null
+++ b/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h
@@ -0,0 +1,380 @@
+//    |  /           |
+//    ' /   __| _` | __|  _ \   __|
+//    . \  |   (   | |   (   |\__ `
+//   _|\_\_|  \__,_|\__|\___/ ____/
+//                   Multi-Physics
+//
+//  License:		 BSD License
+//					 Kratos default license: kratos/license.txt
+//
+//  Main authors:    Denis Demidov
+//                   Riccardo Rossi
+//
+
+
+#if !defined(KRATOS_AMGCL_NS_MPI_SOLVER_H_INCLUDED )
+#define  KRATOS_AMGCL_NS_MPI_SOLVER_H_INCLUDED
+
+
+#ifndef AMGCL_PARAM_UNKNOWN
+#  define AMGCL_PARAM_UNKNOWN(name)                                            \
+      std::cerr << "AMGCL WARNING: unknown parameter " << name << std::endl
+#endif
+
+
+// External includes
+#include "boost/smart_ptr.hpp"
+
+// Project includes
+#include "includes/define.h"
+#include "includes/kratos_parameters.h"
+#include "linear_solvers/linear_solver.h"
+#include "external_includes/amgcl_solver.h"
+
+//aztec solver includes
+#include "AztecOO.h"
+#include "Epetra_LinearProblem.h"
+//#include "Teuchos_ParameterList.hpp"
+
+
+#include <amgcl/amg.hpp>
+#include <amgcl/adapter/epetra.hpp>
+#include <amgcl/profiler.hpp>
+
+#include <amgcl/mpi/make_solver.hpp>
+#include <amgcl/mpi/schur_pressure_correction.hpp>
+#include <amgcl/mpi/block_preconditioner.hpp>
+#include <amgcl/mpi/subdomain_deflation.hpp>
+#include <amgcl/mpi/direct_solver.hpp>
+#include <boost/property_tree/json_parser.hpp> //needed to print AMGCL internal settings
+
+
+
+namespace Kratos
+{
+
+
+
+template< class TSparseSpaceType, class TDenseSpaceType,
+          class TReordererType = Reorderer<TSparseSpaceType, TDenseSpaceType> >
+class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType,
+    TDenseSpaceType, TReordererType>
+{
+public:
+    /**
+     * Counted pointer of AmgclMPI_NS_Solver
+     */
+    KRATOS_CLASS_POINTER_DEFINITION ( AmgclMPI_NS_Solver );
+
+    typedef LinearSolver<TSparseSpaceType, TDenseSpaceType, TReordererType> BaseType;
+
+    typedef typename TSparseSpaceType::MatrixType SparseMatrixType;
+
+    typedef typename TSparseSpaceType::VectorType VectorType;
+
+    typedef typename TDenseSpaceType::MatrixType DenseMatrixType;
+
+
+    AmgclMPI_NS_Solver ( Parameters rParameters )
+    {
+        Parameters default_parameters( R"(
+                                       {
+                                       "solver_type" : "AmgclMPI_NS_Solver",
+                                       "krylov_type" : "fgmres",
+                                       "velocity_block_preconditioner" :
+                                        {
+                                            "krylov_type" : "lgmres",
+                                            "tolerance" : 1e-3,
+                                            "preconditioner_type" : "ilu0",
+                                            "max_iteration": 5
+                                        },
+                                        "pressure_block_preconditioner" :
+                                        {
+                                            "krylov_type" : "lgmres",
+                                            "tolerance" : 1e-2,
+                                            "preconditioner_type" : "spai0",
+                                            "max_iteration": 20
+                                        },
+                                       "tolerance" : 1e-9,
+                                       "gmres_krylov_space_dimension": 50,
+                                       "coarsening_type": "aggregation",
+                                       "max_iteration": 50,
+                                       "verbosity" : 1,
+                                       "scaling": false,
+                                       "coarse_enough" : 5000
+                                   }  )" );
+
+
+        //now validate agains defaults -- this also ensures no type mismatch
+        rParameters.ValidateAndAssignDefaults(default_parameters);
+
+        std::stringstream msg;
+
+        //validate if values are admissible
+        std::set<std::string> available_preconditioners = {"spai0","ilu0","damped_jacobi","gauss_seidel","chebyshev"};
+
+        //check velocity block settings
+        if(available_preconditioners.find(rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()) == available_preconditioners.end())
+        {
+            msg << "currently prescribed velocity_block_preconditioner preconditioner_type : " << rParameters["velocity_block_preconditioner"]["smoother_type"].GetString() << std::endl;
+            msg << "admissible values are : spai0,ilu0,damped_jacobi,gauss_seidel,chebyshev"<< std::endl;
+            KRATOS_THROW_ERROR(std::invalid_argument," smoother_type is invalid: ",msg.str());
+        }    
+
+        mcoarse_enough = rParameters["coarse_enough"].GetInt();
+
+        mtol = rParameters["tolerance"].GetDouble();
+        mmax_it = rParameters["max_iteration"].GetInt();
+        mverbosity=rParameters["verbosity"].GetInt();
+        mprm.put("solver.type", rParameters["krylov_type"].GetString());
+        
+        if(rParameters["krylov_type"].GetString() == "gmres" || rParameters["krylov_type"].GetString() == "lgmres" || rParameters["krylov_type"].GetString() == "fgmres")
+            mprm.put("solver.M",  rParameters["gmres_krylov_space_dimension"].GetInt());
+
+        //setting velocity solver options
+        mprm.put("precond.usolver.solver.type", rParameters["velocity_block_preconditioner"]["krylov_type"].GetString());
+        mprm.put("precond.usolver.solver.tol", rParameters["velocity_block_preconditioner"]["tolerance"].GetDouble());
+        mprm.put("precond.usolver.solver.maxiter", rParameters["velocity_block_preconditioner"]["max_iteration"].GetInt());
+        mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString());
+
+        //setting pressure solver options
+        mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString());
+        mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble());
+        mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt());
+        mprm.put("precond.psolver.precond.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString());
+        mprm.put("precond.psolver.precond.coarsening.aggr.eps_strong", 0.0);
+        mprm.put("precond.psolver.precond.coarsening.aggr.block_size", 1);
+        mprm.put("precond.psolver.precond.coarse_enough",mcoarse_enough);
+
+    }
+
+
+    /**
+     * Destructor
+     */
+    virtual ~AmgclMPI_NS_Solver() {}
+
+    /**
+     * Normal solve method.
+     * Solves the linear system Ax=b and puts the result on SystemVector& rX.
+     * rX is also th initial guess for iterative methods.
+     * @param rA. System matrix
+     * @param rX. Solution vector.
+     * @param rB. Right hand side vector.
+     */
+    bool Solve ( SparseMatrixType& rA, VectorType& rX, VectorType& rB )
+    {
+        KRATOS_TRY
+
+        using amgcl::prof;
+        prof.reset();
+
+        amgcl::mpi::communicator world ( MPI_COMM_WORLD );
+        if ( mverbosity >=0 && world.rank == 0 ) {
+            std::cout << "World size: " << world.size << std::endl;
+        }
+
+        int chunk = rA.NumMyRows();
+        boost::iterator_range<double*> xrange ( rX.Values(), rX.Values() + chunk );
+        boost::iterator_range<double*> frange ( rB.Values(), rB.Values() + chunk );
+
+        if ( mverbosity > 1 && world.rank == 0 ) {
+            write_json ( std::cout, mprm );
+        }
+
+        typedef amgcl::backend::builtin<double> Backend;
+        
+        typedef  amgcl::mpi::make_solver<
+            amgcl::mpi::schur_pressure_correction<
+                amgcl::mpi::make_solver<
+                    amgcl::mpi::block_preconditioner<
+                        amgcl::runtime::relaxation::as_preconditioner<Backend>
+                    >,
+                    amgcl::runtime::iterative_solver
+                >,
+                amgcl::mpi::make_solver<
+                    amgcl::mpi::subdomain_deflation<
+                        amgcl::runtime::amg<Backend>,
+                        amgcl::runtime::iterative_solver,
+                        amgcl::runtime::mpi::direct_solver<double>
+                    >,
+                    amgcl::runtime::iterative_solver
+                >
+            >,
+            amgcl::runtime::iterative_solver
+            > SDD;
+
+        boost::function<double(ptrdiff_t,unsigned)> dv = amgcl::mpi::constant_deflation(1);
+        mprm.put("precond.psolver.precond.num_def_vec", 1);
+        mprm.put("precond.psolver.precond.def_vec", &dv);
+        
+        mprm.put("precond.pmask", static_cast<void*>(&mp[0]));
+        mprm.put("precond.pmask_size", mp.size());
+
+        prof.tic ( "setup" );
+        SDD solve ( world, amgcl::backend::map ( rA ), mprm );
+        double tm_setup = prof.toc ( "setup" );
+
+        prof.tic ( "Solve" );
+        size_t iters;
+        double resid;
+        boost::tie ( iters, resid ) = solve ( frange, xrange );
+        double solve_tm = prof.toc ( "Solve" );
+
+
+
+        if ( rA.Comm().MyPID() == 0 ) {
+            if ( mverbosity > 0 ) {
+                std::cout
+                        << "------- AMGCL -------\n" << std::endl
+                        << "Iterations      : " << iters   << std::endl
+                        << "Error           : " << resid   << std::endl
+                        << "amgcl setup time: " << tm_setup   << std::endl
+                        << "amgcl solve time: " << solve_tm   << std::endl;
+            }
+
+            if ( mverbosity > 1 ) {
+                std::cout << prof  << std::endl;
+            }
+        }
+
+
+
+        return true;
+
+
+
+
+
+        KRATOS_CATCH ( "" );
+    }
+
+    /**
+     * Multi solve method for solving a set of linear systems with same coefficient matrix.
+     * Solves the linear system Ax=b and puts the result on SystemVector& rX.
+     * rX is also th initial guess for iterative methods.
+     * @param rA. System matrix
+     * @param rX. Solution vector.
+     * @param rB. Right hand side vector.
+     */
+    bool Solve ( SparseMatrixType& rA, DenseMatrixType& rX, DenseMatrixType& rB )
+    {
+
+        return false;
+    }
+
+    /** Some solvers may require a minimum degree of knowledge of the structure of the matrix. To make an example
+    * when solving a mixed u-p problem, it is important to identify the row associated to v and p.
+    * another example is the automatic prescription of rotation null-space for smoothed-aggregation solvers
+    * which require knowledge on the spatial position of the nodes associated to a given dof.
+    * This function tells if the solver requires such data
+    */
+    virtual bool AdditionalPhysicalDataIsNeeded()
+    {
+        return true;
+    }
+
+    /** Some solvers may require a minimum degree of knowledge of the structure of the matrix. To make an example
+     * when solving a mixed u-p problem, it is important to identify the row associated to v and p.
+     * another example is the automatic prescription of rotation null-space for smoothed-aggregation solvers
+     * which require knowledge on the spatial position of the nodes associated to a given dof.
+     * This function is the place to eventually provide such data
+     */
+    void ProvideAdditionalData (
+        SparseMatrixType& rA,
+        VectorType& rX,
+        VectorType& rB,
+        typename ModelPart::DofsArrayType& rdof_set,
+        ModelPart& r_model_part
+    )
+    {
+        int my_pid = rA.Comm().MyPID();
+
+        //filling the pressure mask
+        if(mp.size() != static_cast<unsigned int>(rA.NumMyRows())) mp.resize( rA.NumMyRows() );
+
+        int counter = 0.0;
+        for (ModelPart::DofsArrayType::iterator it = rdof_set.begin(); it!=rdof_set.end(); it++)
+        {
+            if( it->GetSolutionStepValue ( PARTITION_INDEX ) == my_pid )
+            {
+                mp[counter]  = (it->GetVariable().Key() == PRESSURE);
+                counter++;
+            }
+        }
+
+    }
+
+    /**
+     * Print information about this object.
+     */
+    void  PrintInfo ( std::ostream& rOStream ) const
+    {
+        rOStream << "AMGCL_MPI solver finished.";
+    }
+
+    /**
+     * Print object's data.
+     */
+    void  PrintData ( std::ostream& rOStream ) const
+    {
+    }
+
+private:
+
+    double mtol;
+    int mmax_it;
+    int mverbosity;
+    unsigned int mcoarse_enough;
+    bool muse_linear_deflation = false;
+    bool mprovide_coordinates = false;
+    bool muse_block_matrices_if_possible = false;
+
+    std::vector< char > mp; //pressure mask
+    boost::property_tree::ptree mprm;
+
+
+    /**
+     * Assignment operator.
+     */
+    AmgclMPI_NS_Solver& operator= ( const AmgclMPI_NS_Solver& Other );
+
+    /**
+     * Copy constructor.
+     */
+    AmgclMPI_NS_Solver ( const AmgclMPI_NS_Solver& Other );
+
+};
+
+
+/**
+ * input stream function
+ */
+template<class TSparseSpaceType, class TDenseSpaceType,class TReordererType>
+inline std::istream& operator >> ( std::istream& rIStream, AmgclMPI_NS_Solver< TSparseSpaceType,
+                                   TDenseSpaceType, TReordererType>& rThis )
+{
+    return rIStream;
+}
+
+/**
+ * output stream function
+ */
+template<class TSparseSpaceType, class TDenseSpaceType, class TReordererType>
+inline std::ostream& operator << ( std::ostream& rOStream,
+                                   const AmgclMPI_NS_Solver<TSparseSpaceType,
+                                   TDenseSpaceType, TReordererType>& rThis )
+{
+    rThis.PrintInfo ( rOStream );
+    rOStream << std::endl;
+    rThis.PrintData ( rOStream );
+
+    return rOStream;
+}
+
+
+}  // namespace Kratos.
+
+#endif // KRATOS_AMGCL_NS_MPI_SOLVER_H_INCLUDED  defined 
+
+
diff --git a/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py b/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py
index fd86f0169cb9..e610c456bc48 100644
--- a/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py
+++ b/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py
@@ -21,6 +21,8 @@ def ConstructSolver(configuration):
         linear_solver = MultiLevelSolver(configuration)
     elif(solver_type == "AmgclMPISolver"):
         linear_solver = AmgclMPISolver(configuration);
+    elif(solver_type == "AmgclMPI_NS_Solver"):
+        linear_solver = AmgclMPI_NS_Solver(configuration);
     #
     elif(solver_type == "Deflated Conjugate gradient"):
         raise Exception("not implemented within trilinos")

From a1b62e37bf9543344e8adf379e1b2957828aee84 Mon Sep 17 00:00:00 2001
From: Riccardo Rossi <rrossi@cimne.upc.edu>
Date: Tue, 1 Aug 2017 15:03:08 +0200
Subject: [PATCH 02/12] some debugging output

---
 .../external_includes/mpi_amgcl_ns_solver.h   | 153 ++++++++++--------
 1 file changed, 82 insertions(+), 71 deletions(-)

diff --git a/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h b/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h
index 5f6e6cca3262..52521a58bd55 100644
--- a/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h
+++ b/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h
@@ -77,74 +77,74 @@ class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType,
 
     AmgclMPI_NS_Solver ( Parameters rParameters )
     {
-        Parameters default_parameters( R"(
-                                       {
-                                       "solver_type" : "AmgclMPI_NS_Solver",
-                                       "krylov_type" : "fgmres",
-                                       "velocity_block_preconditioner" :
-                                        {
-                                            "krylov_type" : "lgmres",
-                                            "tolerance" : 1e-3,
-                                            "preconditioner_type" : "ilu0",
-                                            "max_iteration": 5
-                                        },
-                                        "pressure_block_preconditioner" :
-                                        {
-                                            "krylov_type" : "lgmres",
-                                            "tolerance" : 1e-2,
-                                            "preconditioner_type" : "spai0",
-                                            "max_iteration": 20
-                                        },
-                                       "tolerance" : 1e-9,
-                                       "gmres_krylov_space_dimension": 50,
-                                       "coarsening_type": "aggregation",
-                                       "max_iteration": 50,
-                                       "verbosity" : 1,
-                                       "scaling": false,
-                                       "coarse_enough" : 5000
-                                   }  )" );
-
-
-        //now validate agains defaults -- this also ensures no type mismatch
-        rParameters.ValidateAndAssignDefaults(default_parameters);
-
-        std::stringstream msg;
-
-        //validate if values are admissible
-        std::set<std::string> available_preconditioners = {"spai0","ilu0","damped_jacobi","gauss_seidel","chebyshev"};
-
-        //check velocity block settings
-        if(available_preconditioners.find(rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()) == available_preconditioners.end())
-        {
-            msg << "currently prescribed velocity_block_preconditioner preconditioner_type : " << rParameters["velocity_block_preconditioner"]["smoother_type"].GetString() << std::endl;
-            msg << "admissible values are : spai0,ilu0,damped_jacobi,gauss_seidel,chebyshev"<< std::endl;
-            KRATOS_THROW_ERROR(std::invalid_argument," smoother_type is invalid: ",msg.str());
-        }    
-
-        mcoarse_enough = rParameters["coarse_enough"].GetInt();
-
-        mtol = rParameters["tolerance"].GetDouble();
-        mmax_it = rParameters["max_iteration"].GetInt();
-        mverbosity=rParameters["verbosity"].GetInt();
-        mprm.put("solver.type", rParameters["krylov_type"].GetString());
-        
-        if(rParameters["krylov_type"].GetString() == "gmres" || rParameters["krylov_type"].GetString() == "lgmres" || rParameters["krylov_type"].GetString() == "fgmres")
-            mprm.put("solver.M",  rParameters["gmres_krylov_space_dimension"].GetInt());
-
-        //setting velocity solver options
-        mprm.put("precond.usolver.solver.type", rParameters["velocity_block_preconditioner"]["krylov_type"].GetString());
-        mprm.put("precond.usolver.solver.tol", rParameters["velocity_block_preconditioner"]["tolerance"].GetDouble());
-        mprm.put("precond.usolver.solver.maxiter", rParameters["velocity_block_preconditioner"]["max_iteration"].GetInt());
-        mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString());
-
-        //setting pressure solver options
-        mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString());
-        mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble());
-        mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt());
-        mprm.put("precond.psolver.precond.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString());
-        mprm.put("precond.psolver.precond.coarsening.aggr.eps_strong", 0.0);
-        mprm.put("precond.psolver.precond.coarsening.aggr.block_size", 1);
-        mprm.put("precond.psolver.precond.coarse_enough",mcoarse_enough);
+//         Parameters default_parameters( R"(
+//                                        {
+//                                        "solver_type" : "AmgclMPI_NS_Solver",
+//                                        "krylov_type" : "fgmres",
+//                                        "velocity_block_preconditioner" :
+//                                         {
+//                                             "krylov_type" : "lgmres",
+//                                             "tolerance" : 1e-3,
+//                                             "preconditioner_type" : "ilu0",
+//                                             "max_iteration": 5
+//                                         },
+//                                         "pressure_block_preconditioner" :
+//                                         {
+//                                             "krylov_type" : "lgmres",
+//                                             "tolerance" : 1e-2,
+//                                             "preconditioner_type" : "spai0",
+//                                             "max_iteration": 20
+//                                         },
+//                                        "tolerance" : 1e-9,
+//                                        "gmres_krylov_space_dimension": 50,
+//                                        "coarsening_type": "aggregation",
+//                                        "max_iteration": 50,
+//                                        "verbosity" : 1,
+//                                        "scaling": false,
+//                                        "coarse_enough" : 5000
+//                                    }  )" );
+// 
+// 
+//         //now validate agains defaults -- this also ensures no type mismatch
+//         rParameters.ValidateAndAssignDefaults(default_parameters);
+// 
+//         std::stringstream msg;
+// 
+//         //validate if values are admissible
+//         std::set<std::string> available_preconditioners = {"spai0","ilu0","damped_jacobi","gauss_seidel","chebyshev"};
+// 
+//         //check velocity block settings
+//         if(available_preconditioners.find(rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()) == available_preconditioners.end())
+//         {
+//             msg << "currently prescribed velocity_block_preconditioner preconditioner_type : " << rParameters["velocity_block_preconditioner"]["smoother_type"].GetString() << std::endl;
+//             msg << "admissible values are : spai0,ilu0,damped_jacobi,gauss_seidel,chebyshev"<< std::endl;
+//             KRATOS_THROW_ERROR(std::invalid_argument," smoother_type is invalid: ",msg.str());
+//         }    
+// 
+//         mcoarse_enough = rParameters["coarse_enough"].GetInt();
+// 
+//         mtol = rParameters["tolerance"].GetDouble();
+//         mmax_it = rParameters["max_iteration"].GetInt();
+//         mverbosity=rParameters["verbosity"].GetInt();
+//         mprm.put("solver.type", rParameters["krylov_type"].GetString());
+//         
+//         if(rParameters["krylov_type"].GetString() == "gmres" || rParameters["krylov_type"].GetString() == "lgmres" || rParameters["krylov_type"].GetString() == "fgmres")
+//             mprm.put("solver.M",  rParameters["gmres_krylov_space_dimension"].GetInt());
+// 
+//         //setting velocity solver options
+//         mprm.put("precond.usolver.solver.type", rParameters["velocity_block_preconditioner"]["krylov_type"].GetString());
+//         mprm.put("precond.usolver.solver.tol", rParameters["velocity_block_preconditioner"]["tolerance"].GetDouble());
+//         mprm.put("precond.usolver.solver.maxiter", rParameters["velocity_block_preconditioner"]["max_iteration"].GetInt());
+//         mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString());
+// 
+//         //setting pressure solver options
+//         mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString());
+//         mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble());
+//         mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt());
+//         mprm.put("precond.psolver.precond.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString());
+//         mprm.put("precond.psolver.precond.coarsening.aggr.eps_strong", 0.0);
+//         mprm.put("precond.psolver.precond.coarsening.aggr.block_size", 1);
+//         mprm.put("precond.psolver.precond.coarse_enough",mcoarse_enough);
 
     }
 
@@ -165,22 +165,26 @@ class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType,
     bool Solve ( SparseMatrixType& rA, VectorType& rX, VectorType& rB )
     {
         KRATOS_TRY
-
+KRATOS_WATCH(__LINE__)
         using amgcl::prof;
         prof.reset();
+KRATOS_WATCH(__LINE__)
 
         amgcl::mpi::communicator world ( MPI_COMM_WORLD );
         if ( mverbosity >=0 && world.rank == 0 ) {
             std::cout << "World size: " << world.size << std::endl;
         }
+KRATOS_WATCH(__LINE__)
 
         int chunk = rA.NumMyRows();
         boost::iterator_range<double*> xrange ( rX.Values(), rX.Values() + chunk );
         boost::iterator_range<double*> frange ( rB.Values(), rB.Values() + chunk );
+KRATOS_WATCH(__LINE__)
 
         if ( mverbosity > 1 && world.rank == 0 ) {
             write_json ( std::cout, mprm );
         }
+KRATOS_WATCH(__LINE__)
 
         typedef amgcl::backend::builtin<double> Backend;
         
@@ -203,6 +207,7 @@ class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType,
             >,
             amgcl::runtime::iterative_solver
             > SDD;
+KRATOS_WATCH(__LINE__)
 
         boost::function<double(ptrdiff_t,unsigned)> dv = amgcl::mpi::constant_deflation(1);
         mprm.put("precond.psolver.precond.num_def_vec", 1);
@@ -210,10 +215,12 @@ class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType,
         
         mprm.put("precond.pmask", static_cast<void*>(&mp[0]));
         mprm.put("precond.pmask_size", mp.size());
+KRATOS_WATCH(__LINE__)
 
         prof.tic ( "setup" );
         SDD solve ( world, amgcl::backend::map ( rA ), mprm );
         double tm_setup = prof.toc ( "setup" );
+KRATOS_WATCH(__LINE__)
 
         prof.tic ( "Solve" );
         size_t iters;
@@ -288,12 +295,13 @@ class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType,
         ModelPart& r_model_part
     )
     {
+KRATOS_WATCH(__LINE__)
         int my_pid = rA.Comm().MyPID();
 
         //filling the pressure mask
         if(mp.size() != static_cast<unsigned int>(rA.NumMyRows())) mp.resize( rA.NumMyRows() );
-
-        int counter = 0.0;
+KRATOS_WATCH(__LINE__)
+        int counter = 0;
         for (ModelPart::DofsArrayType::iterator it = rdof_set.begin(); it!=rdof_set.end(); it++)
         {
             if( it->GetSolutionStepValue ( PARTITION_INDEX ) == my_pid )
@@ -302,6 +310,9 @@ class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType,
                 counter++;
             }
         }
+KRATOS_WATCH(__LINE__)
+        if(counter != rA.NumMyRows())
+            KRATOS_ERROR << "pressure mask as a size " << mp.size() << " which does not correspond with the number of local rows:" << rA.NumMyRows() << std::endl; 
 
     }
 

From c7d3400a06d6b23e464061b870d1433342f86459 Mon Sep 17 00:00:00 2001
From: Riccardo Rossi <rrossi@cimne.upc.edu>
Date: Tue, 8 Aug 2017 08:04:45 +0200
Subject: [PATCH 03/12] some corrections to mpi solvers

---
 .../trilinos_linear_solver_factory.py         |   5 +-
 .../amgcl/mpi/subdomain_deflation.hpp         | 142 ++++++++++--------
 2 files changed, 85 insertions(+), 62 deletions(-)

diff --git a/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py b/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py
index e610c456bc48..853140f1616e 100644
--- a/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py
+++ b/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py
@@ -55,10 +55,11 @@ def ConstructSolver(configuration):
         raise Exception("not implemented within trilinos")
     else:
         print("*****************************************************************")
-        print("Inexisting solver type. Possibilities are:")
+        print("Inexisting solver type. Specified::::::::::: ", solver_type) 
+        print(" Possibilities are:")
         print("............")
         print("*****************************************************************")
-
+        raise Exception("please specify a correct solver")
     # else:
         # except LogicError:
 
diff --git a/external_libraries/amgcl/mpi/subdomain_deflation.hpp b/external_libraries/amgcl/mpi/subdomain_deflation.hpp
index 79e7a4ae68bc..f9c74f3cc2d8 100644
--- a/external_libraries/amgcl/mpi/subdomain_deflation.hpp
+++ b/external_libraries/amgcl/mpi/subdomain_deflation.hpp
@@ -145,7 +145,7 @@ class subdomain_deflation {
           q( backend_type::create_vector(nrows, bprm) ),
           S(nrows, prm.isolver, bprm, mpi::inner_product(mpi_comm))
         {
-            TIC("setup deflation");
+            AMGCL_TIC("setup deflation");
             typedef backend::crs<value_type, ptrdiff_t>                build_matrix;
             typedef typename backend::row_iterator<Matrix>::type       row_iterator1;
             typedef typename backend::row_iterator<build_matrix>::type row_iterator2;
@@ -170,7 +170,7 @@ class subdomain_deflation {
             ptrdiff_t loc_end = domain[comm.rank + 1];
 
             // Fill deflation vectors.
-            TIC("copy deflation vectors");
+            AMGCL_TIC("copy deflation vectors");
             {
                 std::vector<value_type> z(nrows);
                 for(int j = 0; j < ndv; ++j) {
@@ -179,12 +179,12 @@ class subdomain_deflation {
                     Z[j] = backend_type::copy_vector(z, bprm);
                 }
             }
-            TOC("copy deflation vectors");
+            AMGCL_TOC("copy deflation vectors");
 
             // Number of nonzeros in local and remote parts of the matrix.
             ptrdiff_t loc_nnz = 0, rem_nnz = 0;
 
-            TIC("first pass");
+            AMGCL_TIC("first pass");
             // First pass over Astrip rows:
             // 1. Count local and remote nonzeros,
             // 3. Build sparsity pattern of matrix AZ.
@@ -218,9 +218,9 @@ class subdomain_deflation {
                     }
                 }
             }
-            TOC("first pass");
+            AMGCL_TOC("first pass");
 
-            TIC("second pass");
+            AMGCL_TIC("second pass");
             // Second pass over Astrip rows:
             // 1. Build local and remote matrix parts.
             // 2. Build local part of AZ matrix.
@@ -273,7 +273,7 @@ class subdomain_deflation {
                 aloc->ptr[i+1] = loc_head;
                 arem->ptr[i+1] = rem_head;
             }
-            TOC("second pass");
+            AMGCL_TOC("second pass");
 
             // Create local preconditioner.
             P = boost::make_shared<LocalPrecond>( *aloc, prm.local, bprm );
@@ -284,7 +284,7 @@ class subdomain_deflation {
             Arem = backend_type::copy_matrix(arem, bprm);
             A = boost::make_shared<matrix>(*C, P->system_matrix(), *Arem);
 
-            TIC("A*Z");
+            AMGCL_TIC("A*Z");
             /* Finish construction of AZ */
             // Exchange deflation vectors
             std::vector<ptrdiff_t> zrecv_ptr(C->recv.nbr.size() + 1, 0);
@@ -358,12 +358,12 @@ class subdomain_deflation {
 
             std::rotate(az->ptr, az->ptr + nrows, az->ptr + nrows + 1);
             az->ptr[0] = 0;
-            TOC("A*Z");
+            AMGCL_TOC("A*Z");
 
             MPI_Waitall(C->send.req.size(), &C->send.req[0], MPI_STATUSES_IGNORE);
 
             /* Build deflated matrix E. */
-            TIC("assemble E");
+            AMGCL_TIC("assemble E");
             // Who is responsible for solution of coarse problem
             int nmasters = std::min(comm.size, DirectSolver::comm_size(nz, prm.dsolver));
             int nslaves  = (comm.size + nmasters - 1) / nmasters;
@@ -408,13 +408,18 @@ class subdomain_deflation {
             std::vector<int> Eptr;
             if (comm.rank == master_rank) {
                 Eptr.resize(dv_start[cgroup_end] - dv_start[cgroup_beg] + 1, 0);
-            }
 
-            MPI_Gatherv(
-                    &eptr[1], ndv, MPI_INT, &Eptr[0] + 1,
-                    const_cast<int*>(&ssize[0]), const_cast<int*>(&sstart[0]),
-                    MPI_INT, 0, slaves_comm
-                    );
+                MPI_Gatherv(
+                        &eptr[1], ndv, MPI_INT, &Eptr[0] + 1,
+                        const_cast<int*>(&ssize[0]), const_cast<int*>(&sstart[0]),
+                        MPI_INT, 0, slaves_comm
+                        );
+            } else {
+                MPI_Gatherv(
+                        &eptr[1], ndv, MPI_INT, NULL, NULL, NULL,
+                        MPI_INT, 0, slaves_comm
+                        );
+            }
 
             std::partial_sum(eptr.begin(), eptr.end(), eptr.begin());
             std::partial_sum(Eptr.begin(), Eptr.end(), Eptr.begin());
@@ -460,23 +465,33 @@ class subdomain_deflation {
                     sstart[i] = Eptr[dv_start[p]     - offset];
                     ssize[i]  = Eptr[dv_start[p + 1] - offset] - sstart[i];
                 }
-            }
 
-            MPI_Gatherv(
-                    &ecol[0], ecol.size(), MPI_INT, &Ecol[0],
-                    const_cast<int*>(&ssize[0]), const_cast<int*>(&sstart[0]),
-                    MPI_INT, 0, slaves_comm
-                    );
+                MPI_Gatherv(
+                        &ecol[0], ecol.size(), MPI_INT, &Ecol[0],
+                        const_cast<int*>(&ssize[0]), const_cast<int*>(&sstart[0]),
+                        MPI_INT, 0, slaves_comm
+                        );
 
-            MPI_Gatherv(
-                    &eval[0], eval.size(), dtype, &Eval[0],
-                    const_cast<int*>(&ssize[0]), const_cast<int*>(&sstart[0]),
-                    dtype, 0, slaves_comm
-                    );
-            TOC("assemble E");
+                MPI_Gatherv(
+                        &eval[0], eval.size(), dtype, &Eval[0],
+                        const_cast<int*>(&ssize[0]), const_cast<int*>(&sstart[0]),
+                        dtype, 0, slaves_comm
+                        );
+            } else {
+                MPI_Gatherv(
+                        &ecol[0], ecol.size(), MPI_INT, NULL, NULL, NULL,
+                        MPI_INT, 0, slaves_comm
+                        );
+
+                MPI_Gatherv(
+                        &eval[0], eval.size(), dtype, NULL, NULL, NULL,
+                        dtype, 0, slaves_comm
+                        );
+            }
+            AMGCL_TOC("assemble E");
 
             // Prepare E factorization.
-            TIC("factorize E");
+            AMGCL_TIC("factorize E");
             if (comm.rank == master_rank) {
                 E = boost::make_shared<DirectSolver>(
                         masters_comm, Eptr.size() - 1, Eptr, Ecol, Eval, prm.dsolver
@@ -485,9 +500,9 @@ class subdomain_deflation {
                 cf.resize(Eptr.size() - 1);
                 cx.resize(Eptr.size() - 1);
             }
-            TOC("factorize E");
+            AMGCL_TOC("factorize E");
 
-            TOC("setup deflation");
+            AMGCL_TOC("setup deflation");
 
             // Move matrices to backend.
             AZ = backend_type::copy_matrix(az, bprm);
@@ -546,18 +561,18 @@ class subdomain_deflation {
 
         template <class Vec1, class Vec2>
         void mul(value_type alpha, const Vec1 &x, value_type beta, Vec2 &y) const {
-            TIC("top/spmv");
+            AMGCL_TIC("top/spmv");
             backend::spmv(alpha, *A, x, beta, y);
-            TOC("top/spmv");
+            AMGCL_TOC("top/spmv");
 
             project(y);
         }
 
         template <class Vec1, class Vec2, class Vec3>
         void residual(const Vec1 &f, const Vec2 &x, Vec3 &r) const {
-            TIC("top/residual");
+            AMGCL_TIC("top/residual");
             backend::residual(f, *A, x, r);
-            TOC("top/residual");
+            AMGCL_TOC("top/residual");
 
             project(r);
         }
@@ -599,67 +614,74 @@ class subdomain_deflation {
 
         template <class Vector>
         void project(Vector &x) const {
-            TIC("project");
+            AMGCL_TIC("project");
 
-            TIC("local inner product");
+            AMGCL_TIC("local inner product");
             for(ptrdiff_t j = 0; j < ndv; ++j)
                 df[j] = backend::inner_product(x, *Z[j]);
-            TOC("local inner product");
+            AMGCL_TOC("local inner product");
 
             coarse_solve(df, dx);
 
-            TIC("spmv");
+            AMGCL_TIC("spmv");
             backend::copy_to_backend(dx, *dd);
             backend::spmv(-1, *AZ, *dd, 1, x);
-            TOC("spmv");
+            AMGCL_TOC("spmv");
 
-            TOC("project");
+            AMGCL_TOC("project");
         }
 
         void coarse_solve(std::vector<value_type> &f, std::vector<value_type> &x) const
         {
-            TIC("coarse solve");
-            TIC("exchange rhs");
-            MPI_Gatherv(
-                    &f[0], f.size(), dtype, &cf[0],
-                    const_cast<int*>(&ssize[0]), const_cast<int*>(&sstart[0]),
-                    dtype, 0, slaves_comm
-                    );
-            TOC("exchange rhs");
+            AMGCL_TIC("coarse solve");
+            AMGCL_TIC("exchange rhs");
+            if (comm.rank == master_rank) {
+                MPI_Gatherv(
+                        &f[0], f.size(), dtype, &cf[0],
+                        const_cast<int*>(&ssize[0]), const_cast<int*>(&sstart[0]),
+                        dtype, 0, slaves_comm
+                        );
+            } else {
+                MPI_Gatherv(
+                        &f[0], f.size(), dtype, NULL, NULL, NULL,
+                        dtype, 0, slaves_comm
+                        );
+            }
+            AMGCL_TOC("exchange rhs");
 
             if (comm.rank == master_rank) {
-                TIC("call solver");
+                AMGCL_TIC("call solver");
                 (*E)(cf, cx);
-                TOC("call solver");
+                AMGCL_TOC("call solver");
 
-                TIC("gather result");
+                AMGCL_TIC("gather result");
                 MPI_Gatherv(
                         &cx[0], cx.size(), dtype, &x[0],
                         const_cast<int*>(&msize[0]), const_cast<int*>(&mstart[0]),
                         dtype, 0, masters_comm
                         );
-                TOC("gather result");
+                AMGCL_TOC("gather result");
             }
 
-            TIC("broadcast result");
+            AMGCL_TIC("broadcast result");
             MPI_Bcast(&x[0], x.size(), dtype, 0, comm);
-            TOC("broadcast result");
-            TOC("coarse solve");
+            AMGCL_TOC("broadcast result");
+            AMGCL_TOC("coarse solve");
         }
 
         template <class Vec1, class Vec2>
         void postprocess(const Vec1 &rhs, Vec2 &x) const {
-            TIC("postprocess");
+            AMGCL_TIC("postprocess");
 
             // q = Ax
             backend::spmv(1, *A, x, 0, *q);
 
             // df = transp(Z) * (rhs - Ax)
-            TIC("local inner product");
+            AMGCL_TIC("local inner product");
             for(ptrdiff_t j = 0; j < ndv; ++j)
                 df[j] = backend::inner_product(rhs, *Z[j])
                       - backend::inner_product(*q,  *Z[j]);
-            TOC("local inner product");
+            AMGCL_TOC("local inner product");
 
             // dx = inv(E) * df
             coarse_solve(df, dx);
@@ -667,7 +689,7 @@ class subdomain_deflation {
             // x += Z * dx
             backend::lin_comb(ndv, &dx[dv_start[comm.rank]], Z, 1, x);
 
-            TOC("postprocess");
+            AMGCL_TOC("postprocess");
         }
 
 };

From 9bb13caaedfa544dec02d68f2544a5281c00b51f Mon Sep 17 00:00:00 2001
From: Riccardo Rossi <rrossi@cimne.upc.edu>
Date: Tue, 8 Aug 2017 08:38:05 +0200
Subject: [PATCH 04/12] renaming a macro

---
 .../amgcl/mpi/subdomain_deflation.hpp         | 76 +++++++++----------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/external_libraries/amgcl/mpi/subdomain_deflation.hpp b/external_libraries/amgcl/mpi/subdomain_deflation.hpp
index f9c74f3cc2d8..97aae2c3e1d1 100644
--- a/external_libraries/amgcl/mpi/subdomain_deflation.hpp
+++ b/external_libraries/amgcl/mpi/subdomain_deflation.hpp
@@ -145,7 +145,7 @@ class subdomain_deflation {
           q( backend_type::create_vector(nrows, bprm) ),
           S(nrows, prm.isolver, bprm, mpi::inner_product(mpi_comm))
         {
-            AMGCL_TIC("setup deflation");
+            TIC("setup deflation");
             typedef backend::crs<value_type, ptrdiff_t>                build_matrix;
             typedef typename backend::row_iterator<Matrix>::type       row_iterator1;
             typedef typename backend::row_iterator<build_matrix>::type row_iterator2;
@@ -170,7 +170,7 @@ class subdomain_deflation {
             ptrdiff_t loc_end = domain[comm.rank + 1];
 
             // Fill deflation vectors.
-            AMGCL_TIC("copy deflation vectors");
+            TIC("copy deflation vectors");
             {
                 std::vector<value_type> z(nrows);
                 for(int j = 0; j < ndv; ++j) {
@@ -179,12 +179,12 @@ class subdomain_deflation {
                     Z[j] = backend_type::copy_vector(z, bprm);
                 }
             }
-            AMGCL_TOC("copy deflation vectors");
+            TOC("copy deflation vectors");
 
             // Number of nonzeros in local and remote parts of the matrix.
             ptrdiff_t loc_nnz = 0, rem_nnz = 0;
 
-            AMGCL_TIC("first pass");
+            TIC("first pass");
             // First pass over Astrip rows:
             // 1. Count local and remote nonzeros,
             // 3. Build sparsity pattern of matrix AZ.
@@ -218,9 +218,9 @@ class subdomain_deflation {
                     }
                 }
             }
-            AMGCL_TOC("first pass");
+            TOC("first pass");
 
-            AMGCL_TIC("second pass");
+            TIC("second pass");
             // Second pass over Astrip rows:
             // 1. Build local and remote matrix parts.
             // 2. Build local part of AZ matrix.
@@ -273,7 +273,7 @@ class subdomain_deflation {
                 aloc->ptr[i+1] = loc_head;
                 arem->ptr[i+1] = rem_head;
             }
-            AMGCL_TOC("second pass");
+            TOC("second pass");
 
             // Create local preconditioner.
             P = boost::make_shared<LocalPrecond>( *aloc, prm.local, bprm );
@@ -284,7 +284,7 @@ class subdomain_deflation {
             Arem = backend_type::copy_matrix(arem, bprm);
             A = boost::make_shared<matrix>(*C, P->system_matrix(), *Arem);
 
-            AMGCL_TIC("A*Z");
+            TIC("A*Z");
             /* Finish construction of AZ */
             // Exchange deflation vectors
             std::vector<ptrdiff_t> zrecv_ptr(C->recv.nbr.size() + 1, 0);
@@ -358,12 +358,12 @@ class subdomain_deflation {
 
             std::rotate(az->ptr, az->ptr + nrows, az->ptr + nrows + 1);
             az->ptr[0] = 0;
-            AMGCL_TOC("A*Z");
+            TOC("A*Z");
 
             MPI_Waitall(C->send.req.size(), &C->send.req[0], MPI_STATUSES_IGNORE);
 
             /* Build deflated matrix E. */
-            AMGCL_TIC("assemble E");
+            TIC("assemble E");
             // Who is responsible for solution of coarse problem
             int nmasters = std::min(comm.size, DirectSolver::comm_size(nz, prm.dsolver));
             int nslaves  = (comm.size + nmasters - 1) / nmasters;
@@ -488,10 +488,10 @@ class subdomain_deflation {
                         dtype, 0, slaves_comm
                         );
             }
-            AMGCL_TOC("assemble E");
+            TOC("assemble E");
 
             // Prepare E factorization.
-            AMGCL_TIC("factorize E");
+            TIC("factorize E");
             if (comm.rank == master_rank) {
                 E = boost::make_shared<DirectSolver>(
                         masters_comm, Eptr.size() - 1, Eptr, Ecol, Eval, prm.dsolver
@@ -500,9 +500,9 @@ class subdomain_deflation {
                 cf.resize(Eptr.size() - 1);
                 cx.resize(Eptr.size() - 1);
             }
-            AMGCL_TOC("factorize E");
+            TOC("factorize E");
 
-            AMGCL_TOC("setup deflation");
+            TOC("setup deflation");
 
             // Move matrices to backend.
             AZ = backend_type::copy_matrix(az, bprm);
@@ -561,18 +561,18 @@ class subdomain_deflation {
 
         template <class Vec1, class Vec2>
         void mul(value_type alpha, const Vec1 &x, value_type beta, Vec2 &y) const {
-            AMGCL_TIC("top/spmv");
+            TIC("top/spmv");
             backend::spmv(alpha, *A, x, beta, y);
-            AMGCL_TOC("top/spmv");
+            TOC("top/spmv");
 
             project(y);
         }
 
         template <class Vec1, class Vec2, class Vec3>
         void residual(const Vec1 &f, const Vec2 &x, Vec3 &r) const {
-            AMGCL_TIC("top/residual");
+            TIC("top/residual");
             backend::residual(f, *A, x, r);
-            AMGCL_TOC("top/residual");
+            TOC("top/residual");
 
             project(r);
         }
@@ -614,27 +614,27 @@ class subdomain_deflation {
 
         template <class Vector>
         void project(Vector &x) const {
-            AMGCL_TIC("project");
+            TIC("project");
 
-            AMGCL_TIC("local inner product");
+            TIC("local inner product");
             for(ptrdiff_t j = 0; j < ndv; ++j)
                 df[j] = backend::inner_product(x, *Z[j]);
-            AMGCL_TOC("local inner product");
+            TOC("local inner product");
 
             coarse_solve(df, dx);
 
-            AMGCL_TIC("spmv");
+            TIC("spmv");
             backend::copy_to_backend(dx, *dd);
             backend::spmv(-1, *AZ, *dd, 1, x);
-            AMGCL_TOC("spmv");
+            TOC("spmv");
 
-            AMGCL_TOC("project");
+            TOC("project");
         }
 
         void coarse_solve(std::vector<value_type> &f, std::vector<value_type> &x) const
         {
-            AMGCL_TIC("coarse solve");
-            AMGCL_TIC("exchange rhs");
+            TIC("coarse solve");
+            TIC("exchange rhs");
             if (comm.rank == master_rank) {
                 MPI_Gatherv(
                         &f[0], f.size(), dtype, &cf[0],
@@ -647,41 +647,41 @@ class subdomain_deflation {
                         dtype, 0, slaves_comm
                         );
             }
-            AMGCL_TOC("exchange rhs");
+            TOC("exchange rhs");
 
             if (comm.rank == master_rank) {
-                AMGCL_TIC("call solver");
+                TIC("call solver");
                 (*E)(cf, cx);
-                AMGCL_TOC("call solver");
+                TOC("call solver");
 
-                AMGCL_TIC("gather result");
+                TIC("gather result");
                 MPI_Gatherv(
                         &cx[0], cx.size(), dtype, &x[0],
                         const_cast<int*>(&msize[0]), const_cast<int*>(&mstart[0]),
                         dtype, 0, masters_comm
                         );
-                AMGCL_TOC("gather result");
+                TOC("gather result");
             }
 
-            AMGCL_TIC("broadcast result");
+            TIC("broadcast result");
             MPI_Bcast(&x[0], x.size(), dtype, 0, comm);
-            AMGCL_TOC("broadcast result");
-            AMGCL_TOC("coarse solve");
+            TOC("broadcast result");
+            TOC("coarse solve");
         }
 
         template <class Vec1, class Vec2>
         void postprocess(const Vec1 &rhs, Vec2 &x) const {
-            AMGCL_TIC("postprocess");
+            TIC("postprocess");
 
             // q = Ax
             backend::spmv(1, *A, x, 0, *q);
 
             // df = transp(Z) * (rhs - Ax)
-            AMGCL_TIC("local inner product");
+            TIC("local inner product");
             for(ptrdiff_t j = 0; j < ndv; ++j)
                 df[j] = backend::inner_product(rhs, *Z[j])
                       - backend::inner_product(*q,  *Z[j]);
-            AMGCL_TOC("local inner product");
+            TOC("local inner product");
 
             // dx = inv(E) * df
             coarse_solve(df, dx);
@@ -689,7 +689,7 @@ class subdomain_deflation {
             // x += Z * dx
             backend::lin_comb(ndv, &dx[dv_start[comm.rank]], Z, 1, x);
 
-            AMGCL_TOC("postprocess");
+            TOC("postprocess");
         }
 
 };

From 3b5970aef5c850d803298df14728413b7b75fe71 Mon Sep 17 00:00:00 2001
From: Riccardo Rossi <rrossi@cimne.upc.edu>
Date: Tue, 8 Aug 2017 10:03:46 +0200
Subject: [PATCH 05/12] renaming and cleaning up the amgcl_mpi_solvers

---
 .../add_trilinos_linear_solvers_to_python.cpp |  34 +-
 .../amgcl_mpi_schur_complement_solver.h       | 378 ++++++++++++++++++
 .../trilinos_linear_solver_factory.py         |   4 +-
 3 files changed, 385 insertions(+), 31 deletions(-)
 create mode 100644 applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h

diff --git a/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp b/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp
index dd10f7bb4bb8..72c0211bb9a4 100644
--- a/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp
+++ b/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp
@@ -34,30 +34,6 @@
 #include "spaces/ublas_space.h"
 #include "includes/model_part.h"
 
-//strategies
-// #include "solving_strategies/strategies/solving_strategy.h"
-// #include "solving_strategies/strategies/residualbased_linear_strategy.h"
-// #include "solving_strategies/strategies/residualbased_newton_raphson_strategy.h"
-
-//schemes
-// #include "solving_strategies/schemes/scheme.h"
-// #include "custom_strategies/schemes/trilinos_residualbased_incrementalupdate_static_scheme.h"
-// #include "custom_strategies/schemes/trilinos_residualbased_lagrangian_monolithic_scheme.h"
-// #include "../../incompressible_fluid_application/custom_strategies/strategies/residualbased_predictorcorrector_velocity_bossak_scheme.h"
-// #include "custom_strategies/schemes/trilinos_predictorcorrector_velocity_bossak_scheme.h"
-
-//convergence criterias
-// #include "solving_strategies/convergencecriterias/convergence_criteria.h"
-// #include "solving_strategies/convergencecriterias/displacement_criteria.h"
-//
-// //Builder And Solver
-// // #include "solving_strategies/builder_and_solvers/builder_and_solver.h"
-// #include "custom_strategies/builder_and_solvers/trilinos_residualbased_elimination_builder_and_solver.h"
-// #include "custom_strategies/convergencecriterias/trilinos_displacement_criteria.h"
-// #include "custom_strategies/convergencecriterias/trilinos_up_criteria.h"
-// #include "custom_strategies/builder_and_solvers/trilinos_builder_and_solver_ML.h"
-// #include "custom_strategies/builder_and_solvers/trilinos_builder_and_solver_ML_vec.h"
-// #include "custom_strategies/builder_and_solvers/trilinos_builder_and_solver_ML_mixed.h"
 
 //linear solvers
 #include "linear_solvers/linear_solver.h"
@@ -73,8 +49,8 @@
 #include "external_includes/amesos_solver.h"
 #include "external_includes/ml_solver.h"
 
-#include "external_includes/amgcl_solver.h"
-#include "external_includes/mpi_amgcl_ns_solver.h"
+#include "external_includes/amgcl_mpi_solver.h"
+#include "external_includes/amgcl_mpi_schur_complement_solver.h"
 
 namespace Kratos
 {
@@ -136,9 +112,9 @@ void  AddLinearSolvers()
     .def("SetIntParameter", &AmgclMPISolverType::SetIntParameter)
     ;
     
-    typedef AmgclMPI_NS_Solver<TrilinosSparseSpaceType, TrilinosLocalSpaceType > AmgclMPI_NS_SolverType;
-    class_<AmgclMPI_NS_SolverType, bases<TrilinosLinearSolverType>, boost::noncopyable >
-    ("AmgclMPI_NS_Solver",init<Parameters>()) 
+    typedef AmgclMPISchurComplementSolver<TrilinosSparseSpaceType, TrilinosLocalSpaceType > AmgclMPISchurComplementSolverType;
+    class_<AmgclMPISchurComplementSolverType, bases<TrilinosLinearSolverType>, boost::noncopyable >
+    ("AmgclMPISchurComplementSolver",init<Parameters>()) 
     ;
     
     enum_<AztecScalingType>("AztecScalingType")
diff --git a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
new file mode 100644
index 000000000000..07965a245877
--- /dev/null
+++ b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
@@ -0,0 +1,378 @@
+//    |  /           |
+//    ' /   __| _` | __|  _ \   __|
+//    . \  |   (   | |   (   |\__ `
+//   _|\_\_|  \__,_|\__|\___/ ____/
+//                   Multi-Physics
+//
+//  License:		 BSD License
+//					 Kratos default license: kratos/license.txt
+//
+//  Main authors:    Denis Demidov
+//                   Riccardo Rossi
+//
+
+
+#if !defined(KRATOS_AMGCL_MPI_SCHUR_COMPLEMENT_SOLVER_H_INCLUDED )
+#define  KRATOS_AMGCL_MPI_SCHUR_COMPLEMENT_SOLVER_H_INCLUDED
+
+
+#ifndef AMGCL_PARAM_UNKNOWN
+#  define AMGCL_PARAM_UNKNOWN(name)                                            \
+      std::cerr << "AMGCL WARNING: unknown parameter " << name << std::endl
+#endif
+
+
+// External includes
+#include "boost/smart_ptr.hpp"
+
+// Project includes
+#include "includes/define.h"
+#include "includes/kratos_parameters.h"
+#include "linear_solvers/linear_solver.h"
+#include "external_includes/amgcl_mpi_solver.h"
+
+//aztec solver includes
+#include "AztecOO.h"
+#include "Epetra_LinearProblem.h"
+//#include "Teuchos_ParameterList.hpp"
+
+
+#include <amgcl/amg.hpp>
+#include <amgcl/adapter/epetra.hpp>
+#include <amgcl/profiler.hpp>
+
+#include <amgcl/mpi/make_solver.hpp>
+#include <amgcl/mpi/schur_pressure_correction.hpp>
+#include <amgcl/mpi/block_preconditioner.hpp>
+#include <amgcl/mpi/subdomain_deflation.hpp>
+#include <amgcl/mpi/direct_solver.hpp>
+#include <boost/property_tree/json_parser.hpp> //needed to print AMGCL internal settings
+
+
+
+namespace Kratos
+{
+
+
+
+template< class TSparseSpaceType, class TDenseSpaceType,
+          class TReordererType = Reorderer<TSparseSpaceType, TDenseSpaceType> >
+class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType,
+    TDenseSpaceType, TReordererType>
+{
+public:
+    /**
+     * Counted pointer of AmgclMPISchurComplementSolver
+     */
+    KRATOS_CLASS_POINTER_DEFINITION ( AmgclMPISchurComplementSolver );
+
+    typedef LinearSolver<TSparseSpaceType, TDenseSpaceType, TReordererType> BaseType;
+
+    typedef typename TSparseSpaceType::MatrixType SparseMatrixType;
+
+    typedef typename TSparseSpaceType::VectorType VectorType;
+
+    typedef typename TDenseSpaceType::MatrixType DenseMatrixType;
+
+
+    AmgclMPISchurComplementSolver ( Parameters rParameters )
+    {
+        Parameters default_parameters( R"(
+                                       {
+                                       "solver_type" : "AmgclMPISchurComplementSolver",
+                                       "krylov_type" : "fgmres",
+                                       "velocity_block_preconditioner" :
+                                        {
+                                            "krylov_type" : "lgmres",
+                                            "tolerance" : 1e-3,
+                                            "preconditioner_type" : "ilu0",
+                                            "max_iteration": 5
+                                        },
+                                        "pressure_block_preconditioner" :
+                                        {
+                                            "krylov_type" : "lgmres",
+                                            "tolerance" : 1e-2,
+                                            "preconditioner_type" : "spai0",
+                                            "max_iteration": 20
+                                        },
+                                       "tolerance" : 1e-9,
+                                       "gmres_krylov_space_dimension": 50,
+                                       "coarsening_type": "aggregation",
+                                       "max_iteration": 50,
+                                       "verbosity" : 1,
+                                       "scaling": false,
+                                       "coarse_enough" : 5000
+                                   }  )" );
+
+
+        //now validate agains defaults -- this also ensures no type mismatch
+        rParameters.ValidateAndAssignDefaults(default_parameters);
+
+        std::stringstream msg;
+
+        //validate if values are admissible
+        std::set<std::string> available_preconditioners = {"spai0","ilu0","damped_jacobi","gauss_seidel","chebyshev"};
+
+        //check velocity block settings
+        if(available_preconditioners.find(rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()) == available_preconditioners.end())
+        {
+            msg << "currently prescribed velocity_block_preconditioner preconditioner_type : " << rParameters["velocity_block_preconditioner"]["smoother_type"].GetString() << std::endl;
+            msg << "admissible values are : spai0,ilu0,damped_jacobi,gauss_seidel,chebyshev"<< std::endl;
+            KRATOS_THROW_ERROR(std::invalid_argument," smoother_type is invalid: ",msg.str());
+        }    
+
+        mCoarseEnough = rParameters["coarse_enough"].GetInt();
+
+        mTolerance = rParameters["tolerance"].GetDouble();
+        mMaxIterations = rParameters["max_iteration"].GetInt();
+        mVerbosity=rParameters["verbosity"].GetInt();
+        mprm.put("solver.type", rParameters["krylov_type"].GetString());
+        
+        if(rParameters["krylov_type"].GetString() == "gmres" || rParameters["krylov_type"].GetString() == "lgmres" || rParameters["krylov_type"].GetString() == "fgmres")
+            mprm.put("solver.M",  rParameters["gmres_krylov_space_dimension"].GetInt());
+
+        //setting velocity solver options
+        mprm.put("precond.usolver.solver.type", rParameters["velocity_block_preconditioner"]["krylov_type"].GetString());
+        mprm.put("precond.usolver.solver.tol", rParameters["velocity_block_preconditioner"]["tolerance"].GetDouble());
+        mprm.put("precond.usolver.solver.maxiter", rParameters["velocity_block_preconditioner"]["max_iteration"].GetInt());
+        mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString());
+
+        //setting pressure solver options
+        mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString());
+        mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble());
+        mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt());
+        mprm.put("precond.psolver.precond.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString());
+        mprm.put("precond.psolver.precond.coarsening.aggr.eps_strong", 0.0);
+        mprm.put("precond.psolver.precond.coarsening.aggr.block_size", 1);
+        mprm.put("precond.psolver.precond.coarse_enough",mCoarseEnough);
+
+    }
+
+
+    /**
+     * Destructor
+     */
+    virtual ~AmgclMPISchurComplementSolver() {}
+
+    /**
+     * Normal solve method.
+     * Solves the linear system Ax=b and puts the result on SystemVector& rX.
+     * rX is also th initial guess for iterative methods.
+     * @param rA. System matrix
+     * @param rX. Solution vector.
+     * @param rB. Right hand side vector.
+     */
+    bool Solve ( SparseMatrixType& rA, VectorType& rX, VectorType& rB )
+    {
+        KRATOS_TRY
+
+        using amgcl::prof;
+        prof.reset();
+
+        amgcl::mpi::communicator world ( MPI_COMM_WORLD );
+        if ( mVerbosity >=0 && world.rank == 0 ) {
+            std::cout << "World size: " << world.size << std::endl;
+        }
+
+        int chunk = rA.NumMyRows();
+        boost::iterator_range<double*> xrange ( rX.Values(), rX.Values() + chunk );
+        boost::iterator_range<double*> frange ( rB.Values(), rB.Values() + chunk );
+
+        if ( mVerbosity > 1 && world.rank == 0 ) {
+            write_json ( std::cout, mprm );
+        }
+
+        typedef amgcl::backend::builtin<double> Backend;
+        
+        typedef  amgcl::mpi::make_solver<
+            amgcl::mpi::schur_pressure_correction<
+                amgcl::mpi::make_solver<
+                    amgcl::mpi::block_preconditioner<
+                        amgcl::runtime::relaxation::as_preconditioner<Backend>
+                    >,
+                    amgcl::runtime::iterative_solver
+                >,
+                amgcl::mpi::make_solver<
+                    amgcl::mpi::subdomain_deflation<
+                        amgcl::runtime::amg<Backend>,
+                        amgcl::runtime::iterative_solver,
+                        amgcl::runtime::mpi::direct_solver<double>
+                    >,
+                    amgcl::runtime::iterative_solver
+                >
+            >,
+            amgcl::runtime::iterative_solver
+            > SDD;
+
+        boost::function<double(ptrdiff_t,unsigned)> dv = amgcl::mpi::constant_deflation(1);
+        mprm.put("precond.psolver.precond.num_def_vec", 1);
+        mprm.put("precond.psolver.precond.def_vec", &dv);
+        
+        mprm.put("precond.pmask", static_cast<void*>(&mPressureMask[0]));
+        mprm.put("precond.pmask_size", mPressureMask.size());
+
+        prof.tic ( "setup" );
+        SDD solve ( world, amgcl::backend::map ( rA ), mprm );
+        double tm_setup = prof.toc ( "setup" );
+
+        prof.tic ( "Solve" );
+        size_t iters;
+        double resid;
+        boost::tie ( iters, resid ) = solve ( frange, xrange );
+        double solve_tm = prof.toc ( "Solve" );
+
+
+
+        if ( rA.Comm().MyPID() == 0 ) {
+            if ( mVerbosity > 0 ) {
+                std::cout
+                        << "------- AMGCL -------\n" << std::endl
+                        << "Iterations      : " << iters   << std::endl
+                        << "Error           : " << resid   << std::endl
+                        << "amgcl setup time: " << tm_setup   << std::endl
+                        << "amgcl solve time: " << solve_tm   << std::endl;
+            }
+
+            if ( mVerbosity > 1 ) {
+                std::cout << prof  << std::endl;
+            }
+        }
+
+
+
+        return true;
+
+        KRATOS_CATCH ( "" );
+    }
+    
+    /**
+     * Assignment operator.
+     */
+    AmgclMPISchurComplementSolver& operator= ( const AmgclMPISchurComplementSolver& Other ) = delete;
+
+    /**
+     * Copy constructor.
+     */
+    AmgclMPISchurComplementSolver ( const AmgclMPISchurComplementSolver& Other ) = delete;
+
+    /**
+     * Multi solve method for solving a set of linear systems with same coefficient matrix.
+     * Solves the linear system Ax=b and puts the result on SystemVector& rX.
+     * rX is also th initial guess for iterative methods.
+     * @param rA. System matrix
+     * @param rX. Solution vector.
+     * @param rB. Right hand side vector.
+     */
+    bool Solve ( SparseMatrixType& rA, DenseMatrixType& rX, DenseMatrixType& rB ) override
+    {
+
+        return false;
+    }
+
+    /** Some solvers may require a minimum degree of knowledge of the structure of the matrix. To make an example
+    * when solving a mixed u-p problem, it is important to identify the row associated to v and p.
+    * another example is the automatic prescription of rotation null-space for smoothed-aggregation solvers
+    * which require knowledge on the spatial position of the nodes associated to a given dof.
+    * This function tells if the solver requires such data
+    */
+    bool AdditionalPhysicalDataIsNeeded() override
+    {
+        return true;
+    }
+
+    /** Some solvers may require a minimum degree of knowledge of the structure of the matrix. To make an example
+     * when solving a mixed u-p problem, it is important to identify the row associated to v and p.
+     * another example is the automatic prescription of rotation null-space for smoothed-aggregation solvers
+     * which require knowledge on the spatial position of the nodes associated to a given dof.
+     * This function is the place to eventually provide such data
+     */
+    void ProvideAdditionalData (
+        SparseMatrixType& rA,
+        VectorType& rX,
+        VectorType& rB,
+        typename ModelPart::DofsArrayType& rdof_set,
+        ModelPart& r_model_part
+    ) override
+    {
+
+        int my_pid = rA.Comm().MyPID();
+
+        //filling the pressure mask
+        if(mPressureMask.size() != static_cast<unsigned int>(rA.NumMyRows())) 
+            mPressureMask.resize( rA.NumMyRows(), false );
+
+        int counter = 0;
+        int npressures = 0;
+        for (ModelPart::DofsArrayType::iterator it = rdof_set.begin(); it!=rdof_set.end(); it++)
+        {
+            if( it->GetSolutionStepValue ( PARTITION_INDEX ) == my_pid )
+            {
+                mPressureMask[counter]  = (it->GetVariable().Key() == PRESSURE);
+                counter++;
+                
+#ifdef KRATOS_DEBUG
+                if(it->GetVariable().Key() == PRESSURE)
+                   npressures++; 
+#endif
+            }
+        }
+#ifdef KRATOS_DEBUG
+    std::cout << "MPI proc :" << my_pid << " npressures = " << npressures << " local rows = " << rA.NumMyRows();
+#endif
+        if(counter != rA.NumMyRows())
+            KRATOS_ERROR << "pressure mask as a size " << mPressureMask.size() << " which does not correspond with the number of local rows:" << rA.NumMyRows() << std::endl; 
+
+    }
+
+    /**
+     * Print information about this object.
+     */
+    void  PrintInfo ( std::ostream& rOStream ) const
+    {
+        rOStream << "AMGCL_MPI solver finished.";
+    }
+
+    /**
+     * Print object's data.
+     */
+    void  PrintData ( std::ostream& rOStream ) const
+    {
+    }
+
+private:
+
+    double mTolerance;
+    int mMaxIterations;
+    int mVerbosity = 0;
+    unsigned int mCoarseEnough = 5000;
+
+    std::vector< char > mPressureMask; //pressure mask
+    boost::property_tree::ptree mprm;
+
+
+
+
+};
+
+
+/**
+ * output stream function
+ */
+template<class TSparseSpaceType, class TDenseSpaceType, class TReordererType>
+inline std::ostream& operator << ( std::ostream& rOStream,
+                                   const AmgclMPISchurComplementSolver<TSparseSpaceType,
+                                   TDenseSpaceType, TReordererType>& rThis )
+{
+    rThis.PrintInfo ( rOStream );
+    rOStream << std::endl;
+    rThis.PrintData ( rOStream );
+
+    return rOStream;
+}
+
+
+}  // namespace Kratos.
+
+#endif // KRATOS_AMGCL_MPI_SCHUR_COMPLEMENT_SOLVER_H_INCLUDED  defined 
+
+
diff --git a/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py b/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py
index 853140f1616e..f948a881e7a1 100644
--- a/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py
+++ b/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py
@@ -21,8 +21,8 @@ def ConstructSolver(configuration):
         linear_solver = MultiLevelSolver(configuration)
     elif(solver_type == "AmgclMPISolver"):
         linear_solver = AmgclMPISolver(configuration);
-    elif(solver_type == "AmgclMPI_NS_Solver"):
-        linear_solver = AmgclMPI_NS_Solver(configuration);
+    elif(solver_type == "AmgclMPISchurComplementSolver"):
+        linear_solver = AmgclMPISchurComplementSolver(configuration);
     #
     elif(solver_type == "Deflated Conjugate gradient"):
         raise Exception("not implemented within trilinos")

From f757422ac59a4b5e2cbb658a97443f15fdc09027 Mon Sep 17 00:00:00 2001
From: Riccardo Rossi <rrossi@cimne.upc.edu>
Date: Tue, 8 Aug 2017 11:03:55 +0200
Subject: [PATCH 06/12] styling change

---
 .../external_includes/amgcl_mpi_schur_complement_solver.h   | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
index 07965a245877..7c45cc4c935c 100644
--- a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
+++ b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
@@ -290,8 +290,8 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType,
         SparseMatrixType& rA,
         VectorType& rX,
         VectorType& rB,
-        typename ModelPart::DofsArrayType& rdof_set,
-        ModelPart& r_model_part
+        typename ModelPart::DofsArrayType& rDofSet,
+        ModelPart& rModelPart
     ) override
     {
 
@@ -303,7 +303,7 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType,
 
         int counter = 0;
         int npressures = 0;
-        for (ModelPart::DofsArrayType::iterator it = rdof_set.begin(); it!=rdof_set.end(); it++)
+        for (ModelPart::DofsArrayType::iterator it = rDofSet.begin(); it!=rDofSet.end(); it++)
         {
             if( it->GetSolutionStepValue ( PARTITION_INDEX ) == my_pid )
             {

From de8b53db14ecf34e48f0ca9f7af8557ee45106e9 Mon Sep 17 00:00:00 2001
From: Riccardo Rossi <rrossi@cimne.upc.edu>
Date: Thu, 10 Aug 2017 14:39:38 +0200
Subject: [PATCH 07/12] bumping up amgcl library version

---
 .../amgcl/adapter/crs_tuple.hpp               |   1 +
 external_libraries/amgcl/amg.hpp              |  83 +++++----
 external_libraries/amgcl/backend/builtin.hpp  |   7 +-
 external_libraries/amgcl/backend/cuda.hpp     |  43 +++++
 external_libraries/amgcl/backend/hpx.hpp      |   1 -
 .../amgcl/backend/interface.hpp               |  27 ++-
 external_libraries/amgcl/backend/vexcl.hpp    |   2 +-
 .../amgcl/coarsening/aggregation.hpp          |   8 +-
 .../amgcl/coarsening/ruge_stuben.hpp          |  14 +-
 .../amgcl/coarsening/smoothed_aggr_emin.hpp   |   8 +-
 .../amgcl/coarsening/smoothed_aggregation.hpp | 158 ++++++++++++++++--
 .../coarsening/tentative_prolongation.hpp     |   4 +-
 .../amgcl/make_block_solver.hpp               |   4 +
 .../amgcl/mpi/inner_product.hpp               |   4 +-
 .../amgcl/mpi/schur_pressure_correction.hpp   |  51 ++++--
 .../amgcl/mpi/subdomain_deflation.hpp         |  76 ++++-----
 .../amgcl/preconditioner/dummy.hpp            |   2 +-
 .../schur_pressure_correction.hpp             |  27 ++-
 external_libraries/amgcl/profiler.hpp         |  17 +-
 .../amgcl/relaxation/chebyshev.hpp            |   6 +-
 .../amgcl/relaxation/detail/ilu_solve.hpp     | 123 +++++++++++---
 external_libraries/amgcl/relaxation/ilu0.hpp  |  22 +--
 external_libraries/amgcl/relaxation/iluk.hpp  |  23 +--
 external_libraries/amgcl/relaxation/ilut.hpp  |  23 +--
 external_libraries/amgcl/runtime.hpp          |  14 +-
 .../amgcl/solver/skyline_lu.hpp               | 127 +-------------
 external_libraries/amgcl/util.hpp             |  18 +-
 27 files changed, 557 insertions(+), 336 deletions(-)

diff --git a/external_libraries/amgcl/adapter/crs_tuple.hpp b/external_libraries/amgcl/adapter/crs_tuple.hpp
index 41e5e19ae70e..65c6ec76c4ce 100644
--- a/external_libraries/amgcl/adapter/crs_tuple.hpp
+++ b/external_libraries/amgcl/adapter/crs_tuple.hpp
@@ -64,6 +64,7 @@ AMG amg(boost::make_tuple(n,
 #include <boost/tuple/tuple.hpp>
 #include <boost/range/const_iterator.hpp>
 #include <boost/range/value_type.hpp>
+#include <boost/range/size.hpp>
 #include <boost/type_traits.hpp>
 
 #include <amgcl/util.hpp>
diff --git a/external_libraries/amgcl/amg.hpp b/external_libraries/amgcl/amg.hpp
index 86ed0099ac92..fd919f50508a 100644
--- a/external_libraries/amgcl/amg.hpp
+++ b/external_libraries/amgcl/amg.hpp
@@ -325,39 +325,48 @@ class amg {
             level() {}
 
             level(boost::shared_ptr<build_matrix> A,
-                    params &prm, const backend_params &bprm) :
-                m_rows(backend::rows(*A)),
-                m_nonzeros(backend::nonzeros(*A)),
-                f(Backend::create_vector(m_rows, bprm)),
-                u(Backend::create_vector(m_rows, bprm)),
-                t(Backend::create_vector(m_rows, bprm)),
-                A(Backend::copy_matrix(A, bprm)),
-                relax(boost::make_shared<relax_type>(*A, prm.relax, bprm))
-            {}
+                    params &prm, const backend_params &bprm)
+                : m_rows(backend::rows(*A)), m_nonzeros(backend::nonzeros(*A))
+            {
+                AMGCL_TIC("move to backend");
+                f = Backend::create_vector(m_rows, bprm);
+                u = Backend::create_vector(m_rows, bprm);
+                t = Backend::create_vector(m_rows, bprm);
+                this->A = Backend::copy_matrix(A, bprm);
+                AMGCL_TOC("move to backend");
+
+                AMGCL_TIC("relaxation");
+                relax = boost::make_shared<relax_type>(*A, prm.relax, bprm);
+                AMGCL_TOC("relaxation");
+            }
 
             boost::shared_ptr<build_matrix> step_down(
                     boost::shared_ptr<build_matrix> A,
                     params &prm, const backend_params &bprm)
             {
-                TIC("transfer operators");
+                AMGCL_TIC("transfer operators");
                 boost::shared_ptr<build_matrix> P, R;
                 boost::tie(P, R) = Coarsening::transfer_operators(
                         *A, prm.coarsening);
 
-                precondition(backend::cols(*P) > 0,
-                        "Zero-sized coarse level in amgcl (diagonal matrix?)");
+                if(backend::cols(*P) == 0) {
+                    // Zero-sized coarse level in amgcl (diagonal matrix?)
+                    return boost::shared_ptr<build_matrix>();
+                }
 
                 sort_rows(*P);
                 sort_rows(*R);
-                TOC("transfer operators");
+                AMGCL_TOC("transfer operators");
 
+                AMGCL_TIC("move to backend");
                 this->P = Backend::copy_matrix(P, bprm);
                 this->R = Backend::copy_matrix(R, bprm);
+                AMGCL_TOC("move to backend");
 
-                TIC("coarse operator");
+                AMGCL_TIC("coarse operator");
                 A = Coarsening::coarse_operator(*A, *P, *R, prm.coarsening);
                 sort_rows(*A);
-                TOC("coarse operator");
+                AMGCL_TOC("coarse operator");
 
                 return A;
             }
@@ -405,6 +414,8 @@ class amg {
                     "Matrix should be square!"
                     );
 
+            bool direct_coarse_solve = true;
+
             while( backend::rows(*A) > prm.coarse_enough && levels.size() < prm.max_levels) {
                 {
 #ifdef AMGCL_ASYNC_SETUP
@@ -416,10 +427,22 @@ class amg {
                 ready_to_cycle.notify_all();
 #endif
                 A = levels.back().step_down(A, prm, bprm);
+                if (!A) {
+                    // Zero-sized coarse level. Probably the system matrix on
+                    // this level is diagonal, should be easily solvable with a
+                    // couple of smoother iterations.
+                    direct_coarse_solve = false;
+                    break;
+                }
             }
 
-            if (levels.size() < prm.max_levels) {
-                TIC("coarsest level");
+            if (!A || backend::rows(*A) > prm.coarse_enough) {
+                // The coarse matrix is still too big to be solved directly.
+                direct_coarse_solve = false;
+            }
+
+            if (direct_coarse_solve) {
+                AMGCL_TIC("coarsest level");
                 if (prm.direct_coarse) {
                     level l;
                     l.create_coarse(A, bprm, levels.empty());
@@ -441,7 +464,7 @@ class amg {
 #ifdef AMGCL_ASYNC_SETUP
                 ready_to_cycle.notify_all();
 #endif
-                TOC("coarsest level");
+                AMGCL_TOC("coarsest level");
             }
         }
 
@@ -478,41 +501,35 @@ class amg {
 
             if (nxt == end) {
                 if (lvl->solve) {
-                    TIC("coarse");
+                    AMGCL_TIC("coarse");
                     (*lvl->solve)(rhs, x);
-                    TOC("coarse");
+                    AMGCL_TOC("coarse");
                 } else {
-                    TIC("relax");
+                    AMGCL_TIC("relax");
                     lvl->relax->apply_pre(*lvl->A, rhs, x, *lvl->t, prm.relax);
                     lvl->relax->apply_post(*lvl->A, rhs, x, *lvl->t, prm.relax);
-                    TOC("relax");
+                    AMGCL_TOC("relax");
                 }
             } else {
                 for (size_t j = 0; j < prm.ncycle; ++j) {
-                    TIC("relax");
+                    AMGCL_TIC("relax");
                     for(size_t i = 0; i < prm.npre; ++i)
                         lvl->relax->apply_pre(*lvl->A, rhs, x, *lvl->t, prm.relax);
-                    TOC("relax");
+                    AMGCL_TOC("relax");
 
-                    TIC("residual");
                     backend::residual(rhs, *lvl->A, x, *lvl->t);
-                    TOC("residual");
 
-                    TIC("restrict");
                     backend::spmv(math::identity<scalar_type>(), *lvl->R, *lvl->t, math::zero<scalar_type>(), *nxt->f);
-                    TOC("restrict");
 
                     backend::clear(*nxt->u);
                     cycle(nxt, *nxt->f, *nxt->u);
 
-                    TIC("prolongate");
                     backend::spmv(math::identity<scalar_type>(), *lvl->P, *nxt->u, math::identity<scalar_type>(), x);
-                    TOC("prolongate");
 
-                    TIC("relax");
-                    for(size_t i = 0; i < prm.npre; ++i)
+                    AMGCL_TIC("relax");
+                    for(size_t i = 0; i < prm.npost; ++i)
                         lvl->relax->apply_post(*lvl->A, rhs, x, *lvl->t, prm.relax);
-                    TOC("relax");
+                    AMGCL_TOC("relax");
                 }
             }
         }
diff --git a/external_libraries/amgcl/backend/builtin.hpp b/external_libraries/amgcl/backend/builtin.hpp
index c9e78881aea6..1e244b13a176 100644
--- a/external_libraries/amgcl/backend/builtin.hpp
+++ b/external_libraries/amgcl/backend/builtin.hpp
@@ -368,7 +368,7 @@ product(const MatrixA &A, const MatrixB &B, bool sort = false) {
     int nt = 1;
 #endif
 
-    if (nt > 4) {
+    if (nt > 16) {
         spgemm_rmerge(A, B, *C);
     } else {
         spgemm_saad(A, B, *C, sort);
@@ -428,7 +428,10 @@ class numa_vector {
         }
 
         template <class Vector>
-        numa_vector(const Vector &other) : n(other.size()), p(new T[n]) {
+        numa_vector(const Vector &other,
+                typename boost::disable_if<boost::is_integral<Vector>, int>::type = 0
+                ) : n(other.size()), p(new T[n])
+        {
 #pragma omp parallel for
             for(ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i)
                 p[i] = other[i];
diff --git a/external_libraries/amgcl/backend/cuda.hpp b/external_libraries/amgcl/backend/cuda.hpp
index 4f474a6bb623..21d22ecefa6a 100644
--- a/external_libraries/amgcl/backend/cuda.hpp
+++ b/external_libraries/amgcl/backend/cuda.hpp
@@ -86,6 +86,14 @@ inline void cuda_check(cusparseStatus_t rc, const char *file, int line) {
     }
 }
 
+inline void cuda_check(cudaError_t rc, const char *file, int line) {
+    if (rc != cudaSuccess) {
+        std::ostringstream msg;
+        msg << "CUDA error " << rc << " at \"" << file << ":" << line;
+        precondition(false, msg.str());
+    }
+}
+
 #define AMGCL_CALL_CUDA(rc)                                                    \
     amgcl::backend::detail::cuda_check(rc, __FILE__, __LINE__)
 
@@ -105,6 +113,10 @@ struct cuda_deleter {
     void operator()(csrsv2Info_t handle) {
         AMGCL_CALL_CUDA( cusparseDestroyCsrsv2Info(handle) );
     }
+
+    void operator()(cudaEvent_t handle) {
+        AMGCL_CALL_CUDA( cudaEventDestroy(handle) );
+    }
 };
 
 
@@ -536,6 +548,37 @@ struct vmul_impl<
     }
 };
 
+class cuda_event {
+    public:
+        cuda_event() : e(create_event(), backend::detail::cuda_deleter()) { }
+
+        float operator-(cuda_event tic) const {
+            float delta;
+            cudaEventSynchronize(e.get());
+            cudaEventElapsedTime(&delta, tic.e.get(), e.get());
+            return delta / 1000.0f;
+        }
+    private:
+        boost::shared_ptr<boost::remove_pointer<cudaEvent_t>::type> e;
+
+        static cudaEvent_t create_event() {
+            cudaEvent_t e;
+            cudaEventCreate(&e);
+            cudaEventRecord(e, 0);
+            return e;
+        }
+};
+
+struct cuda_clock {
+    typedef cuda_event value_type;
+
+    static const char* units() { return "s"; }
+
+    cuda_event current() const {
+        return cuda_event();
+    }
+};
+
 } // namespace backend
 } // namespace amgcl
 
diff --git a/external_libraries/amgcl/backend/hpx.hpp b/external_libraries/amgcl/backend/hpx.hpp
index 811d8e8c402e..e6e63121f202 100644
--- a/external_libraries/amgcl/backend/hpx.hpp
+++ b/external_libraries/amgcl/backend/hpx.hpp
@@ -38,7 +38,6 @@ THE SOFTWARE.
 #include <hpx/include/parallel_for_each.hpp>
 #include <hpx/include/parallel_transform_reduce.hpp>
 
-#include <boost/algorithm/minmax.hpp>
 #include <boost/range/irange.hpp>
 #include <boost/range/algorithm.hpp>
 #include <boost/range/iterator_range.hpp>
diff --git a/external_libraries/amgcl/backend/interface.hpp b/external_libraries/amgcl/backend/interface.hpp
index 9bab59964647..417823e2a147 100644
--- a/external_libraries/amgcl/backend/interface.hpp
+++ b/external_libraries/amgcl/backend/interface.hpp
@@ -270,7 +270,9 @@ void spmv(
         Beta beta,
         Vector2 &y)
 {
+    AMGCL_TIC("spmv");
     spmv_impl<Alpha, Matrix, Vector1, Beta, Vector2>::apply(alpha, A, x, beta, y);
+    AMGCL_TOC("spmv");
 }
 
 /// Computes residual error.
@@ -280,28 +282,36 @@ void spmv(
 template <class Matrix, class Vector1, class Vector2, class Vector3>
 void residual(const Vector1 &rhs, const Matrix &A, const Vector2 &x, Vector3 &r)
 {
+    AMGCL_TIC("residual");
     residual_impl<Matrix, Vector1, Vector2, Vector3>::apply(rhs, A, x, r);
+    AMGCL_TOC("residual");
 }
 
 /// Zeros out a vector.
 template <class Vector>
 void clear(Vector &x)
 {
+    AMGCL_TIC("clear");
     clear_impl<Vector>::apply(x);
+    AMGCL_TOC("clear");
 }
 
 /// Vector copy.
 template <class Vector1, class Vector2>
 void copy(const Vector1 &x, Vector2 &y)
 {
+    AMGCL_TIC("copy");
     copy_impl<Vector1, Vector2>::apply(x, y);
+    AMGCL_TOC("copy");
 }
 
 /// Copy data to backend.
 template <class Vector>
 void copy_to_backend(const std::vector<typename value_type<Vector>::type> &data, Vector &x)
 {
+    AMGCL_TIC("copy_to_backend");
     copy_to_backend_impl<Vector>::apply(data, x);
+    AMGCL_TOC("copy_to_backend");
 }
 
 /// Computes inner product of two vectors.
@@ -311,7 +321,15 @@ typename math::inner_product_impl<
     >::return_type
 inner_product(const Vector1 &x, const Vector2 &y)
 {
-    return inner_product_impl<Vector1, Vector2>::get(x, y);
+    typedef typename math::inner_product_impl<
+        typename value_type<Vector1>::type
+        >::return_type result_type;
+
+    AMGCL_TIC("inner_product");
+    result_type p = inner_product_impl<Vector1, Vector2>::get(x, y);
+    AMGCL_TOC("inner_product");
+
+    return p;
 }
 
 /// Computes linear combination of two vectors.
@@ -320,7 +338,9 @@ inner_product(const Vector1 &x, const Vector2 &y)
  */
 template <class A, class Vector1, class B, class Vector2>
 void axpby(A a, Vector1 const &x, B b, Vector2 &y) {
+    AMGCL_TIC("axpby");
     axpby_impl<A, Vector1, B, Vector2>::apply(a, x, b, y);
+    AMGCL_TOC("axpby");
 }
 
 /// Computes linear combination of three vectors.
@@ -329,7 +349,9 @@ void axpby(A a, Vector1 const &x, B b, Vector2 &y) {
  */
 template <class A, class Vector1, class B, class Vector2, class C, class Vector3>
 void axpbypcz(A a, Vector1 const &x, B b, Vector2 const &y, C c, Vector3 &z) {
+    AMGCL_TIC("axpbypcz");
     axpbypcz_impl<A, Vector1, B, Vector2, C, Vector3>::apply(a, x, b, y, c, z);
+    AMGCL_TOC("axpbypcz");
 }
 
 /// Computes element-wize vector product.
@@ -339,7 +361,9 @@ void axpbypcz(A a, Vector1 const &x, B b, Vector2 const &y, C c, Vector3 &z) {
 template <class Alpha, class Vector1, class Vector2, class Beta, class Vector3>
 void vmul(Alpha alpha, const Vector1 &x, const Vector2 &y, Beta beta, Vector3 &z)
 {
+    AMGCL_TIC("vmul");
     vmul_impl<Alpha, Vector1, Vector2, Beta, Vector3>::apply(alpha, x, y, beta, z);
+    AMGCL_TOC("vmul");
 }
 
 /// Is the relaxation supported by the backend?
@@ -357,7 +381,6 @@ struct coarsening_is_supported : boost::true_type {};
 template <class Coefs, class Vecs, class Coef, class Vec>
 void lin_comb(size_t n, const Coefs &c, const Vecs &v, const Coef &alpha, Vec &y) {
     axpby(c[0], *v[0], alpha, y);
-
     size_t i = 1;
     for(; i + 1 < n; i += 2)
         axpbypcz(c[i], *v[i], c[i+1], *v[i+1], math::identity<Coef>(), y);
diff --git a/external_libraries/amgcl/backend/vexcl.hpp b/external_libraries/amgcl/backend/vexcl.hpp
index 33123c29a96a..16fc5211893c 100644
--- a/external_libraries/amgcl/backend/vexcl.hpp
+++ b/external_libraries/amgcl/backend/vexcl.hpp
@@ -111,7 +111,7 @@ struct vexcl {
         params() : fast_matrix_setup(true) {}
 
         params(const boost::property_tree::ptree &p)
-            : AMGCL_PARAMS_IMPORT_CHILD(p, fast_matrix_setup)
+            : AMGCL_PARAMS_IMPORT_VALUE(p, fast_matrix_setup)
         {
             std::vector<vex::backend::command_queue> *ptr = 0;
             ptr = p.get("q", ptr);
diff --git a/external_libraries/amgcl/coarsening/aggregation.hpp b/external_libraries/amgcl/coarsening/aggregation.hpp
index 33f4a12dffce..b48c14d8144d 100644
--- a/external_libraries/amgcl/coarsening/aggregation.hpp
+++ b/external_libraries/amgcl/coarsening/aggregation.hpp
@@ -126,15 +126,15 @@ struct aggregation {
     {
         const size_t n = rows(A);
 
-        TIC("aggregates");
+        AMGCL_TIC("aggregates");
         Aggregates aggr(A, prm.aggr, prm.nullspace.cols);
-        TOC("aggregates");
+        AMGCL_TOC("aggregates");
 
-        TIC("interpolation");
+        AMGCL_TIC("interpolation");
         boost::shared_ptr<Matrix> P = tentative_prolongation<Matrix>(
                 n, aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size
                 );
-        TOC("interpolation");
+        AMGCL_TOC("interpolation");
 
         if (prm.nullspace.cols > 0)
             prm.aggr.block_size = prm.nullspace.cols;
diff --git a/external_libraries/amgcl/coarsening/ruge_stuben.hpp b/external_libraries/amgcl/coarsening/ruge_stuben.hpp
index 3aa6c6683623..c5df4785bb25 100644
--- a/external_libraries/amgcl/coarsening/ruge_stuben.hpp
+++ b/external_libraries/amgcl/coarsening/ruge_stuben.hpp
@@ -113,12 +113,12 @@ struct ruge_stuben {
         std::vector<char> cf(n, 'U');
         backend::crs<char, ptrdiff_t, ptrdiff_t> S;
 
-        TIC("C/F split");
+        AMGCL_TIC("C/F split");
         connect(A, prm.eps_strong, S, cf);
         cfsplit(A, S, cf);
-        TOC("C/F split");
+        AMGCL_TOC("C/F split");
 
-        TIC("interpolation");
+        AMGCL_TIC("interpolation");
         size_t nc = 0;
         std::vector<ptrdiff_t> cidx(n);
         for(size_t i = 0; i < n; ++i)
@@ -230,14 +230,14 @@ struct ruge_stuben {
                 Val  v = A.val[j];
 
                 if (!S.val[j] || cf[c] != 'C') continue;
-                if (prm.do_trunc && Amin[i] < v && v < Amax[i]) continue;
+                if (prm.do_trunc && Amin[i] <= v && v <= Amax[i]) continue;
 
                 P->col[row_head] = cidx[c];
                 P->val[row_head] = (v < zero ? alpha : beta) * v;
                 ++row_head;
             }
         }
-        TOC("interpolation");
+        AMGCL_TOC("interpolation");
 
         return boost::make_tuple(P, transpose(*P));
     }
@@ -286,6 +286,8 @@ struct ruge_stuben {
 
 #pragma omp parallel for
             for(ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i) {
+                S.ptr[i+1] = 0;
+
                 Val a_min = math::zero<Val>();
 
                 for(row_iterator a = row_begin(A, i); a; ++a)
@@ -300,8 +302,6 @@ struct ruge_stuben {
 
                 for(Ptr j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j)
                     S.val[j] = (A.col[j] != i && A.val[j] < a_min);
-
-                S.ptr[i+1] = 0;
             }
 
             // Transposition of S:
diff --git a/external_libraries/amgcl/coarsening/smoothed_aggr_emin.hpp b/external_libraries/amgcl/coarsening/smoothed_aggr_emin.hpp
index 092e6b0028de..e148644694f8 100644
--- a/external_libraries/amgcl/coarsening/smoothed_aggr_emin.hpp
+++ b/external_libraries/amgcl/coarsening/smoothed_aggr_emin.hpp
@@ -89,12 +89,12 @@ struct smoothed_aggr_emin {
         typedef typename backend::value_type<Matrix>::type Val;
         typedef ptrdiff_t Idx;
 
-        TIC("aggregates");
+        AMGCL_TIC("aggregates");
         Aggregates aggr(A, prm.aggr, prm.nullspace.cols);
         prm.aggr.eps_strong *= 0.5;
-        TOC("aggregates");
+        AMGCL_TOC("aggregates");
 
-        TIC("interpolation");
+        AMGCL_TIC("interpolation");
         boost::shared_ptr<Matrix> P_tent = tentative_prolongation<Matrix>(
                 rows(A), aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size
                 );
@@ -157,7 +157,7 @@ struct smoothed_aggr_emin {
 
         boost::shared_ptr<Matrix> P = interpolation(Af, dia, *P_tent, omega);
         boost::shared_ptr<Matrix> R = restriction  (Af, dia, *P_tent, omega);
-        TOC("interpolation");
+        AMGCL_TOC("interpolation");
 
         if (prm.nullspace.cols > 0)
             prm.aggr.block_size = prm.nullspace.cols;
diff --git a/external_libraries/amgcl/coarsening/smoothed_aggregation.hpp b/external_libraries/amgcl/coarsening/smoothed_aggregation.hpp
index 9e04b2d112f1..b46b53ac53da 100644
--- a/external_libraries/amgcl/coarsening/smoothed_aggregation.hpp
+++ b/external_libraries/amgcl/coarsening/smoothed_aggregation.hpp
@@ -31,9 +31,15 @@ THE SOFTWARE.
  * \brief  Smoothed aggregation coarsening scheme.
  */
 
+#ifdef _OPENMP
+#  include <omp.h>
+#endif
+
 #include <boost/tuple/tuple.hpp>
 #include <boost/shared_ptr.hpp>
 #include <boost/make_shared.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_real_distribution.hpp>
 
 #include <amgcl/backend/builtin.hpp>
 #include <amgcl/coarsening/detail/galerkin.hpp>
@@ -60,8 +66,14 @@ struct smoothed_aggregation {
         /// Near nullspace parameters.
         nullspace_params nullspace;
 
-        /// Relaxation factor \f$\omega\f$.
+        /// Relaxation factor.
         /**
+         * Used as a scaling for the damping factor omega.
+         * When estimate_spectral_radius is set, then
+         *   omega = relax * (4/3) / rho.
+         * Otherwise
+         *   omega = relax * (2/3).
+         *
          * Piecewise constant prolongation \f$\tilde P\f$ from non-smoothed
          * aggregation is improved by a smoothing to get the final prolongation
          * matrix \f$P\f$. Simple Jacobi smoother is used here, giving the
@@ -82,20 +94,33 @@ struct smoothed_aggregation {
          */
         float relax;
 
-        params() : relax(0.666f) { }
+        // Use power iterations to estimate the matrix spectral radius.
+        // This usually improves convergence rate and results in faster solves,
+        // but costs some time during setup.
+        bool estimate_spectral_radius;
+
+        // Number of power iterations to apply for the spectral radius
+        // estimation.
+        int power_iters;
+
+        params() : relax(1.0f), estimate_spectral_radius(true), power_iters(5) { }
 
         params(const boost::property_tree::ptree &p)
             : AMGCL_PARAMS_IMPORT_CHILD(p, aggr),
               AMGCL_PARAMS_IMPORT_CHILD(p, nullspace),
-              AMGCL_PARAMS_IMPORT_VALUE(p, relax)
+              AMGCL_PARAMS_IMPORT_VALUE(p, relax),
+              AMGCL_PARAMS_IMPORT_VALUE(p, estimate_spectral_radius),
+              AMGCL_PARAMS_IMPORT_VALUE(p, power_iters)
         {
-            AMGCL_PARAMS_CHECK(p, (aggr)(nullspace)(relax));
+            AMGCL_PARAMS_CHECK(p, (aggr)(nullspace)(relax)(estimate_spectral_radius)(power_iters));
         }
 
         void get(boost::property_tree::ptree &p, const std::string &path) const {
             AMGCL_PARAMS_EXPORT_CHILD(p, path, aggr);
             AMGCL_PARAMS_EXPORT_CHILD(p, path, nullspace);
             AMGCL_PARAMS_EXPORT_VALUE(p, path, relax);
+            AMGCL_PARAMS_EXPORT_VALUE(p, path, estimate_spectral_radius);
+            AMGCL_PARAMS_EXPORT_VALUE(p, path, power_iters);
         }
     };
 
@@ -109,12 +134,12 @@ struct smoothed_aggregation {
 
         const size_t n = rows(A);
 
-        TIC("aggregates");
+        AMGCL_TIC("aggregates");
         Aggregates aggr(A, prm.aggr, prm.nullspace.cols);
         prm.aggr.eps_strong *= 0.5;
-        TOC("aggregates");
+        AMGCL_TOC("aggregates");
 
-        TIC("interpolation");
+        AMGCL_TIC("interpolation");
         boost::shared_ptr<Matrix> P_tent = tentative_prolongation<Matrix>(
                 n, aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size
                 );
@@ -122,6 +147,13 @@ struct smoothed_aggregation {
         boost::shared_ptr<Matrix> P = boost::make_shared<Matrix>();
         P->set_size(rows(*P_tent), cols(*P_tent), true);
 
+        scalar_type omega = prm.relax;
+        if (prm.estimate_spectral_radius) {
+            omega *= static_cast<scalar_type>(4.0/3) / spectral_radius(A, prm.power_iters);
+        } else {
+            omega *= static_cast<scalar_type>(2.0/3);
+        }
+
 #pragma omp parallel
         {
             std::vector<ptrdiff_t> marker(P->ncols, -1);
@@ -179,8 +211,8 @@ struct smoothed_aggregation {
                     if (ca != i && !aggr.strong_connection[ja]) continue;
 
                     value_type va = (ca == i)
-                        ? static_cast<value_type>(static_cast<scalar_type>(1 - prm.relax) * math::identity<value_type>())
-                        : static_cast<value_type>(static_cast<scalar_type>(-prm.relax) * dia * A.val[ja]);
+                        ? static_cast<value_type>(static_cast<scalar_type>(1 - omega) * math::identity<value_type>())
+                        : static_cast<value_type>(static_cast<scalar_type>(-omega) * dia * A.val[ja]);
 
                     for(ptrdiff_t jp = P_tent->ptr[ca], ep = P_tent->ptr[ca+1]; jp < ep; ++jp) {
                         ptrdiff_t cp = P_tent->col[jp];
@@ -198,7 +230,7 @@ struct smoothed_aggregation {
                 }
             }
         }
-        TOC("interpolation");
+        AMGCL_TOC("interpolation");
 
         if (prm.nullspace.cols > 0)
             prm.aggr.block_size = prm.nullspace.cols;
@@ -218,6 +250,112 @@ struct smoothed_aggregation {
     {
         return detail::galerkin(A, P, R);
     }
+
+    // Uses power iteration to estimate spectral readius of the matrix,
+    // scaled by its inverse diagonal.
+    template <class Matrix>
+    static
+    typename math::scalar_of<typename backend::value_type<Matrix>::type>::type
+    spectral_radius(const Matrix &A, int power_iters)
+    {
+        typedef typename backend::value_type<Matrix>::type   value_type;
+        typedef typename math::rhs_of<value_type>::type      rhs_type;
+        typedef typename math::scalar_of<value_type>::type   scalar_type;
+
+        const ptrdiff_t n = backend::rows(A);
+
+        backend::numa_vector<value_type> D(n, false);
+        backend::numa_vector<rhs_type>   b0(n, false), b1(n, false);
+
+        // Fill the initial vector with random values.
+        // Also extract the inverted matrix diagonal values.
+        scalar_type b0_norm = 0;
+#pragma omp parallel
+        {
+#ifdef _OPENMP
+            int tid = omp_get_thread_num();
+#else
+            int tid = 0;
+#endif
+            boost::random::mt11213b rng(tid);
+            boost::random::uniform_real_distribution<scalar_type> rnd(-1, 1);
+
+            scalar_type loc_norm = 0;
+
+#pragma omp for nowait
+            for(ptrdiff_t i = 0; i < n; ++i) {
+                rhs_type v = math::constant<rhs_type>(rnd(rng));
+
+                b0[i] = v;
+                loc_norm += math::norm(math::inner_product(v,v));
+
+                for(ptrdiff_t j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) {
+                    if (A.col[j] == i) {
+                        D[i] = math::inverse(A.val[j]);
+                        break;
+                    }
+                }
+            }
+
+#pragma omp critical
+            b0_norm += loc_norm;
+        }
+
+        // Normalize b0
+        b0_norm = 1 / sqrt(b0_norm);
+#pragma omp parallel for
+        for(ptrdiff_t i = 0; i < n; ++i) {
+            b0[i] = b0_norm * b0[i];
+        }
+
+        scalar_type radius = 1;
+
+        for(int iter = 0; iter < power_iters;) {
+            // b1 = (D * A) * b0
+            // b1_norm = ||b1||
+            // radius = <b1,b0>
+            scalar_type b1_norm = 0;
+            radius = 0;
+#pragma omp parallel
+            {
+                scalar_type loc_norm = 0;
+                scalar_type loc_radi = 0;
+
+#pragma omp for nowait
+                for(ptrdiff_t i = 0; i < n; ++i) {
+                    rhs_type s = math::zero<rhs_type>();
+
+                    for(ptrdiff_t j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) {
+                        s += A.val[j] * b0[A.col[j]];
+                    }
+
+                    s = D[i] * s;
+
+                    loc_norm += math::norm(math::inner_product(s, s));
+                    loc_radi += math::norm(math::inner_product(s, b0[i]));
+
+                    b1[i] = s;
+                }
+
+#pragma omp critical
+                {
+                    b1_norm += loc_norm;
+                    radius  += loc_radi;
+                }
+            }
+
+            if (++iter < power_iters) {
+                // b0 = b1 / b1_norm
+                b1_norm = 1 / sqrt(b1_norm);
+#pragma omp parallel for
+                for(ptrdiff_t i = 0; i < n; ++i) {
+                    b0[i] = b1_norm * b1[i];
+                }
+            }
+        }
+
+        return radius < 0 ? static_cast<scalar_type>(2) : radius;
+    }
 };
 
 } // namespace coarsening
diff --git a/external_libraries/amgcl/coarsening/tentative_prolongation.hpp b/external_libraries/amgcl/coarsening/tentative_prolongation.hpp
index 497e075a871d..58e8f5e7784a 100644
--- a/external_libraries/amgcl/coarsening/tentative_prolongation.hpp
+++ b/external_libraries/amgcl/coarsening/tentative_prolongation.hpp
@@ -129,7 +129,7 @@ boost::shared_ptr<Matrix> tentative_prolongation(
 
     boost::shared_ptr<Matrix> P = boost::make_shared<Matrix>();
 
-    TIC("tentative");
+    AMGCL_TIC("tentative");
     if (nullspace.cols > 0) {
         // Sort fine points by aggregate number.
         // Put points not belonging to any aggregate to the end of the list.
@@ -210,7 +210,7 @@ boost::shared_ptr<Matrix> tentative_prolongation(
             }
         }
     }
-    TOC("tentative");
+    AMGCL_TOC("tentative");
 
     return P;
 }
diff --git a/external_libraries/amgcl/make_block_solver.hpp b/external_libraries/amgcl/make_block_solver.hpp
index c65f58493187..37167245e2b6 100644
--- a/external_libraries/amgcl/make_block_solver.hpp
+++ b/external_libraries/amgcl/make_block_solver.hpp
@@ -117,6 +117,10 @@ class make_block_solver {
         typename Precond::matrix const& system_matrix() const {
             return S->system_matrix();
         }
+
+        friend std::ostream& operator<<(std::ostream &os, const make_block_solver &p) {
+            return os << *p.S << std::endl;
+        }
     private:
         typedef make_solver<Precond, IterativeSolver> Solver;
         boost::shared_ptr<Solver> S;
diff --git a/external_libraries/amgcl/mpi/inner_product.hpp b/external_libraries/amgcl/mpi/inner_product.hpp
index d484024b86c8..2b4311504907 100644
--- a/external_libraries/amgcl/mpi/inner_product.hpp
+++ b/external_libraries/amgcl/mpi/inner_product.hpp
@@ -48,7 +48,7 @@ struct inner_product {
     template <class Vec1, class Vec2>
     typename backend::value_type<Vec1>::type
     operator()(const Vec1 &x, const Vec2 &y) const {
-        TIC("inner product");
+        AMGCL_TIC("inner product");
         typedef typename backend::value_type<Vec1>::type value_type;
 
         value_type lsum = backend::inner_product(x, y);
@@ -56,7 +56,7 @@ struct inner_product {
 
         MPI_Allreduce(&lsum, &gsum, 1, datatype<value_type>(), MPI_SUM, comm);
 
-        TOC("inner product");
+        AMGCL_TOC("inner product");
         return gsum;
     }
 };
diff --git a/external_libraries/amgcl/mpi/schur_pressure_correction.hpp b/external_libraries/amgcl/mpi/schur_pressure_correction.hpp
index fd3f9706ceaa..afab65dbab86 100644
--- a/external_libraries/amgcl/mpi/schur_pressure_correction.hpp
+++ b/external_libraries/amgcl/mpi/schur_pressure_correction.hpp
@@ -77,11 +77,17 @@ class schur_pressure_correction {
 
             std::vector<char> pmask;
 
-            params() {}
+            // Approximate Kuu^-1 with inverted diagonal of Kuu during
+            // construction of matrix-less Schur complement.
+            // When false, USolver is used instead.
+            bool approx_schur;
+
+            params() : approx_schur(true) {}
 
             params(const boost::property_tree::ptree &p)
                 : AMGCL_PARAMS_IMPORT_CHILD(p, usolver),
-                  AMGCL_PARAMS_IMPORT_CHILD(p, psolver)
+                  AMGCL_PARAMS_IMPORT_CHILD(p, psolver),
+                  AMGCL_PARAMS_IMPORT_VALUE(p, approx_schur)
             {
                 void *pm = 0;
                 size_t n = 0;
@@ -100,13 +106,14 @@ class schur_pressure_correction {
 
                 pmask.assign(static_cast<char*>(pm), static_cast<char*>(pm) + n);
 
-                AMGCL_PARAMS_CHECK(p, (usolver)(psolver)(pmask)(pmask_size));
+                AMGCL_PARAMS_CHECK(p, (usolver)(psolver)(pmask)(pmask_size)(approx_schur));
             }
 
             void get(boost::property_tree::ptree &p, const std::string &path = "") const
             {
                 AMGCL_PARAMS_EXPORT_CHILD(p, path, usolver);
                 AMGCL_PARAMS_EXPORT_CHILD(p, path, psolver);
+                AMGCL_PARAMS_EXPORT_VALUE(p, path, approx_schur);
             }
         } prm;
 
@@ -233,7 +240,7 @@ class schur_pressure_correction {
             Kup_loc->set_size(nu, np, true);
 
             Kpu_rem->set_size(np, 0, true);
-            Kup_rem->set_size(np, 0, true);
+            Kup_rem->set_size(nu, 0, true);
 
 #pragma omp parallel for
             for(ptrdiff_t i = 0; i < n; ++i) {
@@ -411,6 +418,9 @@ class schur_pressure_correction {
 
             tmp = backend_type::create_vector(nu, bprm);
 
+            if (prm.approx_schur)
+                M = backend_type::copy_vector(diagonal(*Kuu, /*invert = */true), bprm);
+
             // Scatter/Gather matrices
             boost::shared_ptr<build_matrix> x2u = boost::make_shared<build_matrix>();
             boost::shared_ptr<build_matrix> x2p = boost::make_shared<build_matrix>();
@@ -495,53 +505,57 @@ class schur_pressure_correction {
 #endif
                 ) const
         {
-            TIC("split variables");
+            AMGCL_TIC("split variables");
             backend::spmv(1, *x2u, rhs, 0, *rhs_u);
             backend::spmv(1, *x2p, rhs, 0, *rhs_p);
-            TOC("split variables");
+            AMGCL_TOC("split variables");
 
             // Ai u = rhs_u
-            TIC("solve U");
+            AMGCL_TIC("solve U");
             backend::clear(*u);
             report("U1", (*U)(*rhs_u, *u));
-            TOC("solve U");
+            AMGCL_TOC("solve U");
 
             // rhs_p -= Kpu u
-            TIC("solve P");
+            AMGCL_TIC("solve P");
             backend::spmv(-1, *Kpu, *u, 1, *rhs_p);
 
             // S p = rhs_p
             backend::clear(*p);
             report("P", (*P)(*this, *rhs_p, *p));
-            TOC("solve P");
+            AMGCL_TOC("solve P");
 
             // rhs_u -= Kup p
-            TIC("Update U");
+            AMGCL_TIC("Update U");
             backend::spmv(-1, *Kup, *p, 1, *rhs_u);
 
             // Ai u = rhs_u
             backend::clear(*u);
             report("U2", (*U)(*rhs_u, *u));
-            TOC("Update U");
+            AMGCL_TOC("Update U");
 
-            TIC("merge variables");
+            AMGCL_TIC("merge variables");
             backend::clear(x);
             backend::spmv(1, *u2x, *u, 1, x);
             backend::spmv(1, *p2x, *p, 1, x);
-            TOC("merge variables");
+            AMGCL_TOC("merge variables");
         }
 
         template <class Alpha, class Vec1, class Beta, class Vec2>
         void spmv(Alpha alpha, const Vec1 &x, Beta beta, Vec2 &y) const {
             // y = beta y + alpha S x, where S = Kpp - Kpu Kuu^-1 Kup
-            TIC("matrix-free spmv");
+            AMGCL_TIC("matrix-free spmv");
             backend::spmv(alpha, P->system_matrix(), x, beta, y);
 
             backend::spmv(1, *Kup, x, 0, *tmp);
-            backend::clear(*u);
-            (*U)(*tmp, *u);
+            if (prm.approx_schur) {
+                backend::vmul(1, *M, *tmp, 0, *u);
+            } else {
+                backend::clear(*u);
+                (*U)(*tmp, *u);
+            }
             backend::spmv(-alpha, *Kpu, *u, 1, y);
-            TOC("matrix-free spmv");
+            AMGCL_TOC("matrix-free spmv");
         }
     private:
         typedef comm_pattern<backend_type> CommPattern;
@@ -553,6 +567,7 @@ class schur_pressure_correction {
         boost::shared_ptr<bmatrix>  x2p, x2u, p2x, u2x;
         boost::shared_ptr<matrix> K, Kpu, Kup;
         boost::shared_ptr<vector>  rhs_u, rhs_p, u, p, tmp;
+        boost::shared_ptr<typename backend_type::matrix_diagonal> M;
 
         boost::shared_ptr<USolver> U;
         boost::shared_ptr<PSolver> P;
diff --git a/external_libraries/amgcl/mpi/subdomain_deflation.hpp b/external_libraries/amgcl/mpi/subdomain_deflation.hpp
index 97aae2c3e1d1..f9c74f3cc2d8 100644
--- a/external_libraries/amgcl/mpi/subdomain_deflation.hpp
+++ b/external_libraries/amgcl/mpi/subdomain_deflation.hpp
@@ -145,7 +145,7 @@ class subdomain_deflation {
           q( backend_type::create_vector(nrows, bprm) ),
           S(nrows, prm.isolver, bprm, mpi::inner_product(mpi_comm))
         {
-            TIC("setup deflation");
+            AMGCL_TIC("setup deflation");
             typedef backend::crs<value_type, ptrdiff_t>                build_matrix;
             typedef typename backend::row_iterator<Matrix>::type       row_iterator1;
             typedef typename backend::row_iterator<build_matrix>::type row_iterator2;
@@ -170,7 +170,7 @@ class subdomain_deflation {
             ptrdiff_t loc_end = domain[comm.rank + 1];
 
             // Fill deflation vectors.
-            TIC("copy deflation vectors");
+            AMGCL_TIC("copy deflation vectors");
             {
                 std::vector<value_type> z(nrows);
                 for(int j = 0; j < ndv; ++j) {
@@ -179,12 +179,12 @@ class subdomain_deflation {
                     Z[j] = backend_type::copy_vector(z, bprm);
                 }
             }
-            TOC("copy deflation vectors");
+            AMGCL_TOC("copy deflation vectors");
 
             // Number of nonzeros in local and remote parts of the matrix.
             ptrdiff_t loc_nnz = 0, rem_nnz = 0;
 
-            TIC("first pass");
+            AMGCL_TIC("first pass");
             // First pass over Astrip rows:
             // 1. Count local and remote nonzeros,
             // 3. Build sparsity pattern of matrix AZ.
@@ -218,9 +218,9 @@ class subdomain_deflation {
                     }
                 }
             }
-            TOC("first pass");
+            AMGCL_TOC("first pass");
 
-            TIC("second pass");
+            AMGCL_TIC("second pass");
             // Second pass over Astrip rows:
             // 1. Build local and remote matrix parts.
             // 2. Build local part of AZ matrix.
@@ -273,7 +273,7 @@ class subdomain_deflation {
                 aloc->ptr[i+1] = loc_head;
                 arem->ptr[i+1] = rem_head;
             }
-            TOC("second pass");
+            AMGCL_TOC("second pass");
 
             // Create local preconditioner.
             P = boost::make_shared<LocalPrecond>( *aloc, prm.local, bprm );
@@ -284,7 +284,7 @@ class subdomain_deflation {
             Arem = backend_type::copy_matrix(arem, bprm);
             A = boost::make_shared<matrix>(*C, P->system_matrix(), *Arem);
 
-            TIC("A*Z");
+            AMGCL_TIC("A*Z");
             /* Finish construction of AZ */
             // Exchange deflation vectors
             std::vector<ptrdiff_t> zrecv_ptr(C->recv.nbr.size() + 1, 0);
@@ -358,12 +358,12 @@ class subdomain_deflation {
 
             std::rotate(az->ptr, az->ptr + nrows, az->ptr + nrows + 1);
             az->ptr[0] = 0;
-            TOC("A*Z");
+            AMGCL_TOC("A*Z");
 
             MPI_Waitall(C->send.req.size(), &C->send.req[0], MPI_STATUSES_IGNORE);
 
             /* Build deflated matrix E. */
-            TIC("assemble E");
+            AMGCL_TIC("assemble E");
             // Who is responsible for solution of coarse problem
             int nmasters = std::min(comm.size, DirectSolver::comm_size(nz, prm.dsolver));
             int nslaves  = (comm.size + nmasters - 1) / nmasters;
@@ -488,10 +488,10 @@ class subdomain_deflation {
                         dtype, 0, slaves_comm
                         );
             }
-            TOC("assemble E");
+            AMGCL_TOC("assemble E");
 
             // Prepare E factorization.
-            TIC("factorize E");
+            AMGCL_TIC("factorize E");
             if (comm.rank == master_rank) {
                 E = boost::make_shared<DirectSolver>(
                         masters_comm, Eptr.size() - 1, Eptr, Ecol, Eval, prm.dsolver
@@ -500,9 +500,9 @@ class subdomain_deflation {
                 cf.resize(Eptr.size() - 1);
                 cx.resize(Eptr.size() - 1);
             }
-            TOC("factorize E");
+            AMGCL_TOC("factorize E");
 
-            TOC("setup deflation");
+            AMGCL_TOC("setup deflation");
 
             // Move matrices to backend.
             AZ = backend_type::copy_matrix(az, bprm);
@@ -561,18 +561,18 @@ class subdomain_deflation {
 
         template <class Vec1, class Vec2>
         void mul(value_type alpha, const Vec1 &x, value_type beta, Vec2 &y) const {
-            TIC("top/spmv");
+            AMGCL_TIC("top/spmv");
             backend::spmv(alpha, *A, x, beta, y);
-            TOC("top/spmv");
+            AMGCL_TOC("top/spmv");
 
             project(y);
         }
 
         template <class Vec1, class Vec2, class Vec3>
         void residual(const Vec1 &f, const Vec2 &x, Vec3 &r) const {
-            TIC("top/residual");
+            AMGCL_TIC("top/residual");
             backend::residual(f, *A, x, r);
-            TOC("top/residual");
+            AMGCL_TOC("top/residual");
 
             project(r);
         }
@@ -614,27 +614,27 @@ class subdomain_deflation {
 
         template <class Vector>
         void project(Vector &x) const {
-            TIC("project");
+            AMGCL_TIC("project");
 
-            TIC("local inner product");
+            AMGCL_TIC("local inner product");
             for(ptrdiff_t j = 0; j < ndv; ++j)
                 df[j] = backend::inner_product(x, *Z[j]);
-            TOC("local inner product");
+            AMGCL_TOC("local inner product");
 
             coarse_solve(df, dx);
 
-            TIC("spmv");
+            AMGCL_TIC("spmv");
             backend::copy_to_backend(dx, *dd);
             backend::spmv(-1, *AZ, *dd, 1, x);
-            TOC("spmv");
+            AMGCL_TOC("spmv");
 
-            TOC("project");
+            AMGCL_TOC("project");
         }
 
         void coarse_solve(std::vector<value_type> &f, std::vector<value_type> &x) const
         {
-            TIC("coarse solve");
-            TIC("exchange rhs");
+            AMGCL_TIC("coarse solve");
+            AMGCL_TIC("exchange rhs");
             if (comm.rank == master_rank) {
                 MPI_Gatherv(
                         &f[0], f.size(), dtype, &cf[0],
@@ -647,41 +647,41 @@ class subdomain_deflation {
                         dtype, 0, slaves_comm
                         );
             }
-            TOC("exchange rhs");
+            AMGCL_TOC("exchange rhs");
 
             if (comm.rank == master_rank) {
-                TIC("call solver");
+                AMGCL_TIC("call solver");
                 (*E)(cf, cx);
-                TOC("call solver");
+                AMGCL_TOC("call solver");
 
-                TIC("gather result");
+                AMGCL_TIC("gather result");
                 MPI_Gatherv(
                         &cx[0], cx.size(), dtype, &x[0],
                         const_cast<int*>(&msize[0]), const_cast<int*>(&mstart[0]),
                         dtype, 0, masters_comm
                         );
-                TOC("gather result");
+                AMGCL_TOC("gather result");
             }
 
-            TIC("broadcast result");
+            AMGCL_TIC("broadcast result");
             MPI_Bcast(&x[0], x.size(), dtype, 0, comm);
-            TOC("broadcast result");
-            TOC("coarse solve");
+            AMGCL_TOC("broadcast result");
+            AMGCL_TOC("coarse solve");
         }
 
         template <class Vec1, class Vec2>
         void postprocess(const Vec1 &rhs, Vec2 &x) const {
-            TIC("postprocess");
+            AMGCL_TIC("postprocess");
 
             // q = Ax
             backend::spmv(1, *A, x, 0, *q);
 
             // df = transp(Z) * (rhs - Ax)
-            TIC("local inner product");
+            AMGCL_TIC("local inner product");
             for(ptrdiff_t j = 0; j < ndv; ++j)
                 df[j] = backend::inner_product(rhs, *Z[j])
                       - backend::inner_product(*q,  *Z[j]);
-            TOC("local inner product");
+            AMGCL_TOC("local inner product");
 
             // dx = inv(E) * df
             coarse_solve(df, dx);
@@ -689,7 +689,7 @@ class subdomain_deflation {
             // x += Z * dx
             backend::lin_comb(ndv, &dx[dv_start[comm.rank]], Z, 1, x);
 
-            TOC("postprocess");
+            AMGCL_TOC("postprocess");
         }
 
 };
diff --git a/external_libraries/amgcl/preconditioner/dummy.hpp b/external_libraries/amgcl/preconditioner/dummy.hpp
index 77547276e189..acd3612a543f 100644
--- a/external_libraries/amgcl/preconditioner/dummy.hpp
+++ b/external_libraries/amgcl/preconditioner/dummy.hpp
@@ -63,7 +63,7 @@ class dummy {
 
         dummy(
                 boost::shared_ptr<build_matrix> M,
-                const params &prm = params(),
+                const params& = params(),
                 const backend_params &bprm = backend_params()
                 )
             : A(Backend::copy_matrix(M, bprm))
diff --git a/external_libraries/amgcl/preconditioner/schur_pressure_correction.hpp b/external_libraries/amgcl/preconditioner/schur_pressure_correction.hpp
index 1e33193a96eb..6b5a1922f18d 100644
--- a/external_libraries/amgcl/preconditioner/schur_pressure_correction.hpp
+++ b/external_libraries/amgcl/preconditioner/schur_pressure_correction.hpp
@@ -118,11 +118,17 @@ class schur_pressure_correction {
 
             std::vector<char> pmask;
 
-            params() {}
+            // Approximate Kuu^-1 with inverted diagonal of Kuu during
+            // construction of matrix-less Schur complement.
+            // When false, USolver is used instead.
+            bool approx_schur;
+
+            params() : approx_schur(true) {}
 
             params(const boost::property_tree::ptree &p)
                 : AMGCL_PARAMS_IMPORT_CHILD(p, usolver),
-                  AMGCL_PARAMS_IMPORT_CHILD(p, psolver)
+                  AMGCL_PARAMS_IMPORT_CHILD(p, psolver),
+                  AMGCL_PARAMS_IMPORT_VALUE(p, approx_schur)
             {
                 void *pm = 0;
                 size_t n = 0;
@@ -141,13 +147,14 @@ class schur_pressure_correction {
 
                 pmask.assign(static_cast<char*>(pm), static_cast<char*>(pm) + n);
 
-                AMGCL_PARAMS_CHECK(p, (usolver)(psolver)(pmask)(pmask_size));
+                AMGCL_PARAMS_CHECK(p, (usolver)(psolver)(pmask)(pmask_size)(approx_schur));
             }
 
             void get(boost::property_tree::ptree &p, const std::string &path = "") const
             {
                 AMGCL_PARAMS_EXPORT_CHILD(p, path, usolver);
                 AMGCL_PARAMS_EXPORT_CHILD(p, path, psolver);
+                AMGCL_PARAMS_EXPORT_VALUE(p, path, approx_schur);
             }
         } prm;
 
@@ -218,8 +225,14 @@ class schur_pressure_correction {
             backend::spmv( alpha, P->system_matrix(), x, beta, y);
 
             backend::spmv(1, *Kup, x, 0, *tmp);
-            backend::clear(*u);
-            (*U)(*tmp, *u);
+
+            if (prm.approx_schur) {
+                backend::vmul(1, *M, *tmp, 0, *u);
+            } else {
+                backend::clear(*u);
+                (*U)(*tmp, *u);
+            }
+
             backend::spmv(-alpha, *Kpu, *u, 1, y);
         }
     private:
@@ -227,6 +240,7 @@ class schur_pressure_correction {
 
         boost::shared_ptr<matrix> K, Kup, Kpu, x2u, x2p, u2x, p2x;
         boost::shared_ptr<vector> rhs_u, rhs_p, u, p, tmp;
+        boost::shared_ptr<typename backend_type::matrix_diagonal> M;
 
         boost::shared_ptr<USolver> U;
         boost::shared_ptr<PSolver> P;
@@ -345,6 +359,9 @@ class schur_pressure_correction {
 
             tmp = backend_type::create_vector(nu, bprm);
 
+            if (prm.approx_schur)
+                M = backend_type::copy_vector(diagonal(*Kuu, /*invert = */true), bprm);
+
             // Scatter/Gather matrices
             boost::shared_ptr<build_matrix> x2u = boost::make_shared<build_matrix>();
             boost::shared_ptr<build_matrix> x2p = boost::make_shared<build_matrix>();
diff --git a/external_libraries/amgcl/profiler.hpp b/external_libraries/amgcl/profiler.hpp
index 3dcc773cf815..56a33eb83931 100644
--- a/external_libraries/amgcl/profiler.hpp
+++ b/external_libraries/amgcl/profiler.hpp
@@ -55,6 +55,7 @@ template <class Counter = amgcl::perf_counter::clock, unsigned SHIFT_WIDTH = 2>
 class profiler {
     public:
         typedef typename Counter::value_type value_type;
+        typedef double delta_type;
 
         /// Initialization.
         /**
@@ -79,12 +80,12 @@ class profiler {
         /**
          * Returns delta in the measured value since the corresponding tic().
          */
-        value_type toc(const std::string& /*name*/) {
+        delta_type toc(const std::string& /*name*/) {
             profile_unit *top = stack.back();
             stack.pop_back();
 
             value_type current = counter.current();
-            value_type delta   = current - top->begin;
+            delta_type delta   = current - top->begin;
 
             top->length += delta;
             root.length = current - root.begin;
@@ -105,8 +106,8 @@ class profiler {
         struct profile_unit {
             profile_unit() : length(0) {}
 
-            value_type children_time() const {
-                value_type s = value_type();
+            delta_type children_time() const {
+                delta_type s = delta_type();
                 for(typename std::map<std::string, profile_unit>::const_iterator c = children.begin(); c != children.end(); c++)
                     s += c->second.length;
                 return s;
@@ -120,7 +121,7 @@ class profiler {
             }
 
             void print(std::ostream &out, const std::string &name,
-                    int level, value_type total, size_t width) const
+                    int level, delta_type total, size_t width) const
             {
                 using namespace std;
 
@@ -128,7 +129,7 @@ class profiler {
                 print_line(out, name, length, 100 * length / total, width - level);
 
                 if (children.size()) {
-                    value_type val = length - children_time();
+                    delta_type val = length - children_time();
                     double perc = 100.0 * val / total;
 
                     if (perc > 1e-1) {
@@ -142,7 +143,7 @@ class profiler {
             }
 
             void print_line(std::ostream &out, const std::string &name,
-                    value_type time, double perc, size_t width) const
+                    delta_type time, double perc, size_t width) const
             {
                 using namespace std;
 
@@ -155,7 +156,7 @@ class profiler {
             }
 
             value_type begin;
-            value_type length;
+            delta_type length;
 
             std::map<std::string, profile_unit> children;
         };
diff --git a/external_libraries/amgcl/relaxation/chebyshev.hpp b/external_libraries/amgcl/relaxation/chebyshev.hpp
index 541176decb24..924d48067a12 100644
--- a/external_libraries/amgcl/relaxation/chebyshev.hpp
+++ b/external_libraries/amgcl/relaxation/chebyshev.hpp
@@ -185,10 +185,12 @@ class chebyshev {
             }
         }
 
+        // Uses Gershgorin disc theorem to estimate spectral radius of the
+        // matrix
         template <class Matrix>
         static scalar_type spectral_radius(const Matrix &A) {
             typedef typename backend::row_iterator<Matrix>::type row_iterator;
-            const size_t n = rows(A);
+            const ptrdiff_t n = rows(A);
 
             scalar_type emax = 0;
 
@@ -196,7 +198,7 @@ class chebyshev {
             {
                 scalar_type my_emax = 0;
 #pragma omp for nowait
-                for(ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i) {
+                for(ptrdiff_t i = 0; i < n; ++i) {
                     scalar_type hi = 0;
 
                     for(row_iterator a = backend::row_begin(A, i); a; ++a)
diff --git a/external_libraries/amgcl/relaxation/detail/ilu_solve.hpp b/external_libraries/amgcl/relaxation/detail/ilu_solve.hpp
index ebaf27e82d05..b523ea346ae6 100644
--- a/external_libraries/amgcl/relaxation/detail/ilu_solve.hpp
+++ b/external_libraries/amgcl/relaxation/detail/ilu_solve.hpp
@@ -1,7 +1,39 @@
 #ifndef AMGCL_RELAXATION_DETAIL_ILU_SOLVE_HPP
 #define AMGCL_RELAXATION_DETAIL_ILU_SOLVE_HPP
 
+/*
+The MIT License
+
+Copyright (c) 2012-2017 Denis Demidov <dennis.demidov@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/**
+ * \file   amgcl/relaxation/detail/ilu_solve.hpp
+ * \author Denis Demidov <dennis.demidov@gmail.com>
+ * \brief  Solver for sparse triangular systems obtained as a result of an
+ *         incomplete LU factorization.
+ */
+
 #include <amgcl/backend/interface.hpp>
+#include <amgcl/util.hpp>
 
 namespace amgcl {
 namespace relaxation {
@@ -9,7 +41,8 @@ namespace detail {
 
 template <class Backend>
 class ilu_solve {
-    private:
+    public:
+        typedef typename Backend::params backend_params;
         typedef typename Backend::value_type value_type;
         typedef typename Backend::matrix matrix;
         typedef typename Backend::vector vector;
@@ -17,23 +50,37 @@ class ilu_solve {
         typedef typename backend::builtin<value_type>::matrix build_matrix;
         typedef typename math::scalar_of<value_type>::type scalar_type;
 
-        unsigned jacobi_iters;
+        struct params {
+            /// Number of Jacobi iterations.
+            unsigned    iters;
 
-        boost::shared_ptr<matrix> L;
-        boost::shared_ptr<matrix> U;
-        boost::shared_ptr<matrix_diagonal> D;
-        boost::shared_ptr<vector> t1, t2;
+            /// Damping factor.
+            scalar_type damping;
+
+            params() : iters(2), damping(0.72) {}
+
+            params(const boost::property_tree::ptree &p)
+                : AMGCL_PARAMS_IMPORT_VALUE(p, iters)
+                , AMGCL_PARAMS_IMPORT_VALUE(p, damping)
+            {
+                AMGCL_PARAMS_CHECK(p, (iters)(damping));
+            }
+
+            void get(boost::property_tree::ptree &p, const std::string &path) const {
+                AMGCL_PARAMS_EXPORT_VALUE(p, path, iters);
+                AMGCL_PARAMS_EXPORT_VALUE(p, path, damping);
+            }
+        } prm;
 
     public:
-        template <class Params>
         ilu_solve(
                 boost::shared_ptr<build_matrix> L,
                 boost::shared_ptr<build_matrix> U,
                 boost::shared_ptr<backend::numa_vector<value_type> > D,
-                const Params &prm,
-                const typename Backend::params &bprm
+                const params &prm = params(),
+                const backend_params &bprm = backend_params()
                 ) :
-            jacobi_iters(prm.jacobi_iters),
+            prm(prm),
             L(Backend::copy_matrix(L, bprm)),
             U(Backend::copy_matrix(U, bprm)),
             D(Backend::copy_vector(D, bprm)),
@@ -46,24 +93,31 @@ class ilu_solve {
             vector *y0 = t1.get();
             vector *y1 = t2.get();
 
-            backend::copy(x, *y0);
-            for(unsigned i = 0; i < jacobi_iters; ++i) {
+            backend::axpby(prm.damping, x, 0.0, *y0);
+            for(unsigned i = 0; i < prm.iters; ++i) {
                 backend::residual(x, *L, *y0, *y1);
-                std::swap(y0, y1);
+                backend::axpby(prm.damping, *y1, (1-prm.damping), *y0);
             }
 
-            backend::copy(*y0, x);
-            for(unsigned i = 0; i < jacobi_iters; ++i) {
+            backend::vmul(prm.damping, *D, *y0, 0.0, x);
+            for(unsigned i = 0; i < prm.iters; ++i) {
                 backend::residual(*y0, *U, x, *y1);
-                backend::vmul(math::identity<scalar_type>(), *D, *y1, math::zero<scalar_type>(), x);
+                backend::vmul(prm.damping, *D, *y1, (1-prm.damping), x);
             }
         }
+
+    private:
+        boost::shared_ptr<matrix> L;
+        boost::shared_ptr<matrix> U;
+        boost::shared_ptr<matrix_diagonal> D;
+        boost::shared_ptr<vector> t1, t2;
 };
 
 template <class value_type>
 class ilu_solve< backend::builtin<value_type> > {
     public:
         typedef backend::builtin<value_type> Backend;
+        typedef typename Backend::params backend_params;
         typedef typename Backend::matrix matrix;
         typedef typename Backend::vector vector;
         typedef typename Backend::matrix_diagonal matrix_diagonal;
@@ -71,23 +125,40 @@ class ilu_solve< backend::builtin<value_type> > {
         typedef typename Backend::rhs_type rhs_type;
         typedef typename math::scalar_of<value_type>::type scalar_type;
 
-        template <class Params>
+        struct params {
+            /// Use serial version of the algorithm
+            bool serial;
+
+            params() : serial(num_threads() < 4) {}
+
+            params(const boost::property_tree::ptree &p)
+                : AMGCL_PARAMS_IMPORT_VALUE(p, serial)
+            {
+                AMGCL_PARAMS_CHECK(p, (serial));
+            }
+
+            void get(boost::property_tree::ptree &p, const std::string &path) const {
+                AMGCL_PARAMS_EXPORT_VALUE(p, path, serial);
+            }
+        } prm;
+
         ilu_solve(
                 boost::shared_ptr<build_matrix> L,
                 boost::shared_ptr<build_matrix> U,
                 boost::shared_ptr<backend::numa_vector<value_type> > D,
-                const Params &prm, const typename Backend::params &bprm
-                ) : is_serial(prm.serial || num_threads() < 4)
+                const params &prm = params(),
+                const backend_params& = backend_params()
+                ) : prm(prm)
         {
-            if (is_serial)
-                serial_init(L, U, D, bprm);
+            if (prm.serial)
+                serial_init(L, U, D);
             else
-                parallel_init(L, U, D, bprm);
+                parallel_init(L, U, D);
         }
 
         template <class Vector>
         void solve(Vector &x) {
-            if (is_serial)
+            if (prm.serial)
                 serial_solve(x);
             else
                 parallel_solve(x);
@@ -121,8 +192,7 @@ class ilu_solve< backend::builtin<value_type> > {
         void serial_init(
                 boost::shared_ptr<build_matrix>    L,
                 boost::shared_ptr<build_matrix>    U,
-                boost::shared_ptr<matrix_diagonal> D,
-                const typename Backend::params&
+                boost::shared_ptr<matrix_diagonal> D
                 )
         {
             this->L = L;
@@ -325,8 +395,7 @@ class ilu_solve< backend::builtin<value_type> > {
         void parallel_init(
                 boost::shared_ptr<build_matrix> L,
                 boost::shared_ptr<build_matrix> U,
-                boost::shared_ptr<backend::numa_vector<value_type> > D,
-                const typename Backend::params&
+                boost::shared_ptr<backend::numa_vector<value_type> > D
                 )
         {
             lower = boost::make_shared< sptr_solve<true > >(*L, D->data());
diff --git a/external_libraries/amgcl/relaxation/ilu0.hpp b/external_libraries/amgcl/relaxation/ilu0.hpp
index b8cb1f99f6a8..ce0623bf4e80 100644
--- a/external_libraries/amgcl/relaxation/ilu0.hpp
+++ b/external_libraries/amgcl/relaxation/ilu0.hpp
@@ -55,33 +55,28 @@ struct ilu0 {
     typedef typename Backend::matrix_diagonal matrix_diagonal;
 
     typedef typename math::scalar_of<value_type>::type scalar_type;
+    typedef detail::ilu_solve<Backend> ilu_solve;
 
     /// Relaxation parameters.
     struct params {
         /// Damping factor.
         scalar_type damping;
 
-        /// Use serial version of the algorithm
-        bool serial;
+        /// Parameters for sparse triangular system solver
+        typename ilu_solve::params solve;
 
-        /// Number of Jacobi iterations.
-        /** \note Used for approximate solution of triangular systems on parallel backends */
-        unsigned jacobi_iters;
-
-        params() : damping(1), serial(false), jacobi_iters(2) {}
+        params() : damping(1) {}
 
         params(const boost::property_tree::ptree &p)
             : AMGCL_PARAMS_IMPORT_VALUE(p, damping)
-            , AMGCL_PARAMS_IMPORT_VALUE(p, serial)
-            , AMGCL_PARAMS_IMPORT_VALUE(p, jacobi_iters)
+            , AMGCL_PARAMS_IMPORT_CHILD(p, solve)
         {
-            AMGCL_PARAMS_CHECK(p, (damping)(serial)(jacobi_iters));
+            AMGCL_PARAMS_CHECK(p, (damping)(solve));
         }
 
         void get(boost::property_tree::ptree &p, const std::string &path) const {
             AMGCL_PARAMS_EXPORT_VALUE(p, path, damping);
-            AMGCL_PARAMS_EXPORT_VALUE(p, path, serial);
-            AMGCL_PARAMS_EXPORT_VALUE(p, path, jacobi_iters);
+            AMGCL_PARAMS_EXPORT_CHILD(p, path, solve);
         }
     };
 
@@ -176,7 +171,7 @@ struct ilu0 {
                 work[A.col[j]] = NULL;
         }
 
-        ilu = boost::make_shared<ilu_solve>(L, U, D, prm, bprm);
+        ilu = boost::make_shared<ilu_solve>(L, U, D, prm.solve, bprm);
     }
 
     /// \copydoc amgcl::relaxation::damped_jacobi::apply_pre
@@ -212,7 +207,6 @@ struct ilu0 {
     }
 
     private:
-        typedef detail::ilu_solve<Backend> ilu_solve;
         boost::shared_ptr<ilu_solve> ilu;
 
 };
diff --git a/external_libraries/amgcl/relaxation/iluk.hpp b/external_libraries/amgcl/relaxation/iluk.hpp
index a493b2b0cbf8..66bddd854410 100644
--- a/external_libraries/amgcl/relaxation/iluk.hpp
+++ b/external_libraries/amgcl/relaxation/iluk.hpp
@@ -55,6 +55,8 @@ struct iluk {
 
     typedef typename math::scalar_of<value_type>::type scalar_type;
 
+    typedef detail::ilu_solve<Backend> ilu_solve;
+
     /// Relaxation parameters.
     struct params {
         /// Level of fill-in.
@@ -63,29 +65,23 @@ struct iluk {
         /// Damping factor.
         scalar_type damping;
 
-        /// Use serial version of the algorithm
-        bool serial;
-
-        /// Number of Jacobi iterations.
-        /** \note Used for approximate solution of triangular systems on parallel backends */
-        unsigned jacobi_iters;
+        /// Parameters for sparse triangular system solver
+        typename ilu_solve::params solve;
 
-        params() : k(1), damping(1), serial(false), jacobi_iters(2) {}
+        params() : k(1), damping(1) {}
 
         params(const boost::property_tree::ptree &p)
             : AMGCL_PARAMS_IMPORT_VALUE(p, k)
             , AMGCL_PARAMS_IMPORT_VALUE(p, damping)
-            , AMGCL_PARAMS_IMPORT_VALUE(p, serial)
-            , AMGCL_PARAMS_IMPORT_VALUE(p, jacobi_iters)
+            , AMGCL_PARAMS_IMPORT_CHILD(p, solve)
         {
-            AMGCL_PARAMS_CHECK(p, (k)(damping)(serial)(jacobi_iters));
+            AMGCL_PARAMS_CHECK(p, (k)(damping)(solve));
         }
 
         void get(boost::property_tree::ptree &p, const std::string &path) const {
             AMGCL_PARAMS_EXPORT_VALUE(p, path, k);
             AMGCL_PARAMS_EXPORT_VALUE(p, path, damping);
-            AMGCL_PARAMS_EXPORT_VALUE(p, path, serial);
-            AMGCL_PARAMS_EXPORT_VALUE(p, path, jacobi_iters);
+            AMGCL_PARAMS_EXPORT_CHILD(p, path, solve);
         }
     };
 
@@ -154,7 +150,7 @@ struct iluk {
         ilu = boost::make_shared<ilu_solve>(
                 boost::make_shared<build_matrix>(n, n, Lptr, Lcol, Lval),
                 boost::make_shared<build_matrix>(n, n, Uptr, Ucol, Uval),
-                D, prm, bprm);
+                D, prm.solve, bprm);
     }
 
     /// \copydoc amgcl::relaxation::damped_jacobi::apply_pre
@@ -189,7 +185,6 @@ struct iluk {
     }
 
     private:
-        typedef detail::ilu_solve<Backend> ilu_solve;
         boost::shared_ptr<ilu_solve> ilu;
 
         struct nonzero {
diff --git a/external_libraries/amgcl/relaxation/ilut.hpp b/external_libraries/amgcl/relaxation/ilut.hpp
index 9cdbdf04872f..162e97f169b9 100644
--- a/external_libraries/amgcl/relaxation/ilut.hpp
+++ b/external_libraries/amgcl/relaxation/ilut.hpp
@@ -62,6 +62,8 @@ struct ilut {
 
     typedef typename math::scalar_of<value_type>::type scalar_type;
 
+    typedef detail::ilu_solve<Backend> ilu_solve;
+
     /// Relaxation parameters.
     struct params {
         /// Fill factor.
@@ -73,31 +75,25 @@ struct ilut {
         /// Damping factor.
         scalar_type damping;
 
-        /// Use serial version of the algorithm
-        bool serial;
-
-        /// Number of Jacobi iterations.
-        /** \note Used for approximate solution of triangular systems on parallel backends */
-        unsigned jacobi_iters;
+        /// Parameters for sparse triangular system solver
+        typename ilu_solve::params solve;
 
-        params() : p(2), tau(1e-2f), damping(1), serial(false), jacobi_iters(2) {}
+        params() : p(2), tau(1e-2f), damping(1) {}
 
         params(const boost::property_tree::ptree &p)
             : AMGCL_PARAMS_IMPORT_VALUE(p, p)
             , AMGCL_PARAMS_IMPORT_VALUE(p, tau)
             , AMGCL_PARAMS_IMPORT_VALUE(p, damping)
-            , AMGCL_PARAMS_IMPORT_VALUE(p, serial)
-            , AMGCL_PARAMS_IMPORT_VALUE(p, jacobi_iters)
+            , AMGCL_PARAMS_IMPORT_CHILD(p, solve)
         {
-            AMGCL_PARAMS_CHECK(p, (p)(tau)(damping)(serial)(jacobi_iters));
+            AMGCL_PARAMS_CHECK(p, (p)(tau)(damping)(solve));
         }
 
         void get(boost::property_tree::ptree &p, const std::string &path) const {
             AMGCL_PARAMS_EXPORT_VALUE(p, path, p);
             AMGCL_PARAMS_EXPORT_VALUE(p, path, tau);
             AMGCL_PARAMS_EXPORT_VALUE(p, path, damping);
-            AMGCL_PARAMS_EXPORT_VALUE(p, path, serial);
-            AMGCL_PARAMS_EXPORT_VALUE(p, path, jacobi_iters);
+            AMGCL_PARAMS_EXPORT_CHILD(p, path, solve);
         }
     };
 
@@ -179,7 +175,7 @@ struct ilut {
         L->nnz = L->ptr[n];
         U->nnz = U->ptr[n];
 
-        ilu = boost::make_shared<ilu_solve>(L, U, D, prm, bprm);
+        ilu = boost::make_shared<ilu_solve>(L, U, D, prm.solve, bprm);
     }
 
     /// \copydoc amgcl::relaxation::damped_jacobi::apply_pre
@@ -215,7 +211,6 @@ struct ilut {
 
     private:
         typedef typename backend::builtin<value_type>::matrix build_matrix;
-        typedef detail::ilu_solve<Backend> ilu_solve;
         boost::shared_ptr<ilu_solve> ilu;
 
         struct sparse_vector {
diff --git a/external_libraries/amgcl/runtime.hpp b/external_libraries/amgcl/runtime.hpp
index c2cdf5300840..c74f6d134fba 100644
--- a/external_libraries/amgcl/runtime.hpp
+++ b/external_libraries/amgcl/runtime.hpp
@@ -54,6 +54,7 @@ THE SOFTWARE.
 #include <amgcl/solver/gmres.hpp>
 #include <amgcl/solver/lgmres.hpp>
 #include <amgcl/solver/fgmres.hpp>
+#include <amgcl/solver/idrs.hpp>
 #include <amgcl/solver/detail/default_inner_product.hpp>
 
 
@@ -522,7 +523,8 @@ enum type {
     bicgstabl,  ///< BiCGStab(ell)
     gmres,      ///< GMRES
     lgmres,     ///< LGMRES
-    fgmres      ///< FGMRES
+    fgmres,     ///< FGMRES
+    idrs        ///< IDR(s)
 };
 
 inline std::ostream& operator<<(std::ostream &os, type s)
@@ -540,6 +542,8 @@ inline std::ostream& operator<<(std::ostream &os, type s)
             return os << "lgmres";
         case fgmres:
             return os << "fgmres";
+        case idrs:
+            return os << "idrs";
         default:
             return os << "???";
     }
@@ -562,6 +566,8 @@ inline std::istream& operator>>(std::istream &in, type &s)
         s = lgmres;
     else if (val == "fgmres")
         s = fgmres;
+    else if (val == "idrs")
+        s = idrs;
     else
         throw std::invalid_argument("Invalid solver value");
 
@@ -619,6 +625,12 @@ inline void process_solver(
                 func.template process<Solver>();
             }
             break;
+        case runtime::solver::idrs:
+            {
+                typedef amgcl::solver::idrs<Backend, InnerProduct> Solver;
+                func.template process<Solver>();
+            }
+            break;
     }
 }
 
diff --git a/external_libraries/amgcl/solver/skyline_lu.hpp b/external_libraries/amgcl/solver/skyline_lu.hpp
index 5c593983334a..434957a73a36 100644
--- a/external_libraries/amgcl/solver/skyline_lu.hpp
+++ b/external_libraries/amgcl/solver/skyline_lu.hpp
@@ -70,126 +70,17 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <algorithm>
 
 #include <amgcl/backend/interface.hpp>
+#include <amgcl/value_type/interface.hpp>
+#include <amgcl/reorder/cuthill_mckee.hpp>
 #include <amgcl/util.hpp>
 
 namespace amgcl {
 namespace solver {
 
-namespace matrix_permutation {
-
-struct none {
-    template <class Matrix>
-    static void get(const Matrix&, std::vector<int> &perm) {
-        for(size_t i = 0; i < perm.size(); ++i) perm[i] = i;
-    }
-};
-
-template <bool reverse = false>
-struct CuthillMcKee {
-    template <class Matrix>
-    static void get(const Matrix &A, std::vector<int> &perm) {
-        typedef typename backend::row_iterator<Matrix>::type row_iterator;
-        const int n = backend::rows(A);
-
-        /* The data structure used to sort and traverse the level sets:
-         *
-         * The current level set is currentLevelSet;
-         * In this level set, there are nodes with degrees from 0 (not really
-         * useful) to maxDegreeInCurrentLevelSet.
-         * firstWithDegree[i] points to a node with degree i, or to -1 if it
-         * does not exist. nextSameDegree[firstWithDegree[i]] points to the
-         * second node with that degree, etc.
-         * While the level set is being traversed, the structure for the next
-         * level set is generated; nMDICLS will be the next
-         * maxDegreeInCurrentLevelSet and nFirstWithDegree will be
-         * firstWithDegree.
-         */
-        int initialNode = 0; // node to start search
-        int maxDegree   = 0;
-
-        std::vector<int> degree(n);
-        std::vector<int> levelSet(n, 0);
-        std::vector<int> nextSameDegree(n, -1);
-
-        for(int i = 0; i < n; ++i) {
-            degree[i] = backend::row_nonzeros(A, i);
-            maxDegree = std::max(maxDegree, degree[i]);
-        }
-
-        std::vector<int> firstWithDegree(maxDegree + 1, -1);
-        std::vector<int> nFirstWithDegree(maxDegree + 1);
-
-        // Initialize the first level set, made up by initialNode alone
-        perm[0] = initialNode;
-        int currentLevelSet = 1;
-        levelSet[initialNode] = currentLevelSet;
-        int maxDegreeInCurrentLevelSet = degree[initialNode];
-        firstWithDegree[maxDegreeInCurrentLevelSet] = initialNode;
-
-        // Main loop
-        for (int next = 1; next < n; ) {
-            int nMDICLS = 0;
-            std::fill(nFirstWithDegree.begin(), nFirstWithDegree.end(), -1);
-            bool empty = true; // used to detect different connected components
-
-            int firstVal  = reverse ? maxDegreeInCurrentLevelSet : 0;
-            int finalVal  = reverse ? -1 : maxDegreeInCurrentLevelSet + 1;
-            int increment = reverse ? -1 : 1;
-
-            for(int soughtDegree = firstVal; soughtDegree != finalVal; soughtDegree += increment)
-            {
-                int node = firstWithDegree[soughtDegree];
-                while (node > 0) {
-                    // Visit neighbors
-                    for(row_iterator a = backend::row_begin(A, node); a; ++a) {
-                        int c = a.col();
-                        if (levelSet[c] == 0) {
-                            levelSet[c] = currentLevelSet + 1;
-                            perm[next] = c;
-                            ++next;
-                            empty = false; // this level set is not empty
-                            nextSameDegree[c] = nFirstWithDegree[degree[c]];
-                            nFirstWithDegree[degree[c]] = c;
-                            nMDICLS = std::max(nMDICLS, degree[c]);
-                        }
-                    }
-                    node = nextSameDegree[node];
-                }
-            }
-
-            ++currentLevelSet;
-            maxDegreeInCurrentLevelSet = nMDICLS;
-            for(int i = 0; i <= nMDICLS; ++i)
-                firstWithDegree[i] = nFirstWithDegree[i];
-
-            if (empty) {
-                // The graph contains another connected component that we
-                // cannot reach.  Search for a node that has not yet been
-                // included in a level set, and start exploring from it.
-                bool found = false;
-                for(int i = 0; i < n; ++i) {
-                    if (levelSet[i] == 0) {
-                        perm[next] = i;
-                        ++next;
-                        levelSet[i] = currentLevelSet;
-                        maxDegreeInCurrentLevelSet = degree[i];
-                        firstWithDegree[maxDegreeInCurrentLevelSet] = i;
-                        found = true;
-                        break;
-                    }
-                }
-                precondition(found, "Internal consistency error at skyline_lu");
-            }
-        }
-    }
-};
-
-}
-
 /// Direct solver that uses skyline LU factorization.
 template <
     typename ValueType,
-    class ordering = matrix_permutation::CuthillMcKee<false>
+    class ordering = reorder::cuthill_mckee<false>
     >
 class skyline_lu {
     public:
@@ -345,11 +236,12 @@ class skyline_lu {
          */
         void factorize() {
             precondition(!math::is_zero(D[0]), "Zero diagonal in skyline_lu");
+            D[0] = math::inverse(D[0]);
 
             for(int k = 0; k < n - 1; ++k) {
                 // check whether A(1,k+1) lies within the skyline structure
                 if (ptr[k + 1] + k + 1 == ptr[k + 2]) {
-                    U[ptr[k+1]] = math::inverse(D[0]) * U[ptr[k+1]];
+                    U[ptr[k+1]] = D[0] * U[ptr[k+1]];
                 }
 
                 // Compute column k+1 of U
@@ -369,7 +261,7 @@ class skyline_lu {
                     for(int j = jBeginMult; j < i; ++j, ++indexL, ++indexU)
                         sum -= L[indexL] * U[indexU];
 
-                    U[indexEntry] = math::inverse(D[i]) * sum;
+                    U[indexEntry] = D[i] * sum;
                 }
 
                 // Compute row k+1 of L
@@ -394,18 +286,15 @@ class skyline_lu {
                 }
 
                 // Find element in diagonal
-                value_type sum = D[k + 1];
+                value_type sum = D[k+1];
                 for(int j = ptr[k+1]; j < ptr[k+2]; ++j)
                     sum -= L[j] * U[j];
 
                 precondition(!math::is_zero(sum),
                         "Zero sum in skyline_lu factorization");
 
-                D[k+1] = sum;
+                D[k+1] = math::inverse(sum);
             }
-
-            // Invert diagonal
-            for(int i = 0; i < n; ++i) D[i] = math::inverse(D[i]);
         }
 };
 
diff --git a/external_libraries/amgcl/util.hpp b/external_libraries/amgcl/util.hpp
index 8f2c6f02705d..5657ff2f9637 100644
--- a/external_libraries/amgcl/util.hpp
+++ b/external_libraries/amgcl/util.hpp
@@ -42,23 +42,27 @@ THE SOFTWARE.
 
 /* Performance measurement macros
  *
- * If AMGCL_PROFILING macro is defined at compilation, then TIC(name) and
- * TOC(name) macros correspond to prof.tic(name) and prof.toc(name).
+ * If AMGCL_PROFILING macro is defined at compilation, then AMGCL_TIC(name) and
+ * AMGCL_TOC(name) macros correspond to prof.tic(name) and prof.toc(name).
  * amgcl::prof should be an instance of amgcl::profiler<> defined in a user
  * code similar to:
  * \code
  * namespace amgcl { profiler<> prof; }
  * \endcode
- * If AMGCL_PROFILING is undefined, then TIC and TOC are noop macros.
+ * If AMGCL_PROFILING is undefined, then AMGCL_TIC and AMGCL_TOC are noop macros.
  */
 #ifdef AMGCL_PROFILING
 #  include <amgcl/profiler.hpp>
-#  define TIC(name) amgcl::prof.tic(name);
-#  define TOC(name) amgcl::prof.toc(name);
+#  define AMGCL_TIC(name) amgcl::prof.tic(name);
+#  define AMGCL_TOC(name) amgcl::prof.toc(name);
 namespace amgcl { extern profiler<> prof; }
 #else
-#  define TIC(name)
-#  define TOC(name)
+#  ifndef AMGCL_TIC
+#    define AMGCL_TIC(name)
+#  endif
+#  ifndef AMGCL_TOC
+#    define AMGCL_TOC(name)
+#  endif
 #endif
 
 #define AMGCL_DEBUG_SHOW(x)                                                    \

From f205b3ea7b453064d6aa9f12d86f04e02ac40120 Mon Sep 17 00:00:00 2001
From: Riccardo Rossi <rrossi@cimne.upc.edu>
Date: Thu, 10 Aug 2017 14:40:43 +0200
Subject: [PATCH 08/12] changing name to the files to be more consistent

---
 .../amgcl_mpi_schur_complement_solver.h       |  63 ++-
 .../{amgcl_solver.h => amgcl_mpi_solver.h}    |   0
 .../external_includes/mpi_amgcl_ns_solver.h   | 391 ------------------
 .../tests/test_trilinos_linear_solvers.py     |   3 +-
 4 files changed, 42 insertions(+), 415 deletions(-)
 rename applications/trilinos_application/external_includes/{amgcl_solver.h => amgcl_mpi_solver.h} (100%)
 delete mode 100644 applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h

diff --git a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
index 7c45cc4c935c..ad3e7830edc7 100644
--- a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
+++ b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
@@ -138,13 +138,13 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType,
         mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString());
 
         //setting pressure solver options
-        mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString());
-        mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble());
-        mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt());
-        mprm.put("precond.psolver.precond.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString());
-        mprm.put("precond.psolver.precond.coarsening.aggr.eps_strong", 0.0);
-        mprm.put("precond.psolver.precond.coarsening.aggr.block_size", 1);
-        mprm.put("precond.psolver.precond.coarse_enough",mCoarseEnough);
+        mprm.put("precond.psolver.solver.local.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString());
+        mprm.put("precond.psolver.solver.local.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble());
+        mprm.put("precond.psolver.solver.local.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt());
+//         mprm.put("precond.psolver.precond.local.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString());
+//         mprm.put("precond.psolver.precond.local.coarsening.aggr.eps_strong", 0.0);
+//         mprm.put("precond.psolver.precond.local.coarsening.aggr.block_size", 1);
+//         mprm.put("precond.psolver.precond.local.coarse_enough",mCoarseEnough);
 
     }
 
@@ -184,26 +184,45 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType,
 
         typedef amgcl::backend::builtin<double> Backend;
         
-        typedef  amgcl::mpi::make_solver<
-            amgcl::mpi::schur_pressure_correction<
-                amgcl::mpi::make_solver<
-                    amgcl::mpi::block_preconditioner<
-                        amgcl::runtime::relaxation::as_preconditioner<Backend>
+//         typedef  amgcl::mpi::make_solver<
+//             amgcl::mpi::schur_pressure_correction<
+//                 amgcl::mpi::make_solver<
+//                     amgcl::mpi::block_preconditioner<
+//                         amgcl::runtime::relaxation::as_preconditioner<Backend>
+//                     >,
+//                     amgcl::runtime::iterative_solver
+//                 >,
+//                 amgcl::mpi::make_solver<
+//                     amgcl::mpi::subdomain_deflation<
+//                         amgcl::runtime::amg<Backend>,
+//                         amgcl::runtime::iterative_solver,
+//                         amgcl::runtime::mpi::direct_solver<double>
+//                     >,
+//                     amgcl::runtime::iterative_solver
+//                 >
+//             >,
+//             amgcl::runtime::iterative_solver
+//             > SDD;
+    typedef amgcl::mpi::make_solver<
+        amgcl::mpi::schur_pressure_correction<
+            amgcl::mpi::make_solver<
+                amgcl::mpi::block_preconditioner<
+                    amgcl::runtime::relaxation::as_preconditioner<Backend, relaxation::damped_jacobi>
                     >,
-                    amgcl::runtime::iterative_solver
+                amgcl::runtime::solver::bicgstab
                 >,
-                amgcl::mpi::make_solver<
-                    amgcl::mpi::subdomain_deflation<
-                        amgcl::runtime::amg<Backend>,
-                        amgcl::runtime::iterative_solver,
-                        amgcl::runtime::mpi::direct_solver<double>
+            amgcl::mpi::make_solver<
+                amgcl::mpi::subdomain_deflation<
+                    amg<Backend, coarsening::smoothed_aggregation, relaxation::ilu0>,
+                    amgcl::runtime::solver::bicgstab,
+                    amgcl::mpi::skyline_lu<double>
                     >,
-                    amgcl::runtime::iterative_solver
+                amgcl::runtime::solver::fgmres
                 >
             >,
-            amgcl::runtime::iterative_solver
-            > SDD;
-
+        amgcl::runtime::solver::fgmres
+        > SDD;
+        
         boost::function<double(ptrdiff_t,unsigned)> dv = amgcl::mpi::constant_deflation(1);
         mprm.put("precond.psolver.precond.num_def_vec", 1);
         mprm.put("precond.psolver.precond.def_vec", &dv);
diff --git a/applications/trilinos_application/external_includes/amgcl_solver.h b/applications/trilinos_application/external_includes/amgcl_mpi_solver.h
similarity index 100%
rename from applications/trilinos_application/external_includes/amgcl_solver.h
rename to applications/trilinos_application/external_includes/amgcl_mpi_solver.h
diff --git a/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h b/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h
deleted file mode 100644
index 52521a58bd55..000000000000
--- a/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h
+++ /dev/null
@@ -1,391 +0,0 @@
-//    |  /           |
-//    ' /   __| _` | __|  _ \   __|
-//    . \  |   (   | |   (   |\__ `
-//   _|\_\_|  \__,_|\__|\___/ ____/
-//                   Multi-Physics
-//
-//  License:		 BSD License
-//					 Kratos default license: kratos/license.txt
-//
-//  Main authors:    Denis Demidov
-//                   Riccardo Rossi
-//
-
-
-#if !defined(KRATOS_AMGCL_NS_MPI_SOLVER_H_INCLUDED )
-#define  KRATOS_AMGCL_NS_MPI_SOLVER_H_INCLUDED
-
-
-#ifndef AMGCL_PARAM_UNKNOWN
-#  define AMGCL_PARAM_UNKNOWN(name)                                            \
-      std::cerr << "AMGCL WARNING: unknown parameter " << name << std::endl
-#endif
-
-
-// External includes
-#include "boost/smart_ptr.hpp"
-
-// Project includes
-#include "includes/define.h"
-#include "includes/kratos_parameters.h"
-#include "linear_solvers/linear_solver.h"
-#include "external_includes/amgcl_solver.h"
-
-//aztec solver includes
-#include "AztecOO.h"
-#include "Epetra_LinearProblem.h"
-//#include "Teuchos_ParameterList.hpp"
-
-
-#include <amgcl/amg.hpp>
-#include <amgcl/adapter/epetra.hpp>
-#include <amgcl/profiler.hpp>
-
-#include <amgcl/mpi/make_solver.hpp>
-#include <amgcl/mpi/schur_pressure_correction.hpp>
-#include <amgcl/mpi/block_preconditioner.hpp>
-#include <amgcl/mpi/subdomain_deflation.hpp>
-#include <amgcl/mpi/direct_solver.hpp>
-#include <boost/property_tree/json_parser.hpp> //needed to print AMGCL internal settings
-
-
-
-namespace Kratos
-{
-
-
-
-template< class TSparseSpaceType, class TDenseSpaceType,
-          class TReordererType = Reorderer<TSparseSpaceType, TDenseSpaceType> >
-class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType,
-    TDenseSpaceType, TReordererType>
-{
-public:
-    /**
-     * Counted pointer of AmgclMPI_NS_Solver
-     */
-    KRATOS_CLASS_POINTER_DEFINITION ( AmgclMPI_NS_Solver );
-
-    typedef LinearSolver<TSparseSpaceType, TDenseSpaceType, TReordererType> BaseType;
-
-    typedef typename TSparseSpaceType::MatrixType SparseMatrixType;
-
-    typedef typename TSparseSpaceType::VectorType VectorType;
-
-    typedef typename TDenseSpaceType::MatrixType DenseMatrixType;
-
-
-    AmgclMPI_NS_Solver ( Parameters rParameters )
-    {
-//         Parameters default_parameters( R"(
-//                                        {
-//                                        "solver_type" : "AmgclMPI_NS_Solver",
-//                                        "krylov_type" : "fgmres",
-//                                        "velocity_block_preconditioner" :
-//                                         {
-//                                             "krylov_type" : "lgmres",
-//                                             "tolerance" : 1e-3,
-//                                             "preconditioner_type" : "ilu0",
-//                                             "max_iteration": 5
-//                                         },
-//                                         "pressure_block_preconditioner" :
-//                                         {
-//                                             "krylov_type" : "lgmres",
-//                                             "tolerance" : 1e-2,
-//                                             "preconditioner_type" : "spai0",
-//                                             "max_iteration": 20
-//                                         },
-//                                        "tolerance" : 1e-9,
-//                                        "gmres_krylov_space_dimension": 50,
-//                                        "coarsening_type": "aggregation",
-//                                        "max_iteration": 50,
-//                                        "verbosity" : 1,
-//                                        "scaling": false,
-//                                        "coarse_enough" : 5000
-//                                    }  )" );
-// 
-// 
-//         //now validate agains defaults -- this also ensures no type mismatch
-//         rParameters.ValidateAndAssignDefaults(default_parameters);
-// 
-//         std::stringstream msg;
-// 
-//         //validate if values are admissible
-//         std::set<std::string> available_preconditioners = {"spai0","ilu0","damped_jacobi","gauss_seidel","chebyshev"};
-// 
-//         //check velocity block settings
-//         if(available_preconditioners.find(rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()) == available_preconditioners.end())
-//         {
-//             msg << "currently prescribed velocity_block_preconditioner preconditioner_type : " << rParameters["velocity_block_preconditioner"]["smoother_type"].GetString() << std::endl;
-//             msg << "admissible values are : spai0,ilu0,damped_jacobi,gauss_seidel,chebyshev"<< std::endl;
-//             KRATOS_THROW_ERROR(std::invalid_argument," smoother_type is invalid: ",msg.str());
-//         }    
-// 
-//         mcoarse_enough = rParameters["coarse_enough"].GetInt();
-// 
-//         mtol = rParameters["tolerance"].GetDouble();
-//         mmax_it = rParameters["max_iteration"].GetInt();
-//         mverbosity=rParameters["verbosity"].GetInt();
-//         mprm.put("solver.type", rParameters["krylov_type"].GetString());
-//         
-//         if(rParameters["krylov_type"].GetString() == "gmres" || rParameters["krylov_type"].GetString() == "lgmres" || rParameters["krylov_type"].GetString() == "fgmres")
-//             mprm.put("solver.M",  rParameters["gmres_krylov_space_dimension"].GetInt());
-// 
-//         //setting velocity solver options
-//         mprm.put("precond.usolver.solver.type", rParameters["velocity_block_preconditioner"]["krylov_type"].GetString());
-//         mprm.put("precond.usolver.solver.tol", rParameters["velocity_block_preconditioner"]["tolerance"].GetDouble());
-//         mprm.put("precond.usolver.solver.maxiter", rParameters["velocity_block_preconditioner"]["max_iteration"].GetInt());
-//         mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString());
-// 
-//         //setting pressure solver options
-//         mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString());
-//         mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble());
-//         mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt());
-//         mprm.put("precond.psolver.precond.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString());
-//         mprm.put("precond.psolver.precond.coarsening.aggr.eps_strong", 0.0);
-//         mprm.put("precond.psolver.precond.coarsening.aggr.block_size", 1);
-//         mprm.put("precond.psolver.precond.coarse_enough",mcoarse_enough);
-
-    }
-
-
-    /**
-     * Destructor
-     */
-    virtual ~AmgclMPI_NS_Solver() {}
-
-    /**
-     * Normal solve method.
-     * Solves the linear system Ax=b and puts the result on SystemVector& rX.
-     * rX is also th initial guess for iterative methods.
-     * @param rA. System matrix
-     * @param rX. Solution vector.
-     * @param rB. Right hand side vector.
-     */
-    bool Solve ( SparseMatrixType& rA, VectorType& rX, VectorType& rB )
-    {
-        KRATOS_TRY
-KRATOS_WATCH(__LINE__)
-        using amgcl::prof;
-        prof.reset();
-KRATOS_WATCH(__LINE__)
-
-        amgcl::mpi::communicator world ( MPI_COMM_WORLD );
-        if ( mverbosity >=0 && world.rank == 0 ) {
-            std::cout << "World size: " << world.size << std::endl;
-        }
-KRATOS_WATCH(__LINE__)
-
-        int chunk = rA.NumMyRows();
-        boost::iterator_range<double*> xrange ( rX.Values(), rX.Values() + chunk );
-        boost::iterator_range<double*> frange ( rB.Values(), rB.Values() + chunk );
-KRATOS_WATCH(__LINE__)
-
-        if ( mverbosity > 1 && world.rank == 0 ) {
-            write_json ( std::cout, mprm );
-        }
-KRATOS_WATCH(__LINE__)
-
-        typedef amgcl::backend::builtin<double> Backend;
-        
-        typedef  amgcl::mpi::make_solver<
-            amgcl::mpi::schur_pressure_correction<
-                amgcl::mpi::make_solver<
-                    amgcl::mpi::block_preconditioner<
-                        amgcl::runtime::relaxation::as_preconditioner<Backend>
-                    >,
-                    amgcl::runtime::iterative_solver
-                >,
-                amgcl::mpi::make_solver<
-                    amgcl::mpi::subdomain_deflation<
-                        amgcl::runtime::amg<Backend>,
-                        amgcl::runtime::iterative_solver,
-                        amgcl::runtime::mpi::direct_solver<double>
-                    >,
-                    amgcl::runtime::iterative_solver
-                >
-            >,
-            amgcl::runtime::iterative_solver
-            > SDD;
-KRATOS_WATCH(__LINE__)
-
-        boost::function<double(ptrdiff_t,unsigned)> dv = amgcl::mpi::constant_deflation(1);
-        mprm.put("precond.psolver.precond.num_def_vec", 1);
-        mprm.put("precond.psolver.precond.def_vec", &dv);
-        
-        mprm.put("precond.pmask", static_cast<void*>(&mp[0]));
-        mprm.put("precond.pmask_size", mp.size());
-KRATOS_WATCH(__LINE__)
-
-        prof.tic ( "setup" );
-        SDD solve ( world, amgcl::backend::map ( rA ), mprm );
-        double tm_setup = prof.toc ( "setup" );
-KRATOS_WATCH(__LINE__)
-
-        prof.tic ( "Solve" );
-        size_t iters;
-        double resid;
-        boost::tie ( iters, resid ) = solve ( frange, xrange );
-        double solve_tm = prof.toc ( "Solve" );
-
-
-
-        if ( rA.Comm().MyPID() == 0 ) {
-            if ( mverbosity > 0 ) {
-                std::cout
-                        << "------- AMGCL -------\n" << std::endl
-                        << "Iterations      : " << iters   << std::endl
-                        << "Error           : " << resid   << std::endl
-                        << "amgcl setup time: " << tm_setup   << std::endl
-                        << "amgcl solve time: " << solve_tm   << std::endl;
-            }
-
-            if ( mverbosity > 1 ) {
-                std::cout << prof  << std::endl;
-            }
-        }
-
-
-
-        return true;
-
-
-
-
-
-        KRATOS_CATCH ( "" );
-    }
-
-    /**
-     * Multi solve method for solving a set of linear systems with same coefficient matrix.
-     * Solves the linear system Ax=b and puts the result on SystemVector& rX.
-     * rX is also th initial guess for iterative methods.
-     * @param rA. System matrix
-     * @param rX. Solution vector.
-     * @param rB. Right hand side vector.
-     */
-    bool Solve ( SparseMatrixType& rA, DenseMatrixType& rX, DenseMatrixType& rB )
-    {
-
-        return false;
-    }
-
-    /** Some solvers may require a minimum degree of knowledge of the structure of the matrix. To make an example
-    * when solving a mixed u-p problem, it is important to identify the row associated to v and p.
-    * another example is the automatic prescription of rotation null-space for smoothed-aggregation solvers
-    * which require knowledge on the spatial position of the nodes associated to a given dof.
-    * This function tells if the solver requires such data
-    */
-    virtual bool AdditionalPhysicalDataIsNeeded()
-    {
-        return true;
-    }
-
-    /** Some solvers may require a minimum degree of knowledge of the structure of the matrix. To make an example
-     * when solving a mixed u-p problem, it is important to identify the row associated to v and p.
-     * another example is the automatic prescription of rotation null-space for smoothed-aggregation solvers
-     * which require knowledge on the spatial position of the nodes associated to a given dof.
-     * This function is the place to eventually provide such data
-     */
-    void ProvideAdditionalData (
-        SparseMatrixType& rA,
-        VectorType& rX,
-        VectorType& rB,
-        typename ModelPart::DofsArrayType& rdof_set,
-        ModelPart& r_model_part
-    )
-    {
-KRATOS_WATCH(__LINE__)
-        int my_pid = rA.Comm().MyPID();
-
-        //filling the pressure mask
-        if(mp.size() != static_cast<unsigned int>(rA.NumMyRows())) mp.resize( rA.NumMyRows() );
-KRATOS_WATCH(__LINE__)
-        int counter = 0;
-        for (ModelPart::DofsArrayType::iterator it = rdof_set.begin(); it!=rdof_set.end(); it++)
-        {
-            if( it->GetSolutionStepValue ( PARTITION_INDEX ) == my_pid )
-            {
-                mp[counter]  = (it->GetVariable().Key() == PRESSURE);
-                counter++;
-            }
-        }
-KRATOS_WATCH(__LINE__)
-        if(counter != rA.NumMyRows())
-            KRATOS_ERROR << "pressure mask as a size " << mp.size() << " which does not correspond with the number of local rows:" << rA.NumMyRows() << std::endl; 
-
-    }
-
-    /**
-     * Print information about this object.
-     */
-    void  PrintInfo ( std::ostream& rOStream ) const
-    {
-        rOStream << "AMGCL_MPI solver finished.";
-    }
-
-    /**
-     * Print object's data.
-     */
-    void  PrintData ( std::ostream& rOStream ) const
-    {
-    }
-
-private:
-
-    double mtol;
-    int mmax_it;
-    int mverbosity;
-    unsigned int mcoarse_enough;
-    bool muse_linear_deflation = false;
-    bool mprovide_coordinates = false;
-    bool muse_block_matrices_if_possible = false;
-
-    std::vector< char > mp; //pressure mask
-    boost::property_tree::ptree mprm;
-
-
-    /**
-     * Assignment operator.
-     */
-    AmgclMPI_NS_Solver& operator= ( const AmgclMPI_NS_Solver& Other );
-
-    /**
-     * Copy constructor.
-     */
-    AmgclMPI_NS_Solver ( const AmgclMPI_NS_Solver& Other );
-
-};
-
-
-/**
- * input stream function
- */
-template<class TSparseSpaceType, class TDenseSpaceType,class TReordererType>
-inline std::istream& operator >> ( std::istream& rIStream, AmgclMPI_NS_Solver< TSparseSpaceType,
-                                   TDenseSpaceType, TReordererType>& rThis )
-{
-    return rIStream;
-}
-
-/**
- * output stream function
- */
-template<class TSparseSpaceType, class TDenseSpaceType, class TReordererType>
-inline std::ostream& operator << ( std::ostream& rOStream,
-                                   const AmgclMPI_NS_Solver<TSparseSpaceType,
-                                   TDenseSpaceType, TReordererType>& rThis )
-{
-    rThis.PrintInfo ( rOStream );
-    rOStream << std::endl;
-    rThis.PrintData ( rOStream );
-
-    return rOStream;
-}
-
-
-}  // namespace Kratos.
-
-#endif // KRATOS_AMGCL_NS_MPI_SOLVER_H_INCLUDED  defined 
-
-
diff --git a/applications/trilinos_application/tests/test_trilinos_linear_solvers.py b/applications/trilinos_application/tests/test_trilinos_linear_solvers.py
index 0ecb886b28d7..913651faeba5 100644
--- a/applications/trilinos_application/tests/test_trilinos_linear_solvers.py
+++ b/applications/trilinos_application/tests/test_trilinos_linear_solvers.py
@@ -28,7 +28,6 @@ def _auxiliary_test_function(self, settings, matrix_name="A.mm", absolute_norm=F
         space.ReadMatrixMarketMatrix(GetFilePath(matrix_name),pA)
         n = space.Size1(pA.GetReference())
 
-
         pAoriginal = space.CreateEmptyMatrixPointer(comm) #create a copy of A
         space.ReadMatrixMarketMatrix(GetFilePath(matrix_name),pAoriginal)
 
@@ -52,7 +51,7 @@ def _auxiliary_test_function(self, settings, matrix_name="A.mm", absolute_norm=F
         #construct the solver
         import trilinos_linear_solver_factory
         linear_solver = trilinos_linear_solver_factory.ConstructSolver(settings)
-
+        
         #solve
         linear_solver.Solve(pA.GetReference(),px.GetReference(),pb.GetReference())
 

From f6b16c4323348129fab6a218a1f46dbffa7cd0d7 Mon Sep 17 00:00:00 2001
From: Riccardo Rossi <rrossi@cimne.upc.edu>
Date: Sat, 2 Sep 2017 22:42:45 +0200
Subject: [PATCH 09/12] solving the segfault - unfortunately still not
 selecting correctly the solvers

---
 .../amgcl_mpi_schur_complement_solver.h       | 40 ++++++++++++++-----
 .../amgcl/mpi/schur_pressure_correction.hpp   |  1 +
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
index ad3e7830edc7..95e183012666 100644
--- a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
+++ b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
@@ -40,6 +40,7 @@
 #include <amgcl/amg.hpp>
 #include <amgcl/adapter/epetra.hpp>
 #include <amgcl/profiler.hpp>
+#include <amgcl/solver/bicgstab.hpp>
 
 #include <amgcl/mpi/make_solver.hpp>
 #include <amgcl/mpi/schur_pressure_correction.hpp>
@@ -138,9 +139,9 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType,
         mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString());
 
         //setting pressure solver options
-        mprm.put("precond.psolver.solver.local.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString());
-        mprm.put("precond.psolver.solver.local.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble());
-        mprm.put("precond.psolver.solver.local.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt());
+        mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString());
+        mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble());
+        mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt());
 //         mprm.put("precond.psolver.precond.local.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString());
 //         mprm.put("precond.psolver.precond.local.coarsening.aggr.eps_strong", 0.0);
 //         mprm.put("precond.psolver.precond.local.coarsening.aggr.block_size", 1);
@@ -203,24 +204,45 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType,
 //             >,
 //             amgcl::runtime::iterative_solver
 //             > SDD;
+        
+//         typedef  amgcl::mpi::make_solver<
+//             amgcl::mpi::schur_pressure_correction<
+//                 amgcl::mpi::make_solver<
+//                     amgcl::mpi::block_preconditioner<
+//                         amgcl::runtime::relaxation::as_preconditioner<Backend>
+//                     >,
+//                     amgcl::solver::bicgstab
+//                 >,
+//                 amgcl::mpi::make_solver<
+//                     amgcl::mpi::subdomain_deflation<
+//                         amgcl::runtime::amg<Backend>,
+//                         amgcl::runtime::iterative_solver,
+//                         amgcl::runtime::mpi::direct_solver<double>
+//                     >,
+//                     amgcl::runtime::iterative_solver
+//                 >
+//             >,
+//             amgcl::runtime::iterative_solver
+//             > SDD;
+            
     typedef amgcl::mpi::make_solver<
         amgcl::mpi::schur_pressure_correction<
             amgcl::mpi::make_solver<
                 amgcl::mpi::block_preconditioner<
-                    amgcl::runtime::relaxation::as_preconditioner<Backend, relaxation::damped_jacobi>
+                    amgcl::runtime::relaxation::as_preconditioner<Backend>
                     >,
-                amgcl::runtime::solver::bicgstab
+                amgcl::runtime::iterative_solver //amgcl::solver::bicgstab
                 >,
             amgcl::mpi::make_solver<
                 amgcl::mpi::subdomain_deflation<
-                    amg<Backend, coarsening::smoothed_aggregation, relaxation::ilu0>,
-                    amgcl::runtime::solver::bicgstab,
+                    amgcl::runtime::amg<Backend>,
+                    amgcl::runtime::iterative_solver, //amgcl::solver::bicgstab,
                     amgcl::mpi::skyline_lu<double>
                     >,
-                amgcl::runtime::solver::fgmres
+                amgcl::solver::fgmres
                 >
             >,
-        amgcl::runtime::solver::fgmres
+        amgcl::solver::fgmres
         > SDD;
         
         boost::function<double(ptrdiff_t,unsigned)> dv = amgcl::mpi::constant_deflation(1);
diff --git a/external_libraries/amgcl/mpi/schur_pressure_correction.hpp b/external_libraries/amgcl/mpi/schur_pressure_correction.hpp
index afab65dbab86..f5e0ed3589b6 100644
--- a/external_libraries/amgcl/mpi/schur_pressure_correction.hpp
+++ b/external_libraries/amgcl/mpi/schur_pressure_correction.hpp
@@ -481,6 +481,7 @@ class schur_pressure_correction {
 
                         u2x->col[u2x_head] = j;
                         u2x->val[u2x_head] = math::identity<value_type>();
+                        ++u2x_head;
                     }
                 }
             }

From f2c5dbd13719665c6e538ea296656cf9b6ae3c7c Mon Sep 17 00:00:00 2001
From: Riccardo Rossi <rrossi@cimne.upc.edu>
Date: Sun, 3 Sep 2017 11:14:27 +0200
Subject: [PATCH 10/12] adding some files which are present in the latest
 versions of amgcl

---
 external_libraries/amgcl/adapter/reorder.hpp  | 288 +++++++++++
 .../amgcl/reorder/cuthill_mckee.hpp           | 193 ++++++++
 external_libraries/amgcl/solver/idrs.hpp      | 461 ++++++++++++++++++
 3 files changed, 942 insertions(+)
 create mode 100644 external_libraries/amgcl/adapter/reorder.hpp
 create mode 100644 external_libraries/amgcl/reorder/cuthill_mckee.hpp
 create mode 100644 external_libraries/amgcl/solver/idrs.hpp

diff --git a/external_libraries/amgcl/adapter/reorder.hpp b/external_libraries/amgcl/adapter/reorder.hpp
new file mode 100644
index 000000000000..c858f0e1f866
--- /dev/null
+++ b/external_libraries/amgcl/adapter/reorder.hpp
@@ -0,0 +1,288 @@
+#ifndef AMGCL_ADAPTER_REORDER_HPP
+#define AMGCL_ADAPTER_REORDER_HPP
+
+/*
+The MIT License
+
+Copyright (c) 2012-2017 Denis Demidov <dennis.demidov@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/**
+\file    amgcl/adapter/reorder.hpp
+\author  Denis Demidov <dennis.demidov@gmail.com>
+\brief   On-the-fly reodering of matrix and vectors.
+\ingroup adapters
+*/
+
+#include <boost/type_traits.hpp>
+#include <boost/range/size.hpp>
+#include <boost/iterator/permutation_iterator.hpp>
+
+
+#include <amgcl/reorder/cuthill_mckee.hpp>
+#include <amgcl/backend/builtin.hpp>
+#include <amgcl/backend/detail/matrix_ops.hpp>
+
+namespace amgcl {
+namespace adapter {
+
+template <class Matrix>
+struct reordered_matrix {
+    typedef typename backend::value_type<Matrix>::type value_type;
+    typedef typename backend::row_iterator<Matrix>::type base_iterator;
+
+    const Matrix &A;
+    const ptrdiff_t * perm;
+    const ptrdiff_t * iperm;
+
+    reordered_matrix(const Matrix &A, const ptrdiff_t *perm, const ptrdiff_t * iperm)
+        : A(A), perm(perm), iperm(iperm)
+    {}
+
+    size_t rows() const {
+        return backend::rows(A);
+    }
+
+    size_t cols() const {
+        return backend::cols(A);
+    }
+
+    size_t nonzeros() const {
+        return backend::nonzeros(A);
+    }
+
+    struct row_iterator {
+        base_iterator base;
+        const ptrdiff_t * iperm;
+
+        row_iterator(const base_iterator &base, const ptrdiff_t *iperm)
+            : base(base), iperm(iperm)
+        {}
+
+        operator bool() const {
+            return base;
+        }
+
+        row_iterator& operator++() {
+            ++base;
+            return *this;
+        }
+
+        ptrdiff_t col() const {
+            return iperm[base.col()];
+        }
+
+        value_type value() const {
+            return base.value();
+        }
+    };
+
+    row_iterator row_begin(size_t i) const {
+        return row_iterator(backend::row_begin(A, perm[i]), iperm);
+    }
+};
+
+template <class Vector>
+struct reordered_vector {
+    typedef typename backend::value_type<typename boost::decay<Vector>::type>::type value_type;
+
+    Vector &x;
+    const ptrdiff_t *perm;
+
+    reordered_vector(Vector &x, const ptrdiff_t *perm) : x(x), perm(perm) {}
+
+    size_t size() const {
+        return boost::size(x);
+    }
+
+    const value_type& operator[](size_t i) const {
+        return x[perm[i]];
+    }
+
+    value_type& operator[](size_t i) {
+        return x[perm[i]];
+    }
+
+    boost::permutation_iterator<
+        typename boost::decay<Vector>::type::iterator,
+        const ptrdiff_t*
+        >
+    begin() {
+        return boost::make_permutation_iterator(boost::begin(x), perm);
+    }
+
+    boost::permutation_iterator<
+        typename boost::decay<Vector>::type::const_iterator,
+        const ptrdiff_t*
+        >
+    begin() const {
+        return boost::make_permutation_iterator(boost::begin(x), perm);
+    }
+
+    boost::permutation_iterator<
+        typename boost::decay<Vector>::type::iterator,
+        const ptrdiff_t*
+        >
+    end() {
+        return boost::make_permutation_iterator(boost::end(x), perm + size());
+    }
+
+    boost::permutation_iterator<
+        typename boost::decay<Vector>::type::const_iterator,
+        const ptrdiff_t*
+        >
+    end() const {
+        return boost::make_permutation_iterator(boost::end(x), perm + size());
+    }
+};
+
+} // namespace adapter
+
+namespace backend {
+
+//---------------------------------------------------------------------------
+// Specialization of matrix interface
+//---------------------------------------------------------------------------
+template <class Matrix>
+struct value_type< adapter::reordered_matrix<Matrix> >
+    : value_type<Matrix>
+{};
+
+template <class Matrix>
+struct rows_impl< adapter::reordered_matrix<Matrix> >
+{
+    static size_t get(const adapter::reordered_matrix<Matrix> &A) {
+        return A.rows();
+    }
+};
+
+template <class Matrix>
+struct cols_impl< adapter::reordered_matrix<Matrix> >
+{
+    static size_t get(const adapter::reordered_matrix<Matrix> &A) {
+        return A.cols();
+    }
+};
+
+template <class Matrix>
+struct nonzeros_impl< adapter::reordered_matrix<Matrix> >
+{
+    static size_t get(const adapter::reordered_matrix<Matrix> &A) {
+        return A.nonzeros();
+    }
+};
+
+template <class Matrix>
+struct row_iterator< adapter::reordered_matrix<Matrix> >
+{
+    typedef
+        typename adapter::reordered_matrix<Matrix>::row_iterator
+        type;
+};
+
+template <class Matrix>
+struct row_begin_impl< adapter::reordered_matrix<Matrix> >
+{
+    typedef adapter::reordered_matrix<Matrix> M;
+    static typename row_iterator<M>::type
+    get(const M &matrix, size_t row) {
+        return matrix.row_begin(row);
+    }
+};
+
+namespace detail {
+
+template <class Matrix>
+struct use_builtin_matrix_ops< adapter::reordered_matrix<Matrix> >
+    : boost::true_type
+{};
+
+} // namespace detail
+
+
+template <class Vector>
+struct is_builtin_vector< adapter::reordered_vector<Vector> >
+    : is_builtin_vector<typename boost::decay<Vector>::type>
+{};
+
+} // namespace backend
+
+namespace adapter {
+
+template <class ordering = amgcl::reorder::cuthill_mckee<false> >
+class reorder {
+    public:
+        template <class Matrix>
+        reorder(const Matrix &A) : n(backend::rows(A)), perm(n), iperm(n)
+        {
+            ordering::get(A, perm);
+#pragma omp parallel for
+            for(ptrdiff_t i = 0; i < n; ++i) iperm[perm[i]] = i;
+        }
+
+        template <class Matrix>
+        typename boost::disable_if<
+            backend::is_builtin_vector<Matrix>,
+            reordered_matrix<Matrix>
+        >::type
+        operator()(const Matrix &A) const {
+            return reordered_matrix<Matrix>(A, perm.data(), iperm.data());
+        }
+
+        template <class Vector>
+        typename boost::enable_if<
+            backend::is_builtin_vector<Vector>,
+            reordered_vector<Vector>
+        >::type
+        operator()(Vector &x) const {
+            return reordered_vector<Vector>(x, perm.data());
+        }
+
+        template <class Vector>
+        typename boost::enable_if<
+            backend::is_builtin_vector<Vector>,
+            reordered_vector<const Vector>
+        >::type
+        operator()(const Vector &x) const {
+            return reordered_vector<const Vector>(x, perm.data());
+        }
+
+        template <class Vector1, class Vector2>
+        void forward(const Vector1 &x, Vector2 &y) const {
+#pragma omp parallel for
+            for(ptrdiff_t i = 0; i < n; ++i) y[i] = x[perm[i]];
+        }
+
+        template <class Vector1, class Vector2>
+        void inverse(const Vector1 &x, Vector2 &y) const {
+#pragma omp parallel for
+            for(ptrdiff_t i = 0; i < n; ++i) y[perm[i]] = x[i];
+        }
+
+    private:
+        ptrdiff_t n;
+        backend::numa_vector<ptrdiff_t> perm, iperm;
+};
+
+} // namespace adapter
+} // namespace amgcl
+
+#endif
diff --git a/external_libraries/amgcl/reorder/cuthill_mckee.hpp b/external_libraries/amgcl/reorder/cuthill_mckee.hpp
new file mode 100644
index 000000000000..aa97926c7346
--- /dev/null
+++ b/external_libraries/amgcl/reorder/cuthill_mckee.hpp
@@ -0,0 +1,193 @@
+#ifndef AMGCL_REORDER_CUTHILL_MCKEE_HPP
+#define AMGCL_REORDER_CUTHILL_MCKEE_HPP
+
+/*
+The MIT License
+
+Copyright (c) 2012-2017 Denis Demidov <dennis.demidov@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/**
+\file   amgcl/reorder/cuthill_mckee.hpp
+\author Denis Demidov <dennis.demidov@gmail.com>
+\brief  (Reverse) Cuthill-McKee matrix reorder algorithm.
+
+The code is adopted from Kratos project http://www.cimne.com/kratos. The
+original code came with the following copyright notice:
+\verbatim
+Kratos Multi-Physics
+
+Copyright (c) 2012, Pooyan Dadvand, Riccardo Rossi, CIMNE (International Center for Numerical Methods in Engineering)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    Redistributions of source code must retain the above copyright notice, this
+    list of conditions and the following disclaimer.
+    Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+    All advertising materials mentioning features or use of this software must
+    display the following acknowledgement:
+    This product includes Kratos Multi-Physics technology.
+    Neither the name of the CIMNE nor the names of its contributors may be used
+    to endorse or promote products derived from this software without specific
+    prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED ANDON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT(INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THISSOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+\endverbatim
+*/
+
+#include <vector>
+#include <algorithm>
+
+#include <amgcl/backend/interface.hpp>
+#include <amgcl/reorder/cuthill_mckee.hpp>
+#include <amgcl/util.hpp>
+
+namespace amgcl {
+namespace reorder {
+
+template <bool reverse = false>
+struct cuthill_mckee {
+    template <class Matrix, class Vector>
+    static void get(const Matrix &A, Vector &perm) {
+        typedef typename backend::row_iterator<Matrix>::type row_iterator;
+        const ptrdiff_t n = backend::rows(A);
+
+        /* The data structure used to sort and traverse the level sets:
+         *
+         * The current level set is currentLevelSet;
+         * In this level set, there are nodes with degrees from 0 (not really
+         * useful) to maxDegreeInCurrentLevelSet.
+         * firstWithDegree[i] points to a node with degree i, or to -1 if it
+         * does not exist. nextSameDegree[firstWithDegree[i]] points to the
+         * second node with that degree, etc.
+         * While the level set is being traversed, the structure for the next
+         * level set is generated; nMDICLS will be the next
+         * maxDegreeInCurrentLevelSet and nFirstWithDegree will be
+         * firstWithDegree.
+         */
+        ptrdiff_t initialNode = 0; // node to start search
+        ptrdiff_t maxDegree   = 0;
+
+        std::vector<ptrdiff_t> degree(n);
+        std::vector<ptrdiff_t> levelSet(n, 0);
+        std::vector<ptrdiff_t> nextSameDegree(n, -1);
+
+#pragma omp parallel
+        {
+            ptrdiff_t maxd = 0;
+#pragma omp for
+            for(ptrdiff_t i = 0; i < n; ++i) {
+                ptrdiff_t row_width = 0;
+                for(row_iterator a = backend::row_begin(A, i); a; ++a, ++row_width);
+                degree[i] = row_width;
+                maxd = std::max(maxd, degree[i]);
+            }
+#pragma omp critical
+            {
+                maxDegree = std::max(maxDegree, maxd);
+            }
+        }
+
+        std::vector<ptrdiff_t> firstWithDegree(maxDegree + 1, -1);
+        std::vector<ptrdiff_t> nFirstWithDegree(maxDegree + 1);
+
+        // Initialize the first level set, made up by initialNode alone
+        perm[0] = initialNode;
+        ptrdiff_t currentLevelSet = 1;
+        levelSet[initialNode] = currentLevelSet;
+        ptrdiff_t maxDegreeInCurrentLevelSet = degree[initialNode];
+        firstWithDegree[maxDegreeInCurrentLevelSet] = initialNode;
+
+        // Main loop
+        for (ptrdiff_t next = 1; next < n; ) {
+            ptrdiff_t nMDICLS = 0;
+            std::fill(nFirstWithDegree.begin(), nFirstWithDegree.end(), -1);
+            bool empty = true; // used to detect different connected components
+
+            ptrdiff_t firstVal  = reverse ? maxDegreeInCurrentLevelSet : 0;
+            ptrdiff_t finalVal  = reverse ? -1 : maxDegreeInCurrentLevelSet + 1;
+            ptrdiff_t increment = reverse ? -1 : 1;
+
+            for(ptrdiff_t soughtDegree = firstVal; soughtDegree != finalVal; soughtDegree += increment)
+            {
+                ptrdiff_t node = firstWithDegree[soughtDegree];
+                while (node > 0) {
+                    // Visit neighbors
+                    for(row_iterator a = backend::row_begin(A, node); a; ++a) {
+                        ptrdiff_t c = a.col();
+                        if (levelSet[c] == 0) {
+                            levelSet[c] = currentLevelSet + 1;
+                            perm[next] = c;
+                            ++next;
+                            empty = false; // this level set is not empty
+                            nextSameDegree[c] = nFirstWithDegree[degree[c]];
+                            nFirstWithDegree[degree[c]] = c;
+                            nMDICLS = std::max(nMDICLS, degree[c]);
+                        }
+                    }
+                    node = nextSameDegree[node];
+                }
+            }
+
+            ++currentLevelSet;
+            maxDegreeInCurrentLevelSet = nMDICLS;
+            for(ptrdiff_t i = 0; i <= nMDICLS; ++i)
+                firstWithDegree[i] = nFirstWithDegree[i];
+
+            if (empty) {
+                // The graph contains another connected component that we
+                // cannot reach.  Search for a node that has not yet been
+                // included in a level set, and start exploring from it.
+                bool found = false;
+                for(ptrdiff_t i = 0; i < n; ++i) {
+                    if (levelSet[i] == 0) {
+                        perm[next] = i;
+                        ++next;
+                        levelSet[i] = currentLevelSet;
+                        maxDegreeInCurrentLevelSet = degree[i];
+                        firstWithDegree[maxDegreeInCurrentLevelSet] = i;
+                        found = true;
+                        break;
+                    }
+                }
+                precondition(found, "Internal consistency error at skyline_lu");
+            }
+        }
+    }
+};
+
+} // namespace reorder
+} // namespace amgcl
+
+#endif
diff --git a/external_libraries/amgcl/solver/idrs.hpp b/external_libraries/amgcl/solver/idrs.hpp
new file mode 100644
index 000000000000..0f6866203f33
--- /dev/null
+++ b/external_libraries/amgcl/solver/idrs.hpp
@@ -0,0 +1,461 @@
+#ifndef AMGCL_SOLVER_IDRS_HPP
+#define AMGCL_SOLVER_IDRS_HPP
+
+/*
+The MIT License
+
+Copyright (c) 2012-2017 Denis Demidov <dennis.demidov@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/**
+\file   idrs.hpp
+\author Denis Demidov <dennis.demidov@gmail.com>
+\brief  IDR(s) method.
+
+The code is ported from Matlab code published at
+http://ta.twi.tudelft.nl/nw/users/gijzen/IDR.html.
+
+This is a very stable and efficient IDR(s) variant (implemented in the MATLAB
+code idrs.m given above) as described in: Martin B. van Gijzen and Peter
+Sonneveld, Algorithm 913: An Elegant IDR(s) Variant that Efficiently Exploits
+Bi-orthogonality Properties. ACM Transactions on Mathematical Software, Vol.
+38, No. 1, pp. 5:1-5:19, 2011 (copyright ACM).
+*/
+
+#include <vector>
+#include <algorithm>
+
+#include <boost/tuple/tuple.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/normal_distribution.hpp>
+
+#include <amgcl/backend/interface.hpp>
+#include <amgcl/solver/detail/default_inner_product.hpp>
+#include <amgcl/util.hpp>
+
+#ifdef _OPENMP
+#  include <omp.h>
+#endif
+
+namespace amgcl {
+namespace solver {
+
+/// IDR(s) method (Induced Dimension Reduction)
+template <
+    class Backend,
+    class InnerProduct = detail::default_inner_product
+    >
+class idrs {
+    public:
+        typedef Backend backend_type;
+
+        typedef typename Backend::vector     vector;
+        typedef typename Backend::value_type value_type;
+        typedef typename Backend::params     backend_params;
+
+        typedef typename math::scalar_of<value_type>::type scalar_type;
+        typedef typename math::rhs_of<value_type>::type rhs_type;
+
+        typedef typename math::inner_product_impl<
+            typename math::rhs_of<value_type>::type
+            >::return_type coef_type;
+
+        /// Solver parameters.
+        struct params {
+            /// Dimension of the shadow space in IDR(s).
+            unsigned s;
+
+            /// Computation of omega.
+            /**
+             * If omega = 0: a standard minimum residual step is performed
+             * If omega > 0: omega is increased if
+             * the cosine of the angle between Ar and r < omega
+             * Default: omega = 0.7;
+             */
+            scalar_type omega;
+
+            /// Specifies if residual smoothing must be applied.
+            bool smoothing;
+
+            /// Residual replacement.
+            /**
+             * Determines the residual replacement strategy.
+             *    If |r| > 1E3 |b| TOL/EPS) (EPS is the machine precision)
+             *    the recursively computed residual is replaced by the true residual
+             *    once |r| < |b| (to reduce the effect of large intermediate residuals 
+             *    on the final accuracy).
+             * Default: No residual replacement.
+             */
+            bool replacement;
+
+            /// Maximum number of iterations.
+            unsigned maxiter;
+
+            /// Target relative residual error.
+            scalar_type tol;
+
+            /// Target absolute residual error.
+            scalar_type abstol;
+
+            params(
+                    unsigned    s           = 4,
+                    scalar_type omega       = 0.7,
+                    bool        smoothing   = false,
+                    bool        replacement = false,
+                    unsigned    maxiter     = 100,
+                    scalar_type tol         = 1e-8
+                  ) :
+                s(s), omega(omega), smoothing(smoothing),
+                replacement(replacement), maxiter(maxiter), tol(tol),
+                abstol(std::numeric_limits<scalar_type>::min())
+            { }
+
+            params(const boost::property_tree::ptree &p)
+                : AMGCL_PARAMS_IMPORT_VALUE(p, s),
+                  AMGCL_PARAMS_IMPORT_VALUE(p, omega),
+                  AMGCL_PARAMS_IMPORT_VALUE(p, smoothing),
+                  AMGCL_PARAMS_IMPORT_VALUE(p, replacement),
+                  AMGCL_PARAMS_IMPORT_VALUE(p, maxiter),
+                  AMGCL_PARAMS_IMPORT_VALUE(p, tol),
+                  AMGCL_PARAMS_IMPORT_VALUE(p, abstol)
+            {
+                AMGCL_PARAMS_CHECK(p, (s)(omega)(smoothing)(replacement)(maxiter)(tol)(abstol));
+            }
+
+            void get(boost::property_tree::ptree &p, const std::string &path) const {
+                AMGCL_PARAMS_EXPORT_VALUE(p, path, s);
+                AMGCL_PARAMS_EXPORT_VALUE(p, path, omega);
+                AMGCL_PARAMS_EXPORT_VALUE(p, path, smoothing);
+                AMGCL_PARAMS_EXPORT_VALUE(p, path, replacement);
+                AMGCL_PARAMS_EXPORT_VALUE(p, path, maxiter);
+                AMGCL_PARAMS_EXPORT_VALUE(p, path, tol);
+                AMGCL_PARAMS_EXPORT_VALUE(p, path, abstol);
+            }
+        } prm;
+
+        /// Preallocates necessary data structures for the system of size \p n.
+        idrs(
+                size_t n,
+                const params &prm = params(),
+                const backend_params &bprm = backend_params(),
+                const InnerProduct &inner_product = InnerProduct()
+             )
+            : prm(prm), n(n), inner_product(inner_product),
+              M(boost::extents[prm.s][prm.s]),
+              f(prm.s), c(prm.s),
+              r(Backend::create_vector(n, bprm)),
+              v(Backend::create_vector(n, bprm)),
+              t(Backend::create_vector(n, bprm))
+        {
+            static const scalar_type one = math::identity<scalar_type>();
+            static const scalar_type zero = math::zero<scalar_type>();
+
+            if (prm.smoothing) {
+              x_s = Backend::create_vector(n, bprm);
+              r_s = Backend::create_vector(n, bprm);
+            }
+
+            G.reserve(prm.s);
+            U.reserve(prm.s);
+            for(unsigned i = 0; i < prm.s; ++i) {
+                G.push_back(Backend::create_vector(n, bprm));
+                U.push_back(Backend::create_vector(n, bprm));
+            }
+
+            // Initialize P.
+            P.reserve(prm.s);
+            {
+                std::vector<rhs_type> p(n);
+
+#pragma omp parallel
+                {
+#ifdef _OPENMP
+                    int tid = omp_get_thread_num();
+#else
+                    int tid = 0;
+#endif
+
+                    boost::random::mt19937 rng(tid);
+                    boost::random::normal_distribution<scalar_type> rnd;
+
+                    for(unsigned j = 0; j < prm.s; ++j) {
+#pragma omp for
+                        for(ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i)
+                            p[i] = math::constant<rhs_type>(rnd(rng));
+
+#pragma omp single
+                        {
+                            P.push_back(Backend::copy_vector(p, bprm));
+                        }
+                    }
+                }
+
+                for(unsigned j = 0; j < prm.s; ++j) {
+                    for(unsigned k = 0; k < j; ++k) {
+                        coef_type alpha = inner_product(*P[k], *P[j]);
+                        backend::axpby(-alpha, *P[k], one, *P[j]);
+                    }
+                    scalar_type norm_pj = norm(*P[j]);
+                    backend::axpby(math::inverse(norm_pj), *P[j], zero, *P[j]);
+                }
+            }
+        }
+
+        /* Computes the solution for the given system matrix \p A and the
+         * right-hand side \p rhs.  Returns the number of iterations made and
+         * the achieved residual as a ``boost::tuple``. The solution vector
+         * \p x provides initial approximation in input and holds the computed
+         * solution on output.
+         *
+         * The system matrix may differ from the matrix used during
+         * initialization. This may be used for the solution of non-stationary
+         * problems with slowly changing coefficients. There is a strong chance
+         * that a preconditioner built for a time step will act as a reasonably
+         * good preconditioner for several subsequent time steps [DeSh12]_.
+         */
+        template <class Matrix, class Precond, class Vec1, class Vec2>
+        boost::tuple<size_t, scalar_type> operator()(
+                Matrix  const &A,
+                Precond const &Prec,
+                Vec1    const &rhs,
+                Vec2          &x
+                ) const
+        {
+            static const scalar_type one = math::identity<scalar_type>();
+            static const scalar_type zero = math::zero<scalar_type>();
+
+            scalar_type norm_rhs = norm(rhs);
+            if (norm_rhs < amgcl::detail::eps<scalar_type>(n)) {
+                backend::clear(x);
+                return boost::make_tuple(0, norm_rhs);
+            }
+
+            scalar_type eps = std::max(prm.tol * norm_rhs, prm.abstol);
+
+            // Compute initial residual:
+            backend::residual(rhs, A, x, *r);
+
+            scalar_type res_norm = norm(*r);
+            if (res_norm <= eps) {
+                // Initial guess is a good enough solution.
+                return boost::make_tuple(0, res_norm / norm_rhs);
+            }
+
+            if (prm.smoothing) {
+                backend::copy( x, *x_s);
+                backend::copy(*r, *r_s);
+            }
+
+            // Initialization.
+            coef_type om = math::identity<coef_type>();
+
+            for(unsigned i = 0; i < prm.s; ++i) {
+                backend::clear(*G[i]);
+                backend::clear(*U[i]);
+
+                for(unsigned j = 0; j < prm.s; ++j)
+                    M[i][j] = (i == j);
+            }
+
+            scalar_type eps_replace = norm_rhs / (
+                    // Number close to machine precision:
+                    1e3 * std::numeric_limits<scalar_type>::epsilon());
+
+            // Main iteration loop, build G-spaces:
+            size_t iter = 0;
+            bool trueres = false;
+            while(iter < prm.maxiter && res_norm > eps) {
+                // New righ-hand size for small system:
+                for(unsigned i = 0; i < prm.s; ++i)
+                    f[i] = inner_product(*r, *P[i]);
+
+                for(unsigned k = 0; k < prm.s; ++k) {
+                    // Compute new v
+                    backend::copy(*r, *v);
+
+                    // Solve small system (Note: M is lower triangular)
+                    // and make v orthogonal to P:
+                    for(unsigned i = k; i < prm.s; ++i) {
+                        c[i] = f[i];
+                        for(unsigned j = k; j < i; ++j)
+                            c[i] -= M[i][j] * c[j];
+                        c[i] = math::inverse(M[i][i]) * c[i];
+
+                        backend::axpby(-c[i], *G[i], one, *v);
+                    }
+
+                    Prec.apply(*v, *t);
+
+                    // Compute new U[k]
+                    backend::axpby(om, *t, c[k], *U[k]);
+                    for(unsigned i = k+1; i < prm.s; ++i)
+                        backend::axpby(c[i], *U[i], one, *U[k]);
+
+                    // Compute new G[k], G[k] is in space G_j
+                    backend::spmv(one, A, *U[k], zero, *G[k]);
+
+                    // Bi-Orthogonalise the new basis vectors:
+                    for(unsigned i = 0; i < k; ++i) {
+                        coef_type alpha = inner_product(*G[k], *P[i]) / M[i][i];
+
+                        backend::axpby(-alpha, *G[i], one, *G[k]);
+                        backend::axpby(-alpha, *U[i], one, *U[k]);
+                    }
+
+                    // New column of M = P'*G  (first k-1 entries are zero)
+                    for(unsigned i = k; i < prm.s; ++i)
+                        M[i][k] = backend::inner_product(*G[k], *P[i]);
+
+                    precondition(!math::is_zero(M[k][k]), "IDR(s) breakdown: zero M[k,k]");
+
+                    // Make r orthogonal to q_i, i = [0..k)
+                    coef_type beta = math::inverse(M[k][k]) * f[k];
+                    backend::axpby(-beta, *G[k], one, *r);
+                    backend::axpby( beta, *U[k], one,  x);
+
+                    res_norm = norm(*r);
+
+                    if (prm.replacement && res_norm > eps_replace)
+                        trueres = true;
+
+                    // Smoothing
+                    if (prm.smoothing) {
+                        backend::axpbypcz(one, *r_s, -one, *r, zero, *t);
+                        coef_type gamma = inner_product(*t, *r_s) / inner_product(*t, *t);
+                        backend::axpby(-gamma, *t, one, *r_s);
+                        backend::axpbypcz(-gamma, *x_s, gamma, x, one, *x_s);
+                        res_norm = norm(*r_s);
+                    }
+
+                    if (res_norm <= eps || ++iter >= prm.maxiter) break;
+
+                    // New f = P'*r (first k  components are zero)
+                    for(unsigned i = k + 1; i < prm.s; ++i)
+                        f[i] -= beta * M[i][k];
+                }
+
+                if (res_norm <= eps || iter >= prm.maxiter) break;
+
+                // Now we have sufficient vectors in G_j to compute residual in G_j+1
+                // Note: r is already perpendicular to P so v = r
+
+                Prec.apply(*r, *v);
+                backend::spmv(one, A, *v, zero, *t);
+
+                // Computation of a new omega
+                om = omega(*t, *r);
+                precondition(!math::is_zero(om), "IDR(s) breakdown: zero omega");
+
+                backend::axpby(-om, *t, one, *r);
+                backend::axpby( om, *v, one,  x);
+
+                res_norm = norm(*r);
+                if (prm.replacement && res_norm > eps_replace)
+                    trueres = true;
+
+                // Residual replacement?
+                if (trueres && res_norm < norm_rhs) {
+                    trueres = 0;
+                    backend::residual(rhs, A, x, *r);
+                }
+
+                // Smoothing.
+                if (prm.smoothing) {
+                    backend::axpbypcz(one, *r_s, -one, *r, zero, *t);
+                    coef_type gamma = inner_product(*t, *r_s) / inner_product(*t, *t);
+                    backend::axpby(-gamma, *t, one, *r_s);
+                    backend::axpbypcz(-gamma, *x_s, gamma, x, one, *x_s);
+                    res_norm = norm(*r_s);
+                }
+
+                ++iter;
+            }
+
+            if (prm.smoothing)
+                backend::copy(*x_s, x);
+
+            backend::residual(rhs, A, x, *r);
+            res_norm = norm(*r);
+
+            return boost::make_tuple(iter, res_norm / norm_rhs);
+        }
+
+        /* Computes the solution for the given right-hand side \p rhs. The
+         * system matrix is the same that was used for the setup of the
+         * preconditioner \p P.  Returns the number of iterations made and the
+         * achieved residual as a ``boost::tuple``. The solution vector \p x
+         * provides initial approximation in input and holds the computed
+         * solution on output.
+         */
+        template <class Precond, class Vec1, class Vec2>
+        boost::tuple<size_t, scalar_type> operator()(
+                Precond const &P,
+                Vec1    const &rhs,
+                Vec2          &x
+                ) const
+        {
+            return (*this)(P.system_matrix(), P, rhs, x);
+        }
+
+        friend std::ostream& operator<<(std::ostream &os, const idrs &s) {
+            return os << "IDR(" << s.prm.s << "): " << s.n << " unknowns";
+        }
+
+    private:
+        size_t n;
+
+        InnerProduct inner_product;
+
+        mutable boost::multi_array<coef_type,2> M;
+        mutable std::vector<coef_type> f, c;
+
+        boost::shared_ptr<vector> r, v, t;
+        boost::shared_ptr<vector> x_s;
+        boost::shared_ptr<vector> r_s;
+
+        std::vector< boost::shared_ptr<vector> > P, G, U;
+
+
+        template <class Vec>
+        scalar_type norm(const Vec &x) const {
+            return std::abs(sqrt(inner_product(x, x)));
+        }
+
+        template <class Vector1, class Vector2>
+        coef_type omega(const Vector1 &t, const Vector2 &s) const {
+            scalar_type norm_t = norm(t);
+            scalar_type norm_s = norm(s);
+
+            coef_type   ts  = inner_product(t, s);
+            scalar_type rho = math::norm(ts / (norm_t * norm_s));
+            coef_type   om  = ts / (norm_t * norm_t);
+
+            if (rho < prm.omega)
+                om *= prm.omega/rho;
+
+            return om;
+        }
+};
+
+} // namespace solver
+} // namespace amgcl
+
+#endif

From a9eb953e70a7425a395c39ab34df0bff2ed9ad73 Mon Sep 17 00:00:00 2001
From: Riccardo Rossi <rrossi@cimne.upc.edu>
Date: Tue, 5 Sep 2017 09:47:07 +0200
Subject: [PATCH 11/12] correction to make the mpi solver to work correctly

---
 .../amgcl/mpi/schur_pressure_correction.hpp    | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/external_libraries/amgcl/mpi/schur_pressure_correction.hpp b/external_libraries/amgcl/mpi/schur_pressure_correction.hpp
index f5e0ed3589b6..60711c9a17e0 100644
--- a/external_libraries/amgcl/mpi/schur_pressure_correction.hpp
+++ b/external_libraries/amgcl/mpi/schur_pressure_correction.hpp
@@ -418,8 +418,22 @@ class schur_pressure_correction {
 
             tmp = backend_type::create_vector(nu, bprm);
 
-            if (prm.approx_schur)
-                M = backend_type::copy_vector(diagonal(*Kuu, /*invert = */true), bprm);
+            if (prm.approx_schur) {
+                M = backend_type::create_vector(nu, bprm);
+
+#pragma omp parallel
+                for(ptrdiff_t i = 0; i < nu; ++i) {
+                    // Keep in mind Kuu has global column numeration:
+                    ptrdiff_t dia = i + u_beg;
+                    value_type v = math::zero<value_type>();
+                    for(ptrdiff_t j = Kuu->ptr[i], e = Kuu->ptr[i+1]; j < e; ++j) {
+                        if (Kuu->col[j] == dia) {
+                            v = math::inverse(Kuu->val[j]);
+                        }
+                    }
+                    (*M)[i] = v;
+                }
+            }
 
             // Scatter/Gather matrices
             boost::shared_ptr<build_matrix> x2u = boost::make_shared<build_matrix>();

From 6615d272628dae7b2907294d3f06c4c22e58a93b Mon Sep 17 00:00:00 2001
From: Riccardo Rossi <rrossi@cimne.upc.edu>
Date: Tue, 5 Sep 2017 11:23:27 +0200
Subject: [PATCH 12/12] corrections to the solver settings

---
 .../amgcl_mpi_schur_complement_solver.h       | 103 +++++++++++-------
 1 file changed, 66 insertions(+), 37 deletions(-)

diff --git a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
index 95e183012666..94f97c7368bb 100644
--- a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
+++ b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h
@@ -91,7 +91,7 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType,
                                         },
                                         "pressure_block_preconditioner" :
                                         {
-                                            "krylov_type" : "lgmres",
+                                            "krylov_type" : "fgmres",
                                             "tolerance" : 1e-2,
                                             "preconditioner_type" : "spai0",
                                             "max_iteration": 20
@@ -123,6 +123,35 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType,
         }    
 
         mCoarseEnough = rParameters["coarse_enough"].GetInt();
+        
+//         {
+//             "solver": {
+//                 "type" : "fgmres",
+//                 "M": 50
+//             },
+//             "precond": {
+//                 "usolver": {
+//                     "solver": {
+//                         "type" : "gmres",
+//                         "tol": 0.001,
+//                         "maxiter": 5
+//                     }
+//                 },
+//                 "psolver": {
+//                     "solver": {
+//                         "type" : "gmres",
+//                         "tol": 0.01,
+//                         "maxiter": 20
+//                     },
+//                     "precond" : {
+//                         "isolver" : {
+//                             "type" : "gmres",
+//                             "maxiter" : 2
+//                         }
+//                     }
+//                 }
+//             }
+//         }   
 
         mTolerance = rParameters["tolerance"].GetDouble();
         mMaxIterations = rParameters["max_iteration"].GetInt();
@@ -185,13 +214,33 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType,
 
         typedef amgcl::backend::builtin<double> Backend;
         
+        typedef  amgcl::mpi::make_solver<
+            amgcl::mpi::schur_pressure_correction<
+                amgcl::mpi::make_solver<
+                    amgcl::mpi::block_preconditioner<
+                        amgcl::runtime::relaxation::as_preconditioner<Backend>
+                    >,
+                    amgcl::runtime::iterative_solver
+                >,
+                amgcl::mpi::make_solver<
+                    amgcl::mpi::subdomain_deflation<
+                        amgcl::runtime::amg<Backend>,
+                        amgcl::runtime::iterative_solver,
+                        amgcl::runtime::mpi::direct_solver<double>
+                    >,
+                    amgcl::runtime::iterative_solver
+                >
+            >,
+            amgcl::runtime::iterative_solver
+            > SDD;
+        
 //         typedef  amgcl::mpi::make_solver<
 //             amgcl::mpi::schur_pressure_correction<
 //                 amgcl::mpi::make_solver<
 //                     amgcl::mpi::block_preconditioner<
 //                         amgcl::runtime::relaxation::as_preconditioner<Backend>
 //                     >,
-//                     amgcl::runtime::iterative_solver
+//                     amgcl::solver::bicgstab
 //                 >,
 //                 amgcl::mpi::make_solver<
 //                     amgcl::mpi::subdomain_deflation<
@@ -204,46 +253,26 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType,
 //             >,
 //             amgcl::runtime::iterative_solver
 //             > SDD;
-        
-//         typedef  amgcl::mpi::make_solver<
-//             amgcl::mpi::schur_pressure_correction<
-//                 amgcl::mpi::make_solver<
-//                     amgcl::mpi::block_preconditioner<
-//                         amgcl::runtime::relaxation::as_preconditioner<Backend>
+            
+//     typedef amgcl::mpi::make_solver<
+//         amgcl::mpi::schur_pressure_correction<
+//             amgcl::mpi::make_solver<
+//                 amgcl::mpi::block_preconditioner<
+//                     amgcl::runtime::relaxation::as_preconditioner<Backend>
 //                     >,
-//                     amgcl::solver::bicgstab
+//                 amgcl::solver::gmres //amgcl::solver::bicgstab
 //                 >,
-//                 amgcl::mpi::make_solver<
-//                     amgcl::mpi::subdomain_deflation<
-//                         amgcl::runtime::amg<Backend>,
-//                         amgcl::runtime::iterative_solver,
-//                         amgcl::runtime::mpi::direct_solver<double>
+//             amgcl::mpi::make_solver<
+//                 amgcl::mpi::subdomain_deflation<
+//                     amgcl::runtime::amg<Backend>,
+//                     amgcl::solver::gmres, //amgcl::solver::bicgstab,
+//                     amgcl::mpi::skyline_lu<double>
 //                     >,
-//                     amgcl::runtime::iterative_solver
+//                 amgcl::solver::fgmres
 //                 >
 //             >,
-//             amgcl::runtime::iterative_solver
-//             > SDD;
-            
-    typedef amgcl::mpi::make_solver<
-        amgcl::mpi::schur_pressure_correction<
-            amgcl::mpi::make_solver<
-                amgcl::mpi::block_preconditioner<
-                    amgcl::runtime::relaxation::as_preconditioner<Backend>
-                    >,
-                amgcl::runtime::iterative_solver //amgcl::solver::bicgstab
-                >,
-            amgcl::mpi::make_solver<
-                amgcl::mpi::subdomain_deflation<
-                    amgcl::runtime::amg<Backend>,
-                    amgcl::runtime::iterative_solver, //amgcl::solver::bicgstab,
-                    amgcl::mpi::skyline_lu<double>
-                    >,
-                amgcl::solver::fgmres
-                >
-            >,
-        amgcl::solver::fgmres
-        > SDD;
+//         amgcl::solver::fgmres
+//         > SDD;
         
         boost::function<double(ptrdiff_t,unsigned)> dv = amgcl::mpi::constant_deflation(1);
         mprm.put("precond.psolver.precond.num_def_vec", 1);