From e8342fae8b3672e1bcc715ab276897117f09f857 Mon Sep 17 00:00:00 2001 From: Riccardo Rossi Date: Tue, 1 Aug 2017 11:21:07 +0200 Subject: [PATCH 01/12] initial commit to use parallel version of amgcl schur complement solver - not yet working --- .../add_trilinos_linear_solvers_to_python.cpp | 6 + .../external_includes/mpi_amgcl_ns_solver.h | 380 ++++++++++++++++++ .../trilinos_linear_solver_factory.py | 2 + 3 files changed, 388 insertions(+) create mode 100644 applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h diff --git a/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp b/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp index b21656b3d2fb..dd10f7bb4bb8 100644 --- a/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp +++ b/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp @@ -74,6 +74,7 @@ #include "external_includes/ml_solver.h" #include "external_includes/amgcl_solver.h" +#include "external_includes/mpi_amgcl_ns_solver.h" namespace Kratos { @@ -135,6 +136,11 @@ void AddLinearSolvers() .def("SetIntParameter", &AmgclMPISolverType::SetIntParameter) ; + typedef AmgclMPI_NS_Solver AmgclMPI_NS_SolverType; + class_, boost::noncopyable > + ("AmgclMPI_NS_Solver",init()) + ; + enum_("AztecScalingType") .value("NoScaling", NoScaling) .value("LeftScaling", LeftScaling) diff --git a/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h b/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h new file mode 100644 index 000000000000..5f6e6cca3262 --- /dev/null +++ b/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h @@ -0,0 +1,380 @@ +// | / | +// ' / __| _` | __| _ \ __| +// . \ | ( | | ( |\__ ` +// _|\_\_| \__,_|\__|\___/ ____/ +// Multi-Physics +// +// License: BSD License +// Kratos default license: kratos/license.txt +// +// Main authors: Denis Demidov +// Riccardo Rossi +// + + +#if !defined(KRATOS_AMGCL_NS_MPI_SOLVER_H_INCLUDED ) +#define KRATOS_AMGCL_NS_MPI_SOLVER_H_INCLUDED + + +#ifndef AMGCL_PARAM_UNKNOWN +# define AMGCL_PARAM_UNKNOWN(name) \ + std::cerr << "AMGCL WARNING: unknown parameter " << name << std::endl +#endif + + +// External includes +#include "boost/smart_ptr.hpp" + +// Project includes +#include "includes/define.h" +#include "includes/kratos_parameters.h" +#include "linear_solvers/linear_solver.h" +#include "external_includes/amgcl_solver.h" + +//aztec solver includes +#include "AztecOO.h" +#include "Epetra_LinearProblem.h" +//#include "Teuchos_ParameterList.hpp" + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include //needed to print AMGCL internal settings + + + +namespace Kratos +{ + + + +template< class TSparseSpaceType, class TDenseSpaceType, + class TReordererType = Reorderer > +class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType, + TDenseSpaceType, TReordererType> +{ +public: + /** + * Counted pointer of AmgclMPI_NS_Solver + */ + KRATOS_CLASS_POINTER_DEFINITION ( AmgclMPI_NS_Solver ); + + typedef LinearSolver BaseType; + + typedef typename TSparseSpaceType::MatrixType SparseMatrixType; + + typedef typename TSparseSpaceType::VectorType VectorType; + + typedef typename TDenseSpaceType::MatrixType DenseMatrixType; + + + AmgclMPI_NS_Solver ( Parameters rParameters ) + { + Parameters default_parameters( R"( + { + "solver_type" : "AmgclMPI_NS_Solver", + "krylov_type" : "fgmres", + "velocity_block_preconditioner" : + { + "krylov_type" : "lgmres", + "tolerance" : 1e-3, + "preconditioner_type" : "ilu0", + "max_iteration": 5 + }, + "pressure_block_preconditioner" : + { + "krylov_type" : "lgmres", + "tolerance" : 1e-2, + "preconditioner_type" : "spai0", + "max_iteration": 20 + }, + "tolerance" : 1e-9, + "gmres_krylov_space_dimension": 50, + "coarsening_type": "aggregation", + "max_iteration": 50, + "verbosity" : 1, + "scaling": false, + "coarse_enough" : 5000 + } )" ); + + + //now validate agains defaults -- this also ensures no type mismatch + rParameters.ValidateAndAssignDefaults(default_parameters); + + std::stringstream msg; + + //validate if values are admissible + std::set available_preconditioners = {"spai0","ilu0","damped_jacobi","gauss_seidel","chebyshev"}; + + //check velocity block settings + if(available_preconditioners.find(rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()) == available_preconditioners.end()) + { + msg << "currently prescribed velocity_block_preconditioner preconditioner_type : " << rParameters["velocity_block_preconditioner"]["smoother_type"].GetString() << std::endl; + msg << "admissible values are : spai0,ilu0,damped_jacobi,gauss_seidel,chebyshev"<< std::endl; + KRATOS_THROW_ERROR(std::invalid_argument," smoother_type is invalid: ",msg.str()); + } + + mcoarse_enough = rParameters["coarse_enough"].GetInt(); + + mtol = rParameters["tolerance"].GetDouble(); + mmax_it = rParameters["max_iteration"].GetInt(); + mverbosity=rParameters["verbosity"].GetInt(); + mprm.put("solver.type", rParameters["krylov_type"].GetString()); + + if(rParameters["krylov_type"].GetString() == "gmres" || rParameters["krylov_type"].GetString() == "lgmres" || rParameters["krylov_type"].GetString() == "fgmres") + mprm.put("solver.M", rParameters["gmres_krylov_space_dimension"].GetInt()); + + //setting velocity solver options + mprm.put("precond.usolver.solver.type", rParameters["velocity_block_preconditioner"]["krylov_type"].GetString()); + mprm.put("precond.usolver.solver.tol", rParameters["velocity_block_preconditioner"]["tolerance"].GetDouble()); + mprm.put("precond.usolver.solver.maxiter", rParameters["velocity_block_preconditioner"]["max_iteration"].GetInt()); + mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()); + + //setting pressure solver options + mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString()); + mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble()); + mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt()); + mprm.put("precond.psolver.precond.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString()); + mprm.put("precond.psolver.precond.coarsening.aggr.eps_strong", 0.0); + mprm.put("precond.psolver.precond.coarsening.aggr.block_size", 1); + mprm.put("precond.psolver.precond.coarse_enough",mcoarse_enough); + + } + + + /** + * Destructor + */ + virtual ~AmgclMPI_NS_Solver() {} + + /** + * Normal solve method. + * Solves the linear system Ax=b and puts the result on SystemVector& rX. + * rX is also th initial guess for iterative methods. + * @param rA. System matrix + * @param rX. Solution vector. + * @param rB. Right hand side vector. + */ + bool Solve ( SparseMatrixType& rA, VectorType& rX, VectorType& rB ) + { + KRATOS_TRY + + using amgcl::prof; + prof.reset(); + + amgcl::mpi::communicator world ( MPI_COMM_WORLD ); + if ( mverbosity >=0 && world.rank == 0 ) { + std::cout << "World size: " << world.size << std::endl; + } + + int chunk = rA.NumMyRows(); + boost::iterator_range xrange ( rX.Values(), rX.Values() + chunk ); + boost::iterator_range frange ( rB.Values(), rB.Values() + chunk ); + + if ( mverbosity > 1 && world.rank == 0 ) { + write_json ( std::cout, mprm ); + } + + typedef amgcl::backend::builtin Backend; + + typedef amgcl::mpi::make_solver< + amgcl::mpi::schur_pressure_correction< + amgcl::mpi::make_solver< + amgcl::mpi::block_preconditioner< + amgcl::runtime::relaxation::as_preconditioner + >, + amgcl::runtime::iterative_solver + >, + amgcl::mpi::make_solver< + amgcl::mpi::subdomain_deflation< + amgcl::runtime::amg, + amgcl::runtime::iterative_solver, + amgcl::runtime::mpi::direct_solver + >, + amgcl::runtime::iterative_solver + > + >, + amgcl::runtime::iterative_solver + > SDD; + + boost::function dv = amgcl::mpi::constant_deflation(1); + mprm.put("precond.psolver.precond.num_def_vec", 1); + mprm.put("precond.psolver.precond.def_vec", &dv); + + mprm.put("precond.pmask", static_cast(&mp[0])); + mprm.put("precond.pmask_size", mp.size()); + + prof.tic ( "setup" ); + SDD solve ( world, amgcl::backend::map ( rA ), mprm ); + double tm_setup = prof.toc ( "setup" ); + + prof.tic ( "Solve" ); + size_t iters; + double resid; + boost::tie ( iters, resid ) = solve ( frange, xrange ); + double solve_tm = prof.toc ( "Solve" ); + + + + if ( rA.Comm().MyPID() == 0 ) { + if ( mverbosity > 0 ) { + std::cout + << "------- AMGCL -------\n" << std::endl + << "Iterations : " << iters << std::endl + << "Error : " << resid << std::endl + << "amgcl setup time: " << tm_setup << std::endl + << "amgcl solve time: " << solve_tm << std::endl; + } + + if ( mverbosity > 1 ) { + std::cout << prof << std::endl; + } + } + + + + return true; + + + + + + KRATOS_CATCH ( "" ); + } + + /** + * Multi solve method for solving a set of linear systems with same coefficient matrix. + * Solves the linear system Ax=b and puts the result on SystemVector& rX. + * rX is also th initial guess for iterative methods. + * @param rA. System matrix + * @param rX. Solution vector. + * @param rB. Right hand side vector. + */ + bool Solve ( SparseMatrixType& rA, DenseMatrixType& rX, DenseMatrixType& rB ) + { + + return false; + } + + /** Some solvers may require a minimum degree of knowledge of the structure of the matrix. To make an example + * when solving a mixed u-p problem, it is important to identify the row associated to v and p. + * another example is the automatic prescription of rotation null-space for smoothed-aggregation solvers + * which require knowledge on the spatial position of the nodes associated to a given dof. + * This function tells if the solver requires such data + */ + virtual bool AdditionalPhysicalDataIsNeeded() + { + return true; + } + + /** Some solvers may require a minimum degree of knowledge of the structure of the matrix. To make an example + * when solving a mixed u-p problem, it is important to identify the row associated to v and p. + * another example is the automatic prescription of rotation null-space for smoothed-aggregation solvers + * which require knowledge on the spatial position of the nodes associated to a given dof. + * This function is the place to eventually provide such data + */ + void ProvideAdditionalData ( + SparseMatrixType& rA, + VectorType& rX, + VectorType& rB, + typename ModelPart::DofsArrayType& rdof_set, + ModelPart& r_model_part + ) + { + int my_pid = rA.Comm().MyPID(); + + //filling the pressure mask + if(mp.size() != static_cast(rA.NumMyRows())) mp.resize( rA.NumMyRows() ); + + int counter = 0.0; + for (ModelPart::DofsArrayType::iterator it = rdof_set.begin(); it!=rdof_set.end(); it++) + { + if( it->GetSolutionStepValue ( PARTITION_INDEX ) == my_pid ) + { + mp[counter] = (it->GetVariable().Key() == PRESSURE); + counter++; + } + } + + } + + /** + * Print information about this object. + */ + void PrintInfo ( std::ostream& rOStream ) const + { + rOStream << "AMGCL_MPI solver finished."; + } + + /** + * Print object's data. + */ + void PrintData ( std::ostream& rOStream ) const + { + } + +private: + + double mtol; + int mmax_it; + int mverbosity; + unsigned int mcoarse_enough; + bool muse_linear_deflation = false; + bool mprovide_coordinates = false; + bool muse_block_matrices_if_possible = false; + + std::vector< char > mp; //pressure mask + boost::property_tree::ptree mprm; + + + /** + * Assignment operator. + */ + AmgclMPI_NS_Solver& operator= ( const AmgclMPI_NS_Solver& Other ); + + /** + * Copy constructor. + */ + AmgclMPI_NS_Solver ( const AmgclMPI_NS_Solver& Other ); + +}; + + +/** + * input stream function + */ +template +inline std::istream& operator >> ( std::istream& rIStream, AmgclMPI_NS_Solver< TSparseSpaceType, + TDenseSpaceType, TReordererType>& rThis ) +{ + return rIStream; +} + +/** + * output stream function + */ +template +inline std::ostream& operator << ( std::ostream& rOStream, + const AmgclMPI_NS_Solver& rThis ) +{ + rThis.PrintInfo ( rOStream ); + rOStream << std::endl; + rThis.PrintData ( rOStream ); + + return rOStream; +} + + +} // namespace Kratos. + +#endif // KRATOS_AMGCL_NS_MPI_SOLVER_H_INCLUDED defined + + diff --git a/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py b/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py index fd86f0169cb9..e610c456bc48 100644 --- a/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py +++ b/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py @@ -21,6 +21,8 @@ def ConstructSolver(configuration): linear_solver = MultiLevelSolver(configuration) elif(solver_type == "AmgclMPISolver"): linear_solver = AmgclMPISolver(configuration); + elif(solver_type == "AmgclMPI_NS_Solver"): + linear_solver = AmgclMPI_NS_Solver(configuration); # elif(solver_type == "Deflated Conjugate gradient"): raise Exception("not implemented within trilinos") From a1b62e37bf9543344e8adf379e1b2957828aee84 Mon Sep 17 00:00:00 2001 From: Riccardo Rossi Date: Tue, 1 Aug 2017 15:03:08 +0200 Subject: [PATCH 02/12] some debugging output --- .../external_includes/mpi_amgcl_ns_solver.h | 153 ++++++++++-------- 1 file changed, 82 insertions(+), 71 deletions(-) diff --git a/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h b/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h index 5f6e6cca3262..52521a58bd55 100644 --- a/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h +++ b/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h @@ -77,74 +77,74 @@ class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType, AmgclMPI_NS_Solver ( Parameters rParameters ) { - Parameters default_parameters( R"( - { - "solver_type" : "AmgclMPI_NS_Solver", - "krylov_type" : "fgmres", - "velocity_block_preconditioner" : - { - "krylov_type" : "lgmres", - "tolerance" : 1e-3, - "preconditioner_type" : "ilu0", - "max_iteration": 5 - }, - "pressure_block_preconditioner" : - { - "krylov_type" : "lgmres", - "tolerance" : 1e-2, - "preconditioner_type" : "spai0", - "max_iteration": 20 - }, - "tolerance" : 1e-9, - "gmres_krylov_space_dimension": 50, - "coarsening_type": "aggregation", - "max_iteration": 50, - "verbosity" : 1, - "scaling": false, - "coarse_enough" : 5000 - } )" ); - - - //now validate agains defaults -- this also ensures no type mismatch - rParameters.ValidateAndAssignDefaults(default_parameters); - - std::stringstream msg; - - //validate if values are admissible - std::set available_preconditioners = {"spai0","ilu0","damped_jacobi","gauss_seidel","chebyshev"}; - - //check velocity block settings - if(available_preconditioners.find(rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()) == available_preconditioners.end()) - { - msg << "currently prescribed velocity_block_preconditioner preconditioner_type : " << rParameters["velocity_block_preconditioner"]["smoother_type"].GetString() << std::endl; - msg << "admissible values are : spai0,ilu0,damped_jacobi,gauss_seidel,chebyshev"<< std::endl; - KRATOS_THROW_ERROR(std::invalid_argument," smoother_type is invalid: ",msg.str()); - } - - mcoarse_enough = rParameters["coarse_enough"].GetInt(); - - mtol = rParameters["tolerance"].GetDouble(); - mmax_it = rParameters["max_iteration"].GetInt(); - mverbosity=rParameters["verbosity"].GetInt(); - mprm.put("solver.type", rParameters["krylov_type"].GetString()); - - if(rParameters["krylov_type"].GetString() == "gmres" || rParameters["krylov_type"].GetString() == "lgmres" || rParameters["krylov_type"].GetString() == "fgmres") - mprm.put("solver.M", rParameters["gmres_krylov_space_dimension"].GetInt()); - - //setting velocity solver options - mprm.put("precond.usolver.solver.type", rParameters["velocity_block_preconditioner"]["krylov_type"].GetString()); - mprm.put("precond.usolver.solver.tol", rParameters["velocity_block_preconditioner"]["tolerance"].GetDouble()); - mprm.put("precond.usolver.solver.maxiter", rParameters["velocity_block_preconditioner"]["max_iteration"].GetInt()); - mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()); - - //setting pressure solver options - mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString()); - mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble()); - mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt()); - mprm.put("precond.psolver.precond.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString()); - mprm.put("precond.psolver.precond.coarsening.aggr.eps_strong", 0.0); - mprm.put("precond.psolver.precond.coarsening.aggr.block_size", 1); - mprm.put("precond.psolver.precond.coarse_enough",mcoarse_enough); +// Parameters default_parameters( R"( +// { +// "solver_type" : "AmgclMPI_NS_Solver", +// "krylov_type" : "fgmres", +// "velocity_block_preconditioner" : +// { +// "krylov_type" : "lgmres", +// "tolerance" : 1e-3, +// "preconditioner_type" : "ilu0", +// "max_iteration": 5 +// }, +// "pressure_block_preconditioner" : +// { +// "krylov_type" : "lgmres", +// "tolerance" : 1e-2, +// "preconditioner_type" : "spai0", +// "max_iteration": 20 +// }, +// "tolerance" : 1e-9, +// "gmres_krylov_space_dimension": 50, +// "coarsening_type": "aggregation", +// "max_iteration": 50, +// "verbosity" : 1, +// "scaling": false, +// "coarse_enough" : 5000 +// } )" ); +// +// +// //now validate agains defaults -- this also ensures no type mismatch +// rParameters.ValidateAndAssignDefaults(default_parameters); +// +// std::stringstream msg; +// +// //validate if values are admissible +// std::set available_preconditioners = {"spai0","ilu0","damped_jacobi","gauss_seidel","chebyshev"}; +// +// //check velocity block settings +// if(available_preconditioners.find(rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()) == available_preconditioners.end()) +// { +// msg << "currently prescribed velocity_block_preconditioner preconditioner_type : " << rParameters["velocity_block_preconditioner"]["smoother_type"].GetString() << std::endl; +// msg << "admissible values are : spai0,ilu0,damped_jacobi,gauss_seidel,chebyshev"<< std::endl; +// KRATOS_THROW_ERROR(std::invalid_argument," smoother_type is invalid: ",msg.str()); +// } +// +// mcoarse_enough = rParameters["coarse_enough"].GetInt(); +// +// mtol = rParameters["tolerance"].GetDouble(); +// mmax_it = rParameters["max_iteration"].GetInt(); +// mverbosity=rParameters["verbosity"].GetInt(); +// mprm.put("solver.type", rParameters["krylov_type"].GetString()); +// +// if(rParameters["krylov_type"].GetString() == "gmres" || rParameters["krylov_type"].GetString() == "lgmres" || rParameters["krylov_type"].GetString() == "fgmres") +// mprm.put("solver.M", rParameters["gmres_krylov_space_dimension"].GetInt()); +// +// //setting velocity solver options +// mprm.put("precond.usolver.solver.type", rParameters["velocity_block_preconditioner"]["krylov_type"].GetString()); +// mprm.put("precond.usolver.solver.tol", rParameters["velocity_block_preconditioner"]["tolerance"].GetDouble()); +// mprm.put("precond.usolver.solver.maxiter", rParameters["velocity_block_preconditioner"]["max_iteration"].GetInt()); +// mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()); +// +// //setting pressure solver options +// mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString()); +// mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble()); +// mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt()); +// mprm.put("precond.psolver.precond.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString()); +// mprm.put("precond.psolver.precond.coarsening.aggr.eps_strong", 0.0); +// mprm.put("precond.psolver.precond.coarsening.aggr.block_size", 1); +// mprm.put("precond.psolver.precond.coarse_enough",mcoarse_enough); } @@ -165,22 +165,26 @@ class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType, bool Solve ( SparseMatrixType& rA, VectorType& rX, VectorType& rB ) { KRATOS_TRY - +KRATOS_WATCH(__LINE__) using amgcl::prof; prof.reset(); +KRATOS_WATCH(__LINE__) amgcl::mpi::communicator world ( MPI_COMM_WORLD ); if ( mverbosity >=0 && world.rank == 0 ) { std::cout << "World size: " << world.size << std::endl; } +KRATOS_WATCH(__LINE__) int chunk = rA.NumMyRows(); boost::iterator_range xrange ( rX.Values(), rX.Values() + chunk ); boost::iterator_range frange ( rB.Values(), rB.Values() + chunk ); +KRATOS_WATCH(__LINE__) if ( mverbosity > 1 && world.rank == 0 ) { write_json ( std::cout, mprm ); } +KRATOS_WATCH(__LINE__) typedef amgcl::backend::builtin Backend; @@ -203,6 +207,7 @@ class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType, >, amgcl::runtime::iterative_solver > SDD; +KRATOS_WATCH(__LINE__) boost::function dv = amgcl::mpi::constant_deflation(1); mprm.put("precond.psolver.precond.num_def_vec", 1); @@ -210,10 +215,12 @@ class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType, mprm.put("precond.pmask", static_cast(&mp[0])); mprm.put("precond.pmask_size", mp.size()); +KRATOS_WATCH(__LINE__) prof.tic ( "setup" ); SDD solve ( world, amgcl::backend::map ( rA ), mprm ); double tm_setup = prof.toc ( "setup" ); +KRATOS_WATCH(__LINE__) prof.tic ( "Solve" ); size_t iters; @@ -288,12 +295,13 @@ class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType, ModelPart& r_model_part ) { +KRATOS_WATCH(__LINE__) int my_pid = rA.Comm().MyPID(); //filling the pressure mask if(mp.size() != static_cast(rA.NumMyRows())) mp.resize( rA.NumMyRows() ); - - int counter = 0.0; +KRATOS_WATCH(__LINE__) + int counter = 0; for (ModelPart::DofsArrayType::iterator it = rdof_set.begin(); it!=rdof_set.end(); it++) { if( it->GetSolutionStepValue ( PARTITION_INDEX ) == my_pid ) @@ -302,6 +310,9 @@ class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType, counter++; } } +KRATOS_WATCH(__LINE__) + if(counter != rA.NumMyRows()) + KRATOS_ERROR << "pressure mask as a size " << mp.size() << " which does not correspond with the number of local rows:" << rA.NumMyRows() << std::endl; } From c7d3400a06d6b23e464061b870d1433342f86459 Mon Sep 17 00:00:00 2001 From: Riccardo Rossi Date: Tue, 8 Aug 2017 08:04:45 +0200 Subject: [PATCH 03/12] some corrections to mpi solvers --- .../trilinos_linear_solver_factory.py | 5 +- .../amgcl/mpi/subdomain_deflation.hpp | 142 ++++++++++-------- 2 files changed, 85 insertions(+), 62 deletions(-) diff --git a/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py b/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py index e610c456bc48..853140f1616e 100644 --- a/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py +++ b/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py @@ -55,10 +55,11 @@ def ConstructSolver(configuration): raise Exception("not implemented within trilinos") else: print("*****************************************************************") - print("Inexisting solver type. Possibilities are:") + print("Inexisting solver type. Specified::::::::::: ", solver_type) + print(" Possibilities are:") print("............") print("*****************************************************************") - + raise Exception("please specify a correct solver") # else: # except LogicError: diff --git a/external_libraries/amgcl/mpi/subdomain_deflation.hpp b/external_libraries/amgcl/mpi/subdomain_deflation.hpp index 79e7a4ae68bc..f9c74f3cc2d8 100644 --- a/external_libraries/amgcl/mpi/subdomain_deflation.hpp +++ b/external_libraries/amgcl/mpi/subdomain_deflation.hpp @@ -145,7 +145,7 @@ class subdomain_deflation { q( backend_type::create_vector(nrows, bprm) ), S(nrows, prm.isolver, bprm, mpi::inner_product(mpi_comm)) { - TIC("setup deflation"); + AMGCL_TIC("setup deflation"); typedef backend::crs build_matrix; typedef typename backend::row_iterator::type row_iterator1; typedef typename backend::row_iterator::type row_iterator2; @@ -170,7 +170,7 @@ class subdomain_deflation { ptrdiff_t loc_end = domain[comm.rank + 1]; // Fill deflation vectors. - TIC("copy deflation vectors"); + AMGCL_TIC("copy deflation vectors"); { std::vector z(nrows); for(int j = 0; j < ndv; ++j) { @@ -179,12 +179,12 @@ class subdomain_deflation { Z[j] = backend_type::copy_vector(z, bprm); } } - TOC("copy deflation vectors"); + AMGCL_TOC("copy deflation vectors"); // Number of nonzeros in local and remote parts of the matrix. ptrdiff_t loc_nnz = 0, rem_nnz = 0; - TIC("first pass"); + AMGCL_TIC("first pass"); // First pass over Astrip rows: // 1. Count local and remote nonzeros, // 3. Build sparsity pattern of matrix AZ. @@ -218,9 +218,9 @@ class subdomain_deflation { } } } - TOC("first pass"); + AMGCL_TOC("first pass"); - TIC("second pass"); + AMGCL_TIC("second pass"); // Second pass over Astrip rows: // 1. Build local and remote matrix parts. // 2. Build local part of AZ matrix. @@ -273,7 +273,7 @@ class subdomain_deflation { aloc->ptr[i+1] = loc_head; arem->ptr[i+1] = rem_head; } - TOC("second pass"); + AMGCL_TOC("second pass"); // Create local preconditioner. P = boost::make_shared( *aloc, prm.local, bprm ); @@ -284,7 +284,7 @@ class subdomain_deflation { Arem = backend_type::copy_matrix(arem, bprm); A = boost::make_shared(*C, P->system_matrix(), *Arem); - TIC("A*Z"); + AMGCL_TIC("A*Z"); /* Finish construction of AZ */ // Exchange deflation vectors std::vector zrecv_ptr(C->recv.nbr.size() + 1, 0); @@ -358,12 +358,12 @@ class subdomain_deflation { std::rotate(az->ptr, az->ptr + nrows, az->ptr + nrows + 1); az->ptr[0] = 0; - TOC("A*Z"); + AMGCL_TOC("A*Z"); MPI_Waitall(C->send.req.size(), &C->send.req[0], MPI_STATUSES_IGNORE); /* Build deflated matrix E. */ - TIC("assemble E"); + AMGCL_TIC("assemble E"); // Who is responsible for solution of coarse problem int nmasters = std::min(comm.size, DirectSolver::comm_size(nz, prm.dsolver)); int nslaves = (comm.size + nmasters - 1) / nmasters; @@ -408,13 +408,18 @@ class subdomain_deflation { std::vector Eptr; if (comm.rank == master_rank) { Eptr.resize(dv_start[cgroup_end] - dv_start[cgroup_beg] + 1, 0); - } - MPI_Gatherv( - &eptr[1], ndv, MPI_INT, &Eptr[0] + 1, - const_cast(&ssize[0]), const_cast(&sstart[0]), - MPI_INT, 0, slaves_comm - ); + MPI_Gatherv( + &eptr[1], ndv, MPI_INT, &Eptr[0] + 1, + const_cast(&ssize[0]), const_cast(&sstart[0]), + MPI_INT, 0, slaves_comm + ); + } else { + MPI_Gatherv( + &eptr[1], ndv, MPI_INT, NULL, NULL, NULL, + MPI_INT, 0, slaves_comm + ); + } std::partial_sum(eptr.begin(), eptr.end(), eptr.begin()); std::partial_sum(Eptr.begin(), Eptr.end(), Eptr.begin()); @@ -460,23 +465,33 @@ class subdomain_deflation { sstart[i] = Eptr[dv_start[p] - offset]; ssize[i] = Eptr[dv_start[p + 1] - offset] - sstart[i]; } - } - MPI_Gatherv( - &ecol[0], ecol.size(), MPI_INT, &Ecol[0], - const_cast(&ssize[0]), const_cast(&sstart[0]), - MPI_INT, 0, slaves_comm - ); + MPI_Gatherv( + &ecol[0], ecol.size(), MPI_INT, &Ecol[0], + const_cast(&ssize[0]), const_cast(&sstart[0]), + MPI_INT, 0, slaves_comm + ); - MPI_Gatherv( - &eval[0], eval.size(), dtype, &Eval[0], - const_cast(&ssize[0]), const_cast(&sstart[0]), - dtype, 0, slaves_comm - ); - TOC("assemble E"); + MPI_Gatherv( + &eval[0], eval.size(), dtype, &Eval[0], + const_cast(&ssize[0]), const_cast(&sstart[0]), + dtype, 0, slaves_comm + ); + } else { + MPI_Gatherv( + &ecol[0], ecol.size(), MPI_INT, NULL, NULL, NULL, + MPI_INT, 0, slaves_comm + ); + + MPI_Gatherv( + &eval[0], eval.size(), dtype, NULL, NULL, NULL, + dtype, 0, slaves_comm + ); + } + AMGCL_TOC("assemble E"); // Prepare E factorization. - TIC("factorize E"); + AMGCL_TIC("factorize E"); if (comm.rank == master_rank) { E = boost::make_shared( masters_comm, Eptr.size() - 1, Eptr, Ecol, Eval, prm.dsolver @@ -485,9 +500,9 @@ class subdomain_deflation { cf.resize(Eptr.size() - 1); cx.resize(Eptr.size() - 1); } - TOC("factorize E"); + AMGCL_TOC("factorize E"); - TOC("setup deflation"); + AMGCL_TOC("setup deflation"); // Move matrices to backend. AZ = backend_type::copy_matrix(az, bprm); @@ -546,18 +561,18 @@ class subdomain_deflation { template void mul(value_type alpha, const Vec1 &x, value_type beta, Vec2 &y) const { - TIC("top/spmv"); + AMGCL_TIC("top/spmv"); backend::spmv(alpha, *A, x, beta, y); - TOC("top/spmv"); + AMGCL_TOC("top/spmv"); project(y); } template void residual(const Vec1 &f, const Vec2 &x, Vec3 &r) const { - TIC("top/residual"); + AMGCL_TIC("top/residual"); backend::residual(f, *A, x, r); - TOC("top/residual"); + AMGCL_TOC("top/residual"); project(r); } @@ -599,67 +614,74 @@ class subdomain_deflation { template void project(Vector &x) const { - TIC("project"); + AMGCL_TIC("project"); - TIC("local inner product"); + AMGCL_TIC("local inner product"); for(ptrdiff_t j = 0; j < ndv; ++j) df[j] = backend::inner_product(x, *Z[j]); - TOC("local inner product"); + AMGCL_TOC("local inner product"); coarse_solve(df, dx); - TIC("spmv"); + AMGCL_TIC("spmv"); backend::copy_to_backend(dx, *dd); backend::spmv(-1, *AZ, *dd, 1, x); - TOC("spmv"); + AMGCL_TOC("spmv"); - TOC("project"); + AMGCL_TOC("project"); } void coarse_solve(std::vector &f, std::vector &x) const { - TIC("coarse solve"); - TIC("exchange rhs"); - MPI_Gatherv( - &f[0], f.size(), dtype, &cf[0], - const_cast(&ssize[0]), const_cast(&sstart[0]), - dtype, 0, slaves_comm - ); - TOC("exchange rhs"); + AMGCL_TIC("coarse solve"); + AMGCL_TIC("exchange rhs"); + if (comm.rank == master_rank) { + MPI_Gatherv( + &f[0], f.size(), dtype, &cf[0], + const_cast(&ssize[0]), const_cast(&sstart[0]), + dtype, 0, slaves_comm + ); + } else { + MPI_Gatherv( + &f[0], f.size(), dtype, NULL, NULL, NULL, + dtype, 0, slaves_comm + ); + } + AMGCL_TOC("exchange rhs"); if (comm.rank == master_rank) { - TIC("call solver"); + AMGCL_TIC("call solver"); (*E)(cf, cx); - TOC("call solver"); + AMGCL_TOC("call solver"); - TIC("gather result"); + AMGCL_TIC("gather result"); MPI_Gatherv( &cx[0], cx.size(), dtype, &x[0], const_cast(&msize[0]), const_cast(&mstart[0]), dtype, 0, masters_comm ); - TOC("gather result"); + AMGCL_TOC("gather result"); } - TIC("broadcast result"); + AMGCL_TIC("broadcast result"); MPI_Bcast(&x[0], x.size(), dtype, 0, comm); - TOC("broadcast result"); - TOC("coarse solve"); + AMGCL_TOC("broadcast result"); + AMGCL_TOC("coarse solve"); } template void postprocess(const Vec1 &rhs, Vec2 &x) const { - TIC("postprocess"); + AMGCL_TIC("postprocess"); // q = Ax backend::spmv(1, *A, x, 0, *q); // df = transp(Z) * (rhs - Ax) - TIC("local inner product"); + AMGCL_TIC("local inner product"); for(ptrdiff_t j = 0; j < ndv; ++j) df[j] = backend::inner_product(rhs, *Z[j]) - backend::inner_product(*q, *Z[j]); - TOC("local inner product"); + AMGCL_TOC("local inner product"); // dx = inv(E) * df coarse_solve(df, dx); @@ -667,7 +689,7 @@ class subdomain_deflation { // x += Z * dx backend::lin_comb(ndv, &dx[dv_start[comm.rank]], Z, 1, x); - TOC("postprocess"); + AMGCL_TOC("postprocess"); } }; From 9bb13caaedfa544dec02d68f2544a5281c00b51f Mon Sep 17 00:00:00 2001 From: Riccardo Rossi Date: Tue, 8 Aug 2017 08:38:05 +0200 Subject: [PATCH 04/12] renaming a macro --- .../amgcl/mpi/subdomain_deflation.hpp | 76 +++++++++---------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/external_libraries/amgcl/mpi/subdomain_deflation.hpp b/external_libraries/amgcl/mpi/subdomain_deflation.hpp index f9c74f3cc2d8..97aae2c3e1d1 100644 --- a/external_libraries/amgcl/mpi/subdomain_deflation.hpp +++ b/external_libraries/amgcl/mpi/subdomain_deflation.hpp @@ -145,7 +145,7 @@ class subdomain_deflation { q( backend_type::create_vector(nrows, bprm) ), S(nrows, prm.isolver, bprm, mpi::inner_product(mpi_comm)) { - AMGCL_TIC("setup deflation"); + TIC("setup deflation"); typedef backend::crs build_matrix; typedef typename backend::row_iterator::type row_iterator1; typedef typename backend::row_iterator::type row_iterator2; @@ -170,7 +170,7 @@ class subdomain_deflation { ptrdiff_t loc_end = domain[comm.rank + 1]; // Fill deflation vectors. - AMGCL_TIC("copy deflation vectors"); + TIC("copy deflation vectors"); { std::vector z(nrows); for(int j = 0; j < ndv; ++j) { @@ -179,12 +179,12 @@ class subdomain_deflation { Z[j] = backend_type::copy_vector(z, bprm); } } - AMGCL_TOC("copy deflation vectors"); + TOC("copy deflation vectors"); // Number of nonzeros in local and remote parts of the matrix. ptrdiff_t loc_nnz = 0, rem_nnz = 0; - AMGCL_TIC("first pass"); + TIC("first pass"); // First pass over Astrip rows: // 1. Count local and remote nonzeros, // 3. Build sparsity pattern of matrix AZ. @@ -218,9 +218,9 @@ class subdomain_deflation { } } } - AMGCL_TOC("first pass"); + TOC("first pass"); - AMGCL_TIC("second pass"); + TIC("second pass"); // Second pass over Astrip rows: // 1. Build local and remote matrix parts. // 2. Build local part of AZ matrix. @@ -273,7 +273,7 @@ class subdomain_deflation { aloc->ptr[i+1] = loc_head; arem->ptr[i+1] = rem_head; } - AMGCL_TOC("second pass"); + TOC("second pass"); // Create local preconditioner. P = boost::make_shared( *aloc, prm.local, bprm ); @@ -284,7 +284,7 @@ class subdomain_deflation { Arem = backend_type::copy_matrix(arem, bprm); A = boost::make_shared(*C, P->system_matrix(), *Arem); - AMGCL_TIC("A*Z"); + TIC("A*Z"); /* Finish construction of AZ */ // Exchange deflation vectors std::vector zrecv_ptr(C->recv.nbr.size() + 1, 0); @@ -358,12 +358,12 @@ class subdomain_deflation { std::rotate(az->ptr, az->ptr + nrows, az->ptr + nrows + 1); az->ptr[0] = 0; - AMGCL_TOC("A*Z"); + TOC("A*Z"); MPI_Waitall(C->send.req.size(), &C->send.req[0], MPI_STATUSES_IGNORE); /* Build deflated matrix E. */ - AMGCL_TIC("assemble E"); + TIC("assemble E"); // Who is responsible for solution of coarse problem int nmasters = std::min(comm.size, DirectSolver::comm_size(nz, prm.dsolver)); int nslaves = (comm.size + nmasters - 1) / nmasters; @@ -488,10 +488,10 @@ class subdomain_deflation { dtype, 0, slaves_comm ); } - AMGCL_TOC("assemble E"); + TOC("assemble E"); // Prepare E factorization. - AMGCL_TIC("factorize E"); + TIC("factorize E"); if (comm.rank == master_rank) { E = boost::make_shared( masters_comm, Eptr.size() - 1, Eptr, Ecol, Eval, prm.dsolver @@ -500,9 +500,9 @@ class subdomain_deflation { cf.resize(Eptr.size() - 1); cx.resize(Eptr.size() - 1); } - AMGCL_TOC("factorize E"); + TOC("factorize E"); - AMGCL_TOC("setup deflation"); + TOC("setup deflation"); // Move matrices to backend. AZ = backend_type::copy_matrix(az, bprm); @@ -561,18 +561,18 @@ class subdomain_deflation { template void mul(value_type alpha, const Vec1 &x, value_type beta, Vec2 &y) const { - AMGCL_TIC("top/spmv"); + TIC("top/spmv"); backend::spmv(alpha, *A, x, beta, y); - AMGCL_TOC("top/spmv"); + TOC("top/spmv"); project(y); } template void residual(const Vec1 &f, const Vec2 &x, Vec3 &r) const { - AMGCL_TIC("top/residual"); + TIC("top/residual"); backend::residual(f, *A, x, r); - AMGCL_TOC("top/residual"); + TOC("top/residual"); project(r); } @@ -614,27 +614,27 @@ class subdomain_deflation { template void project(Vector &x) const { - AMGCL_TIC("project"); + TIC("project"); - AMGCL_TIC("local inner product"); + TIC("local inner product"); for(ptrdiff_t j = 0; j < ndv; ++j) df[j] = backend::inner_product(x, *Z[j]); - AMGCL_TOC("local inner product"); + TOC("local inner product"); coarse_solve(df, dx); - AMGCL_TIC("spmv"); + TIC("spmv"); backend::copy_to_backend(dx, *dd); backend::spmv(-1, *AZ, *dd, 1, x); - AMGCL_TOC("spmv"); + TOC("spmv"); - AMGCL_TOC("project"); + TOC("project"); } void coarse_solve(std::vector &f, std::vector &x) const { - AMGCL_TIC("coarse solve"); - AMGCL_TIC("exchange rhs"); + TIC("coarse solve"); + TIC("exchange rhs"); if (comm.rank == master_rank) { MPI_Gatherv( &f[0], f.size(), dtype, &cf[0], @@ -647,41 +647,41 @@ class subdomain_deflation { dtype, 0, slaves_comm ); } - AMGCL_TOC("exchange rhs"); + TOC("exchange rhs"); if (comm.rank == master_rank) { - AMGCL_TIC("call solver"); + TIC("call solver"); (*E)(cf, cx); - AMGCL_TOC("call solver"); + TOC("call solver"); - AMGCL_TIC("gather result"); + TIC("gather result"); MPI_Gatherv( &cx[0], cx.size(), dtype, &x[0], const_cast(&msize[0]), const_cast(&mstart[0]), dtype, 0, masters_comm ); - AMGCL_TOC("gather result"); + TOC("gather result"); } - AMGCL_TIC("broadcast result"); + TIC("broadcast result"); MPI_Bcast(&x[0], x.size(), dtype, 0, comm); - AMGCL_TOC("broadcast result"); - AMGCL_TOC("coarse solve"); + TOC("broadcast result"); + TOC("coarse solve"); } template void postprocess(const Vec1 &rhs, Vec2 &x) const { - AMGCL_TIC("postprocess"); + TIC("postprocess"); // q = Ax backend::spmv(1, *A, x, 0, *q); // df = transp(Z) * (rhs - Ax) - AMGCL_TIC("local inner product"); + TIC("local inner product"); for(ptrdiff_t j = 0; j < ndv; ++j) df[j] = backend::inner_product(rhs, *Z[j]) - backend::inner_product(*q, *Z[j]); - AMGCL_TOC("local inner product"); + TOC("local inner product"); // dx = inv(E) * df coarse_solve(df, dx); @@ -689,7 +689,7 @@ class subdomain_deflation { // x += Z * dx backend::lin_comb(ndv, &dx[dv_start[comm.rank]], Z, 1, x); - AMGCL_TOC("postprocess"); + TOC("postprocess"); } }; From 3b5970aef5c850d803298df14728413b7b75fe71 Mon Sep 17 00:00:00 2001 From: Riccardo Rossi Date: Tue, 8 Aug 2017 10:03:46 +0200 Subject: [PATCH 05/12] renaming and cleaning up the amgcl_mpi_solvers --- .../add_trilinos_linear_solvers_to_python.cpp | 34 +- .../amgcl_mpi_schur_complement_solver.h | 378 ++++++++++++++++++ .../trilinos_linear_solver_factory.py | 4 +- 3 files changed, 385 insertions(+), 31 deletions(-) create mode 100644 applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h diff --git a/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp b/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp index dd10f7bb4bb8..72c0211bb9a4 100644 --- a/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp +++ b/applications/trilinos_application/custom_python/add_trilinos_linear_solvers_to_python.cpp @@ -34,30 +34,6 @@ #include "spaces/ublas_space.h" #include "includes/model_part.h" -//strategies -// #include "solving_strategies/strategies/solving_strategy.h" -// #include "solving_strategies/strategies/residualbased_linear_strategy.h" -// #include "solving_strategies/strategies/residualbased_newton_raphson_strategy.h" - -//schemes -// #include "solving_strategies/schemes/scheme.h" -// #include "custom_strategies/schemes/trilinos_residualbased_incrementalupdate_static_scheme.h" -// #include "custom_strategies/schemes/trilinos_residualbased_lagrangian_monolithic_scheme.h" -// #include "../../incompressible_fluid_application/custom_strategies/strategies/residualbased_predictorcorrector_velocity_bossak_scheme.h" -// #include "custom_strategies/schemes/trilinos_predictorcorrector_velocity_bossak_scheme.h" - -//convergence criterias -// #include "solving_strategies/convergencecriterias/convergence_criteria.h" -// #include "solving_strategies/convergencecriterias/displacement_criteria.h" -// -// //Builder And Solver -// // #include "solving_strategies/builder_and_solvers/builder_and_solver.h" -// #include "custom_strategies/builder_and_solvers/trilinos_residualbased_elimination_builder_and_solver.h" -// #include "custom_strategies/convergencecriterias/trilinos_displacement_criteria.h" -// #include "custom_strategies/convergencecriterias/trilinos_up_criteria.h" -// #include "custom_strategies/builder_and_solvers/trilinos_builder_and_solver_ML.h" -// #include "custom_strategies/builder_and_solvers/trilinos_builder_and_solver_ML_vec.h" -// #include "custom_strategies/builder_and_solvers/trilinos_builder_and_solver_ML_mixed.h" //linear solvers #include "linear_solvers/linear_solver.h" @@ -73,8 +49,8 @@ #include "external_includes/amesos_solver.h" #include "external_includes/ml_solver.h" -#include "external_includes/amgcl_solver.h" -#include "external_includes/mpi_amgcl_ns_solver.h" +#include "external_includes/amgcl_mpi_solver.h" +#include "external_includes/amgcl_mpi_schur_complement_solver.h" namespace Kratos { @@ -136,9 +112,9 @@ void AddLinearSolvers() .def("SetIntParameter", &AmgclMPISolverType::SetIntParameter) ; - typedef AmgclMPI_NS_Solver AmgclMPI_NS_SolverType; - class_, boost::noncopyable > - ("AmgclMPI_NS_Solver",init()) + typedef AmgclMPISchurComplementSolver AmgclMPISchurComplementSolverType; + class_, boost::noncopyable > + ("AmgclMPISchurComplementSolver",init()) ; enum_("AztecScalingType") diff --git a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h new file mode 100644 index 000000000000..07965a245877 --- /dev/null +++ b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h @@ -0,0 +1,378 @@ +// | / | +// ' / __| _` | __| _ \ __| +// . \ | ( | | ( |\__ ` +// _|\_\_| \__,_|\__|\___/ ____/ +// Multi-Physics +// +// License: BSD License +// Kratos default license: kratos/license.txt +// +// Main authors: Denis Demidov +// Riccardo Rossi +// + + +#if !defined(KRATOS_AMGCL_MPI_SCHUR_COMPLEMENT_SOLVER_H_INCLUDED ) +#define KRATOS_AMGCL_MPI_SCHUR_COMPLEMENT_SOLVER_H_INCLUDED + + +#ifndef AMGCL_PARAM_UNKNOWN +# define AMGCL_PARAM_UNKNOWN(name) \ + std::cerr << "AMGCL WARNING: unknown parameter " << name << std::endl +#endif + + +// External includes +#include "boost/smart_ptr.hpp" + +// Project includes +#include "includes/define.h" +#include "includes/kratos_parameters.h" +#include "linear_solvers/linear_solver.h" +#include "external_includes/amgcl_mpi_solver.h" + +//aztec solver includes +#include "AztecOO.h" +#include "Epetra_LinearProblem.h" +//#include "Teuchos_ParameterList.hpp" + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include //needed to print AMGCL internal settings + + + +namespace Kratos +{ + + + +template< class TSparseSpaceType, class TDenseSpaceType, + class TReordererType = Reorderer > +class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType, + TDenseSpaceType, TReordererType> +{ +public: + /** + * Counted pointer of AmgclMPISchurComplementSolver + */ + KRATOS_CLASS_POINTER_DEFINITION ( AmgclMPISchurComplementSolver ); + + typedef LinearSolver BaseType; + + typedef typename TSparseSpaceType::MatrixType SparseMatrixType; + + typedef typename TSparseSpaceType::VectorType VectorType; + + typedef typename TDenseSpaceType::MatrixType DenseMatrixType; + + + AmgclMPISchurComplementSolver ( Parameters rParameters ) + { + Parameters default_parameters( R"( + { + "solver_type" : "AmgclMPISchurComplementSolver", + "krylov_type" : "fgmres", + "velocity_block_preconditioner" : + { + "krylov_type" : "lgmres", + "tolerance" : 1e-3, + "preconditioner_type" : "ilu0", + "max_iteration": 5 + }, + "pressure_block_preconditioner" : + { + "krylov_type" : "lgmres", + "tolerance" : 1e-2, + "preconditioner_type" : "spai0", + "max_iteration": 20 + }, + "tolerance" : 1e-9, + "gmres_krylov_space_dimension": 50, + "coarsening_type": "aggregation", + "max_iteration": 50, + "verbosity" : 1, + "scaling": false, + "coarse_enough" : 5000 + } )" ); + + + //now validate agains defaults -- this also ensures no type mismatch + rParameters.ValidateAndAssignDefaults(default_parameters); + + std::stringstream msg; + + //validate if values are admissible + std::set available_preconditioners = {"spai0","ilu0","damped_jacobi","gauss_seidel","chebyshev"}; + + //check velocity block settings + if(available_preconditioners.find(rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()) == available_preconditioners.end()) + { + msg << "currently prescribed velocity_block_preconditioner preconditioner_type : " << rParameters["velocity_block_preconditioner"]["smoother_type"].GetString() << std::endl; + msg << "admissible values are : spai0,ilu0,damped_jacobi,gauss_seidel,chebyshev"<< std::endl; + KRATOS_THROW_ERROR(std::invalid_argument," smoother_type is invalid: ",msg.str()); + } + + mCoarseEnough = rParameters["coarse_enough"].GetInt(); + + mTolerance = rParameters["tolerance"].GetDouble(); + mMaxIterations = rParameters["max_iteration"].GetInt(); + mVerbosity=rParameters["verbosity"].GetInt(); + mprm.put("solver.type", rParameters["krylov_type"].GetString()); + + if(rParameters["krylov_type"].GetString() == "gmres" || rParameters["krylov_type"].GetString() == "lgmres" || rParameters["krylov_type"].GetString() == "fgmres") + mprm.put("solver.M", rParameters["gmres_krylov_space_dimension"].GetInt()); + + //setting velocity solver options + mprm.put("precond.usolver.solver.type", rParameters["velocity_block_preconditioner"]["krylov_type"].GetString()); + mprm.put("precond.usolver.solver.tol", rParameters["velocity_block_preconditioner"]["tolerance"].GetDouble()); + mprm.put("precond.usolver.solver.maxiter", rParameters["velocity_block_preconditioner"]["max_iteration"].GetInt()); + mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()); + + //setting pressure solver options + mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString()); + mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble()); + mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt()); + mprm.put("precond.psolver.precond.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString()); + mprm.put("precond.psolver.precond.coarsening.aggr.eps_strong", 0.0); + mprm.put("precond.psolver.precond.coarsening.aggr.block_size", 1); + mprm.put("precond.psolver.precond.coarse_enough",mCoarseEnough); + + } + + + /** + * Destructor + */ + virtual ~AmgclMPISchurComplementSolver() {} + + /** + * Normal solve method. + * Solves the linear system Ax=b and puts the result on SystemVector& rX. + * rX is also th initial guess for iterative methods. + * @param rA. System matrix + * @param rX. Solution vector. + * @param rB. Right hand side vector. + */ + bool Solve ( SparseMatrixType& rA, VectorType& rX, VectorType& rB ) + { + KRATOS_TRY + + using amgcl::prof; + prof.reset(); + + amgcl::mpi::communicator world ( MPI_COMM_WORLD ); + if ( mVerbosity >=0 && world.rank == 0 ) { + std::cout << "World size: " << world.size << std::endl; + } + + int chunk = rA.NumMyRows(); + boost::iterator_range xrange ( rX.Values(), rX.Values() + chunk ); + boost::iterator_range frange ( rB.Values(), rB.Values() + chunk ); + + if ( mVerbosity > 1 && world.rank == 0 ) { + write_json ( std::cout, mprm ); + } + + typedef amgcl::backend::builtin Backend; + + typedef amgcl::mpi::make_solver< + amgcl::mpi::schur_pressure_correction< + amgcl::mpi::make_solver< + amgcl::mpi::block_preconditioner< + amgcl::runtime::relaxation::as_preconditioner + >, + amgcl::runtime::iterative_solver + >, + amgcl::mpi::make_solver< + amgcl::mpi::subdomain_deflation< + amgcl::runtime::amg, + amgcl::runtime::iterative_solver, + amgcl::runtime::mpi::direct_solver + >, + amgcl::runtime::iterative_solver + > + >, + amgcl::runtime::iterative_solver + > SDD; + + boost::function dv = amgcl::mpi::constant_deflation(1); + mprm.put("precond.psolver.precond.num_def_vec", 1); + mprm.put("precond.psolver.precond.def_vec", &dv); + + mprm.put("precond.pmask", static_cast(&mPressureMask[0])); + mprm.put("precond.pmask_size", mPressureMask.size()); + + prof.tic ( "setup" ); + SDD solve ( world, amgcl::backend::map ( rA ), mprm ); + double tm_setup = prof.toc ( "setup" ); + + prof.tic ( "Solve" ); + size_t iters; + double resid; + boost::tie ( iters, resid ) = solve ( frange, xrange ); + double solve_tm = prof.toc ( "Solve" ); + + + + if ( rA.Comm().MyPID() == 0 ) { + if ( mVerbosity > 0 ) { + std::cout + << "------- AMGCL -------\n" << std::endl + << "Iterations : " << iters << std::endl + << "Error : " << resid << std::endl + << "amgcl setup time: " << tm_setup << std::endl + << "amgcl solve time: " << solve_tm << std::endl; + } + + if ( mVerbosity > 1 ) { + std::cout << prof << std::endl; + } + } + + + + return true; + + KRATOS_CATCH ( "" ); + } + + /** + * Assignment operator. + */ + AmgclMPISchurComplementSolver& operator= ( const AmgclMPISchurComplementSolver& Other ) = delete; + + /** + * Copy constructor. + */ + AmgclMPISchurComplementSolver ( const AmgclMPISchurComplementSolver& Other ) = delete; + + /** + * Multi solve method for solving a set of linear systems with same coefficient matrix. + * Solves the linear system Ax=b and puts the result on SystemVector& rX. + * rX is also th initial guess for iterative methods. + * @param rA. System matrix + * @param rX. Solution vector. + * @param rB. Right hand side vector. + */ + bool Solve ( SparseMatrixType& rA, DenseMatrixType& rX, DenseMatrixType& rB ) override + { + + return false; + } + + /** Some solvers may require a minimum degree of knowledge of the structure of the matrix. To make an example + * when solving a mixed u-p problem, it is important to identify the row associated to v and p. + * another example is the automatic prescription of rotation null-space for smoothed-aggregation solvers + * which require knowledge on the spatial position of the nodes associated to a given dof. + * This function tells if the solver requires such data + */ + bool AdditionalPhysicalDataIsNeeded() override + { + return true; + } + + /** Some solvers may require a minimum degree of knowledge of the structure of the matrix. To make an example + * when solving a mixed u-p problem, it is important to identify the row associated to v and p. + * another example is the automatic prescription of rotation null-space for smoothed-aggregation solvers + * which require knowledge on the spatial position of the nodes associated to a given dof. + * This function is the place to eventually provide such data + */ + void ProvideAdditionalData ( + SparseMatrixType& rA, + VectorType& rX, + VectorType& rB, + typename ModelPart::DofsArrayType& rdof_set, + ModelPart& r_model_part + ) override + { + + int my_pid = rA.Comm().MyPID(); + + //filling the pressure mask + if(mPressureMask.size() != static_cast(rA.NumMyRows())) + mPressureMask.resize( rA.NumMyRows(), false ); + + int counter = 0; + int npressures = 0; + for (ModelPart::DofsArrayType::iterator it = rdof_set.begin(); it!=rdof_set.end(); it++) + { + if( it->GetSolutionStepValue ( PARTITION_INDEX ) == my_pid ) + { + mPressureMask[counter] = (it->GetVariable().Key() == PRESSURE); + counter++; + +#ifdef KRATOS_DEBUG + if(it->GetVariable().Key() == PRESSURE) + npressures++; +#endif + } + } +#ifdef KRATOS_DEBUG + std::cout << "MPI proc :" << my_pid << " npressures = " << npressures << " local rows = " << rA.NumMyRows(); +#endif + if(counter != rA.NumMyRows()) + KRATOS_ERROR << "pressure mask as a size " << mPressureMask.size() << " which does not correspond with the number of local rows:" << rA.NumMyRows() << std::endl; + + } + + /** + * Print information about this object. + */ + void PrintInfo ( std::ostream& rOStream ) const + { + rOStream << "AMGCL_MPI solver finished."; + } + + /** + * Print object's data. + */ + void PrintData ( std::ostream& rOStream ) const + { + } + +private: + + double mTolerance; + int mMaxIterations; + int mVerbosity = 0; + unsigned int mCoarseEnough = 5000; + + std::vector< char > mPressureMask; //pressure mask + boost::property_tree::ptree mprm; + + + + +}; + + +/** + * output stream function + */ +template +inline std::ostream& operator << ( std::ostream& rOStream, + const AmgclMPISchurComplementSolver& rThis ) +{ + rThis.PrintInfo ( rOStream ); + rOStream << std::endl; + rThis.PrintData ( rOStream ); + + return rOStream; +} + + +} // namespace Kratos. + +#endif // KRATOS_AMGCL_MPI_SCHUR_COMPLEMENT_SOLVER_H_INCLUDED defined + + diff --git a/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py b/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py index 853140f1616e..f948a881e7a1 100644 --- a/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py +++ b/applications/trilinos_application/python_scripts/trilinos_linear_solver_factory.py @@ -21,8 +21,8 @@ def ConstructSolver(configuration): linear_solver = MultiLevelSolver(configuration) elif(solver_type == "AmgclMPISolver"): linear_solver = AmgclMPISolver(configuration); - elif(solver_type == "AmgclMPI_NS_Solver"): - linear_solver = AmgclMPI_NS_Solver(configuration); + elif(solver_type == "AmgclMPISchurComplementSolver"): + linear_solver = AmgclMPISchurComplementSolver(configuration); # elif(solver_type == "Deflated Conjugate gradient"): raise Exception("not implemented within trilinos") From f757422ac59a4b5e2cbb658a97443f15fdc09027 Mon Sep 17 00:00:00 2001 From: Riccardo Rossi Date: Tue, 8 Aug 2017 11:03:55 +0200 Subject: [PATCH 06/12] styling change --- .../external_includes/amgcl_mpi_schur_complement_solver.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h index 07965a245877..7c45cc4c935c 100644 --- a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h +++ b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h @@ -290,8 +290,8 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType, SparseMatrixType& rA, VectorType& rX, VectorType& rB, - typename ModelPart::DofsArrayType& rdof_set, - ModelPart& r_model_part + typename ModelPart::DofsArrayType& rDofSet, + ModelPart& rModelPart ) override { @@ -303,7 +303,7 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType, int counter = 0; int npressures = 0; - for (ModelPart::DofsArrayType::iterator it = rdof_set.begin(); it!=rdof_set.end(); it++) + for (ModelPart::DofsArrayType::iterator it = rDofSet.begin(); it!=rDofSet.end(); it++) { if( it->GetSolutionStepValue ( PARTITION_INDEX ) == my_pid ) { From de8b53db14ecf34e48f0ca9f7af8557ee45106e9 Mon Sep 17 00:00:00 2001 From: Riccardo Rossi Date: Thu, 10 Aug 2017 14:39:38 +0200 Subject: [PATCH 07/12] bumping up amgcl library version --- .../amgcl/adapter/crs_tuple.hpp | 1 + external_libraries/amgcl/amg.hpp | 83 +++++---- external_libraries/amgcl/backend/builtin.hpp | 7 +- external_libraries/amgcl/backend/cuda.hpp | 43 +++++ external_libraries/amgcl/backend/hpx.hpp | 1 - .../amgcl/backend/interface.hpp | 27 ++- external_libraries/amgcl/backend/vexcl.hpp | 2 +- .../amgcl/coarsening/aggregation.hpp | 8 +- .../amgcl/coarsening/ruge_stuben.hpp | 14 +- .../amgcl/coarsening/smoothed_aggr_emin.hpp | 8 +- .../amgcl/coarsening/smoothed_aggregation.hpp | 158 ++++++++++++++++-- .../coarsening/tentative_prolongation.hpp | 4 +- .../amgcl/make_block_solver.hpp | 4 + .../amgcl/mpi/inner_product.hpp | 4 +- .../amgcl/mpi/schur_pressure_correction.hpp | 51 ++++-- .../amgcl/mpi/subdomain_deflation.hpp | 76 ++++----- .../amgcl/preconditioner/dummy.hpp | 2 +- .../schur_pressure_correction.hpp | 27 ++- external_libraries/amgcl/profiler.hpp | 17 +- .../amgcl/relaxation/chebyshev.hpp | 6 +- .../amgcl/relaxation/detail/ilu_solve.hpp | 123 +++++++++++--- external_libraries/amgcl/relaxation/ilu0.hpp | 22 +-- external_libraries/amgcl/relaxation/iluk.hpp | 23 +-- external_libraries/amgcl/relaxation/ilut.hpp | 23 +-- external_libraries/amgcl/runtime.hpp | 14 +- .../amgcl/solver/skyline_lu.hpp | 127 +------------- external_libraries/amgcl/util.hpp | 18 +- 27 files changed, 557 insertions(+), 336 deletions(-) diff --git a/external_libraries/amgcl/adapter/crs_tuple.hpp b/external_libraries/amgcl/adapter/crs_tuple.hpp index 41e5e19ae70e..65c6ec76c4ce 100644 --- a/external_libraries/amgcl/adapter/crs_tuple.hpp +++ b/external_libraries/amgcl/adapter/crs_tuple.hpp @@ -64,6 +64,7 @@ AMG amg(boost::make_tuple(n, #include #include #include +#include #include #include diff --git a/external_libraries/amgcl/amg.hpp b/external_libraries/amgcl/amg.hpp index 86ed0099ac92..fd919f50508a 100644 --- a/external_libraries/amgcl/amg.hpp +++ b/external_libraries/amgcl/amg.hpp @@ -325,39 +325,48 @@ class amg { level() {} level(boost::shared_ptr A, - params &prm, const backend_params &bprm) : - m_rows(backend::rows(*A)), - m_nonzeros(backend::nonzeros(*A)), - f(Backend::create_vector(m_rows, bprm)), - u(Backend::create_vector(m_rows, bprm)), - t(Backend::create_vector(m_rows, bprm)), - A(Backend::copy_matrix(A, bprm)), - relax(boost::make_shared(*A, prm.relax, bprm)) - {} + params &prm, const backend_params &bprm) + : m_rows(backend::rows(*A)), m_nonzeros(backend::nonzeros(*A)) + { + AMGCL_TIC("move to backend"); + f = Backend::create_vector(m_rows, bprm); + u = Backend::create_vector(m_rows, bprm); + t = Backend::create_vector(m_rows, bprm); + this->A = Backend::copy_matrix(A, bprm); + AMGCL_TOC("move to backend"); + + AMGCL_TIC("relaxation"); + relax = boost::make_shared(*A, prm.relax, bprm); + AMGCL_TOC("relaxation"); + } boost::shared_ptr step_down( boost::shared_ptr A, params &prm, const backend_params &bprm) { - TIC("transfer operators"); + AMGCL_TIC("transfer operators"); boost::shared_ptr P, R; boost::tie(P, R) = Coarsening::transfer_operators( *A, prm.coarsening); - precondition(backend::cols(*P) > 0, - "Zero-sized coarse level in amgcl (diagonal matrix?)"); + if(backend::cols(*P) == 0) { + // Zero-sized coarse level in amgcl (diagonal matrix?) + return boost::shared_ptr(); + } sort_rows(*P); sort_rows(*R); - TOC("transfer operators"); + AMGCL_TOC("transfer operators"); + AMGCL_TIC("move to backend"); this->P = Backend::copy_matrix(P, bprm); this->R = Backend::copy_matrix(R, bprm); + AMGCL_TOC("move to backend"); - TIC("coarse operator"); + AMGCL_TIC("coarse operator"); A = Coarsening::coarse_operator(*A, *P, *R, prm.coarsening); sort_rows(*A); - TOC("coarse operator"); + AMGCL_TOC("coarse operator"); return A; } @@ -405,6 +414,8 @@ class amg { "Matrix should be square!" ); + bool direct_coarse_solve = true; + while( backend::rows(*A) > prm.coarse_enough && levels.size() < prm.max_levels) { { #ifdef AMGCL_ASYNC_SETUP @@ -416,10 +427,22 @@ class amg { ready_to_cycle.notify_all(); #endif A = levels.back().step_down(A, prm, bprm); + if (!A) { + // Zero-sized coarse level. Probably the system matrix on + // this level is diagonal, should be easily solvable with a + // couple of smoother iterations. + direct_coarse_solve = false; + break; + } } - if (levels.size() < prm.max_levels) { - TIC("coarsest level"); + if (!A || backend::rows(*A) > prm.coarse_enough) { + // The coarse matrix is still too big to be solved directly. + direct_coarse_solve = false; + } + + if (direct_coarse_solve) { + AMGCL_TIC("coarsest level"); if (prm.direct_coarse) { level l; l.create_coarse(A, bprm, levels.empty()); @@ -441,7 +464,7 @@ class amg { #ifdef AMGCL_ASYNC_SETUP ready_to_cycle.notify_all(); #endif - TOC("coarsest level"); + AMGCL_TOC("coarsest level"); } } @@ -478,41 +501,35 @@ class amg { if (nxt == end) { if (lvl->solve) { - TIC("coarse"); + AMGCL_TIC("coarse"); (*lvl->solve)(rhs, x); - TOC("coarse"); + AMGCL_TOC("coarse"); } else { - TIC("relax"); + AMGCL_TIC("relax"); lvl->relax->apply_pre(*lvl->A, rhs, x, *lvl->t, prm.relax); lvl->relax->apply_post(*lvl->A, rhs, x, *lvl->t, prm.relax); - TOC("relax"); + AMGCL_TOC("relax"); } } else { for (size_t j = 0; j < prm.ncycle; ++j) { - TIC("relax"); + AMGCL_TIC("relax"); for(size_t i = 0; i < prm.npre; ++i) lvl->relax->apply_pre(*lvl->A, rhs, x, *lvl->t, prm.relax); - TOC("relax"); + AMGCL_TOC("relax"); - TIC("residual"); backend::residual(rhs, *lvl->A, x, *lvl->t); - TOC("residual"); - TIC("restrict"); backend::spmv(math::identity(), *lvl->R, *lvl->t, math::zero(), *nxt->f); - TOC("restrict"); backend::clear(*nxt->u); cycle(nxt, *nxt->f, *nxt->u); - TIC("prolongate"); backend::spmv(math::identity(), *lvl->P, *nxt->u, math::identity(), x); - TOC("prolongate"); - TIC("relax"); - for(size_t i = 0; i < prm.npre; ++i) + AMGCL_TIC("relax"); + for(size_t i = 0; i < prm.npost; ++i) lvl->relax->apply_post(*lvl->A, rhs, x, *lvl->t, prm.relax); - TOC("relax"); + AMGCL_TOC("relax"); } } } diff --git a/external_libraries/amgcl/backend/builtin.hpp b/external_libraries/amgcl/backend/builtin.hpp index c9e78881aea6..1e244b13a176 100644 --- a/external_libraries/amgcl/backend/builtin.hpp +++ b/external_libraries/amgcl/backend/builtin.hpp @@ -368,7 +368,7 @@ product(const MatrixA &A, const MatrixB &B, bool sort = false) { int nt = 1; #endif - if (nt > 4) { + if (nt > 16) { spgemm_rmerge(A, B, *C); } else { spgemm_saad(A, B, *C, sort); @@ -428,7 +428,10 @@ class numa_vector { } template - numa_vector(const Vector &other) : n(other.size()), p(new T[n]) { + numa_vector(const Vector &other, + typename boost::disable_if, int>::type = 0 + ) : n(other.size()), p(new T[n]) + { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) p[i] = other[i]; diff --git a/external_libraries/amgcl/backend/cuda.hpp b/external_libraries/amgcl/backend/cuda.hpp index 4f474a6bb623..21d22ecefa6a 100644 --- a/external_libraries/amgcl/backend/cuda.hpp +++ b/external_libraries/amgcl/backend/cuda.hpp @@ -86,6 +86,14 @@ inline void cuda_check(cusparseStatus_t rc, const char *file, int line) { } } +inline void cuda_check(cudaError_t rc, const char *file, int line) { + if (rc != cudaSuccess) { + std::ostringstream msg; + msg << "CUDA error " << rc << " at \"" << file << ":" << line; + precondition(false, msg.str()); + } +} + #define AMGCL_CALL_CUDA(rc) \ amgcl::backend::detail::cuda_check(rc, __FILE__, __LINE__) @@ -105,6 +113,10 @@ struct cuda_deleter { void operator()(csrsv2Info_t handle) { AMGCL_CALL_CUDA( cusparseDestroyCsrsv2Info(handle) ); } + + void operator()(cudaEvent_t handle) { + AMGCL_CALL_CUDA( cudaEventDestroy(handle) ); + } }; @@ -536,6 +548,37 @@ struct vmul_impl< } }; +class cuda_event { + public: + cuda_event() : e(create_event(), backend::detail::cuda_deleter()) { } + + float operator-(cuda_event tic) const { + float delta; + cudaEventSynchronize(e.get()); + cudaEventElapsedTime(&delta, tic.e.get(), e.get()); + return delta / 1000.0f; + } + private: + boost::shared_ptr::type> e; + + static cudaEvent_t create_event() { + cudaEvent_t e; + cudaEventCreate(&e); + cudaEventRecord(e, 0); + return e; + } +}; + +struct cuda_clock { + typedef cuda_event value_type; + + static const char* units() { return "s"; } + + cuda_event current() const { + return cuda_event(); + } +}; + } // namespace backend } // namespace amgcl diff --git a/external_libraries/amgcl/backend/hpx.hpp b/external_libraries/amgcl/backend/hpx.hpp index 811d8e8c402e..e6e63121f202 100644 --- a/external_libraries/amgcl/backend/hpx.hpp +++ b/external_libraries/amgcl/backend/hpx.hpp @@ -38,7 +38,6 @@ THE SOFTWARE. #include #include -#include #include #include #include diff --git a/external_libraries/amgcl/backend/interface.hpp b/external_libraries/amgcl/backend/interface.hpp index 9bab59964647..417823e2a147 100644 --- a/external_libraries/amgcl/backend/interface.hpp +++ b/external_libraries/amgcl/backend/interface.hpp @@ -270,7 +270,9 @@ void spmv( Beta beta, Vector2 &y) { + AMGCL_TIC("spmv"); spmv_impl::apply(alpha, A, x, beta, y); + AMGCL_TOC("spmv"); } /// Computes residual error. @@ -280,28 +282,36 @@ void spmv( template void residual(const Vector1 &rhs, const Matrix &A, const Vector2 &x, Vector3 &r) { + AMGCL_TIC("residual"); residual_impl::apply(rhs, A, x, r); + AMGCL_TOC("residual"); } /// Zeros out a vector. template void clear(Vector &x) { + AMGCL_TIC("clear"); clear_impl::apply(x); + AMGCL_TOC("clear"); } /// Vector copy. template void copy(const Vector1 &x, Vector2 &y) { + AMGCL_TIC("copy"); copy_impl::apply(x, y); + AMGCL_TOC("copy"); } /// Copy data to backend. template void copy_to_backend(const std::vector::type> &data, Vector &x) { + AMGCL_TIC("copy_to_backend"); copy_to_backend_impl::apply(data, x); + AMGCL_TOC("copy_to_backend"); } /// Computes inner product of two vectors. @@ -311,7 +321,15 @@ typename math::inner_product_impl< >::return_type inner_product(const Vector1 &x, const Vector2 &y) { - return inner_product_impl::get(x, y); + typedef typename math::inner_product_impl< + typename value_type::type + >::return_type result_type; + + AMGCL_TIC("inner_product"); + result_type p = inner_product_impl::get(x, y); + AMGCL_TOC("inner_product"); + + return p; } /// Computes linear combination of two vectors. @@ -320,7 +338,9 @@ inner_product(const Vector1 &x, const Vector2 &y) */ template void axpby(A a, Vector1 const &x, B b, Vector2 &y) { + AMGCL_TIC("axpby"); axpby_impl::apply(a, x, b, y); + AMGCL_TOC("axpby"); } /// Computes linear combination of three vectors. @@ -329,7 +349,9 @@ void axpby(A a, Vector1 const &x, B b, Vector2 &y) { */ template void axpbypcz(A a, Vector1 const &x, B b, Vector2 const &y, C c, Vector3 &z) { + AMGCL_TIC("axpbypcz"); axpbypcz_impl::apply(a, x, b, y, c, z); + AMGCL_TOC("axpbypcz"); } /// Computes element-wize vector product. @@ -339,7 +361,9 @@ void axpbypcz(A a, Vector1 const &x, B b, Vector2 const &y, C c, Vector3 &z) { template void vmul(Alpha alpha, const Vector1 &x, const Vector2 &y, Beta beta, Vector3 &z) { + AMGCL_TIC("vmul"); vmul_impl::apply(alpha, x, y, beta, z); + AMGCL_TOC("vmul"); } /// Is the relaxation supported by the backend? @@ -357,7 +381,6 @@ struct coarsening_is_supported : boost::true_type {}; template void lin_comb(size_t n, const Coefs &c, const Vecs &v, const Coef &alpha, Vec &y) { axpby(c[0], *v[0], alpha, y); - size_t i = 1; for(; i + 1 < n; i += 2) axpbypcz(c[i], *v[i], c[i+1], *v[i+1], math::identity(), y); diff --git a/external_libraries/amgcl/backend/vexcl.hpp b/external_libraries/amgcl/backend/vexcl.hpp index 33123c29a96a..16fc5211893c 100644 --- a/external_libraries/amgcl/backend/vexcl.hpp +++ b/external_libraries/amgcl/backend/vexcl.hpp @@ -111,7 +111,7 @@ struct vexcl { params() : fast_matrix_setup(true) {} params(const boost::property_tree::ptree &p) - : AMGCL_PARAMS_IMPORT_CHILD(p, fast_matrix_setup) + : AMGCL_PARAMS_IMPORT_VALUE(p, fast_matrix_setup) { std::vector *ptr = 0; ptr = p.get("q", ptr); diff --git a/external_libraries/amgcl/coarsening/aggregation.hpp b/external_libraries/amgcl/coarsening/aggregation.hpp index 33f4a12dffce..b48c14d8144d 100644 --- a/external_libraries/amgcl/coarsening/aggregation.hpp +++ b/external_libraries/amgcl/coarsening/aggregation.hpp @@ -126,15 +126,15 @@ struct aggregation { { const size_t n = rows(A); - TIC("aggregates"); + AMGCL_TIC("aggregates"); Aggregates aggr(A, prm.aggr, prm.nullspace.cols); - TOC("aggregates"); + AMGCL_TOC("aggregates"); - TIC("interpolation"); + AMGCL_TIC("interpolation"); boost::shared_ptr P = tentative_prolongation( n, aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size ); - TOC("interpolation"); + AMGCL_TOC("interpolation"); if (prm.nullspace.cols > 0) prm.aggr.block_size = prm.nullspace.cols; diff --git a/external_libraries/amgcl/coarsening/ruge_stuben.hpp b/external_libraries/amgcl/coarsening/ruge_stuben.hpp index 3aa6c6683623..c5df4785bb25 100644 --- a/external_libraries/amgcl/coarsening/ruge_stuben.hpp +++ b/external_libraries/amgcl/coarsening/ruge_stuben.hpp @@ -113,12 +113,12 @@ struct ruge_stuben { std::vector cf(n, 'U'); backend::crs S; - TIC("C/F split"); + AMGCL_TIC("C/F split"); connect(A, prm.eps_strong, S, cf); cfsplit(A, S, cf); - TOC("C/F split"); + AMGCL_TOC("C/F split"); - TIC("interpolation"); + AMGCL_TIC("interpolation"); size_t nc = 0; std::vector cidx(n); for(size_t i = 0; i < n; ++i) @@ -230,14 +230,14 @@ struct ruge_stuben { Val v = A.val[j]; if (!S.val[j] || cf[c] != 'C') continue; - if (prm.do_trunc && Amin[i] < v && v < Amax[i]) continue; + if (prm.do_trunc && Amin[i] <= v && v <= Amax[i]) continue; P->col[row_head] = cidx[c]; P->val[row_head] = (v < zero ? alpha : beta) * v; ++row_head; } } - TOC("interpolation"); + AMGCL_TOC("interpolation"); return boost::make_tuple(P, transpose(*P)); } @@ -286,6 +286,8 @@ struct ruge_stuben { #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast(n); ++i) { + S.ptr[i+1] = 0; + Val a_min = math::zero(); for(row_iterator a = row_begin(A, i); a; ++a) @@ -300,8 +302,6 @@ struct ruge_stuben { for(Ptr j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) S.val[j] = (A.col[j] != i && A.val[j] < a_min); - - S.ptr[i+1] = 0; } // Transposition of S: diff --git a/external_libraries/amgcl/coarsening/smoothed_aggr_emin.hpp b/external_libraries/amgcl/coarsening/smoothed_aggr_emin.hpp index 092e6b0028de..e148644694f8 100644 --- a/external_libraries/amgcl/coarsening/smoothed_aggr_emin.hpp +++ b/external_libraries/amgcl/coarsening/smoothed_aggr_emin.hpp @@ -89,12 +89,12 @@ struct smoothed_aggr_emin { typedef typename backend::value_type::type Val; typedef ptrdiff_t Idx; - TIC("aggregates"); + AMGCL_TIC("aggregates"); Aggregates aggr(A, prm.aggr, prm.nullspace.cols); prm.aggr.eps_strong *= 0.5; - TOC("aggregates"); + AMGCL_TOC("aggregates"); - TIC("interpolation"); + AMGCL_TIC("interpolation"); boost::shared_ptr P_tent = tentative_prolongation( rows(A), aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size ); @@ -157,7 +157,7 @@ struct smoothed_aggr_emin { boost::shared_ptr P = interpolation(Af, dia, *P_tent, omega); boost::shared_ptr R = restriction (Af, dia, *P_tent, omega); - TOC("interpolation"); + AMGCL_TOC("interpolation"); if (prm.nullspace.cols > 0) prm.aggr.block_size = prm.nullspace.cols; diff --git a/external_libraries/amgcl/coarsening/smoothed_aggregation.hpp b/external_libraries/amgcl/coarsening/smoothed_aggregation.hpp index 9e04b2d112f1..b46b53ac53da 100644 --- a/external_libraries/amgcl/coarsening/smoothed_aggregation.hpp +++ b/external_libraries/amgcl/coarsening/smoothed_aggregation.hpp @@ -31,9 +31,15 @@ THE SOFTWARE. * \brief Smoothed aggregation coarsening scheme. */ +#ifdef _OPENMP +# include +#endif + #include #include #include +#include +#include #include #include @@ -60,8 +66,14 @@ struct smoothed_aggregation { /// Near nullspace parameters. nullspace_params nullspace; - /// Relaxation factor \f$\omega\f$. + /// Relaxation factor. /** + * Used as a scaling for the damping factor omega. + * When estimate_spectral_radius is set, then + * omega = relax * (4/3) / rho. + * Otherwise + * omega = relax * (2/3). + * * Piecewise constant prolongation \f$\tilde P\f$ from non-smoothed * aggregation is improved by a smoothing to get the final prolongation * matrix \f$P\f$. Simple Jacobi smoother is used here, giving the @@ -82,20 +94,33 @@ struct smoothed_aggregation { */ float relax; - params() : relax(0.666f) { } + // Use power iterations to estimate the matrix spectral radius. + // This usually improves convergence rate and results in faster solves, + // but costs some time during setup. + bool estimate_spectral_radius; + + // Number of power iterations to apply for the spectral radius + // estimation. + int power_iters; + + params() : relax(1.0f), estimate_spectral_radius(true), power_iters(5) { } params(const boost::property_tree::ptree &p) : AMGCL_PARAMS_IMPORT_CHILD(p, aggr), AMGCL_PARAMS_IMPORT_CHILD(p, nullspace), - AMGCL_PARAMS_IMPORT_VALUE(p, relax) + AMGCL_PARAMS_IMPORT_VALUE(p, relax), + AMGCL_PARAMS_IMPORT_VALUE(p, estimate_spectral_radius), + AMGCL_PARAMS_IMPORT_VALUE(p, power_iters) { - AMGCL_PARAMS_CHECK(p, (aggr)(nullspace)(relax)); + AMGCL_PARAMS_CHECK(p, (aggr)(nullspace)(relax)(estimate_spectral_radius)(power_iters)); } void get(boost::property_tree::ptree &p, const std::string &path) const { AMGCL_PARAMS_EXPORT_CHILD(p, path, aggr); AMGCL_PARAMS_EXPORT_CHILD(p, path, nullspace); AMGCL_PARAMS_EXPORT_VALUE(p, path, relax); + AMGCL_PARAMS_EXPORT_VALUE(p, path, estimate_spectral_radius); + AMGCL_PARAMS_EXPORT_VALUE(p, path, power_iters); } }; @@ -109,12 +134,12 @@ struct smoothed_aggregation { const size_t n = rows(A); - TIC("aggregates"); + AMGCL_TIC("aggregates"); Aggregates aggr(A, prm.aggr, prm.nullspace.cols); prm.aggr.eps_strong *= 0.5; - TOC("aggregates"); + AMGCL_TOC("aggregates"); - TIC("interpolation"); + AMGCL_TIC("interpolation"); boost::shared_ptr P_tent = tentative_prolongation( n, aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size ); @@ -122,6 +147,13 @@ struct smoothed_aggregation { boost::shared_ptr P = boost::make_shared(); P->set_size(rows(*P_tent), cols(*P_tent), true); + scalar_type omega = prm.relax; + if (prm.estimate_spectral_radius) { + omega *= static_cast(4.0/3) / spectral_radius(A, prm.power_iters); + } else { + omega *= static_cast(2.0/3); + } + #pragma omp parallel { std::vector marker(P->ncols, -1); @@ -179,8 +211,8 @@ struct smoothed_aggregation { if (ca != i && !aggr.strong_connection[ja]) continue; value_type va = (ca == i) - ? static_cast(static_cast(1 - prm.relax) * math::identity()) - : static_cast(static_cast(-prm.relax) * dia * A.val[ja]); + ? static_cast(static_cast(1 - omega) * math::identity()) + : static_cast(static_cast(-omega) * dia * A.val[ja]); for(ptrdiff_t jp = P_tent->ptr[ca], ep = P_tent->ptr[ca+1]; jp < ep; ++jp) { ptrdiff_t cp = P_tent->col[jp]; @@ -198,7 +230,7 @@ struct smoothed_aggregation { } } } - TOC("interpolation"); + AMGCL_TOC("interpolation"); if (prm.nullspace.cols > 0) prm.aggr.block_size = prm.nullspace.cols; @@ -218,6 +250,112 @@ struct smoothed_aggregation { { return detail::galerkin(A, P, R); } + + // Uses power iteration to estimate spectral readius of the matrix, + // scaled by its inverse diagonal. + template + static + typename math::scalar_of::type>::type + spectral_radius(const Matrix &A, int power_iters) + { + typedef typename backend::value_type::type value_type; + typedef typename math::rhs_of::type rhs_type; + typedef typename math::scalar_of::type scalar_type; + + const ptrdiff_t n = backend::rows(A); + + backend::numa_vector D(n, false); + backend::numa_vector b0(n, false), b1(n, false); + + // Fill the initial vector with random values. + // Also extract the inverted matrix diagonal values. + scalar_type b0_norm = 0; +#pragma omp parallel + { +#ifdef _OPENMP + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + boost::random::mt11213b rng(tid); + boost::random::uniform_real_distribution rnd(-1, 1); + + scalar_type loc_norm = 0; + +#pragma omp for nowait + for(ptrdiff_t i = 0; i < n; ++i) { + rhs_type v = math::constant(rnd(rng)); + + b0[i] = v; + loc_norm += math::norm(math::inner_product(v,v)); + + for(ptrdiff_t j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) { + if (A.col[j] == i) { + D[i] = math::inverse(A.val[j]); + break; + } + } + } + +#pragma omp critical + b0_norm += loc_norm; + } + + // Normalize b0 + b0_norm = 1 / sqrt(b0_norm); +#pragma omp parallel for + for(ptrdiff_t i = 0; i < n; ++i) { + b0[i] = b0_norm * b0[i]; + } + + scalar_type radius = 1; + + for(int iter = 0; iter < power_iters;) { + // b1 = (D * A) * b0 + // b1_norm = ||b1|| + // radius = + scalar_type b1_norm = 0; + radius = 0; +#pragma omp parallel + { + scalar_type loc_norm = 0; + scalar_type loc_radi = 0; + +#pragma omp for nowait + for(ptrdiff_t i = 0; i < n; ++i) { + rhs_type s = math::zero(); + + for(ptrdiff_t j = A.ptr[i], e = A.ptr[i+1]; j < e; ++j) { + s += A.val[j] * b0[A.col[j]]; + } + + s = D[i] * s; + + loc_norm += math::norm(math::inner_product(s, s)); + loc_radi += math::norm(math::inner_product(s, b0[i])); + + b1[i] = s; + } + +#pragma omp critical + { + b1_norm += loc_norm; + radius += loc_radi; + } + } + + if (++iter < power_iters) { + // b0 = b1 / b1_norm + b1_norm = 1 / sqrt(b1_norm); +#pragma omp parallel for + for(ptrdiff_t i = 0; i < n; ++i) { + b0[i] = b1_norm * b1[i]; + } + } + } + + return radius < 0 ? static_cast(2) : radius; + } }; } // namespace coarsening diff --git a/external_libraries/amgcl/coarsening/tentative_prolongation.hpp b/external_libraries/amgcl/coarsening/tentative_prolongation.hpp index 497e075a871d..58e8f5e7784a 100644 --- a/external_libraries/amgcl/coarsening/tentative_prolongation.hpp +++ b/external_libraries/amgcl/coarsening/tentative_prolongation.hpp @@ -129,7 +129,7 @@ boost::shared_ptr tentative_prolongation( boost::shared_ptr P = boost::make_shared(); - TIC("tentative"); + AMGCL_TIC("tentative"); if (nullspace.cols > 0) { // Sort fine points by aggregate number. // Put points not belonging to any aggregate to the end of the list. @@ -210,7 +210,7 @@ boost::shared_ptr tentative_prolongation( } } } - TOC("tentative"); + AMGCL_TOC("tentative"); return P; } diff --git a/external_libraries/amgcl/make_block_solver.hpp b/external_libraries/amgcl/make_block_solver.hpp index c65f58493187..37167245e2b6 100644 --- a/external_libraries/amgcl/make_block_solver.hpp +++ b/external_libraries/amgcl/make_block_solver.hpp @@ -117,6 +117,10 @@ class make_block_solver { typename Precond::matrix const& system_matrix() const { return S->system_matrix(); } + + friend std::ostream& operator<<(std::ostream &os, const make_block_solver &p) { + return os << *p.S << std::endl; + } private: typedef make_solver Solver; boost::shared_ptr S; diff --git a/external_libraries/amgcl/mpi/inner_product.hpp b/external_libraries/amgcl/mpi/inner_product.hpp index d484024b86c8..2b4311504907 100644 --- a/external_libraries/amgcl/mpi/inner_product.hpp +++ b/external_libraries/amgcl/mpi/inner_product.hpp @@ -48,7 +48,7 @@ struct inner_product { template typename backend::value_type::type operator()(const Vec1 &x, const Vec2 &y) const { - TIC("inner product"); + AMGCL_TIC("inner product"); typedef typename backend::value_type::type value_type; value_type lsum = backend::inner_product(x, y); @@ -56,7 +56,7 @@ struct inner_product { MPI_Allreduce(&lsum, &gsum, 1, datatype(), MPI_SUM, comm); - TOC("inner product"); + AMGCL_TOC("inner product"); return gsum; } }; diff --git a/external_libraries/amgcl/mpi/schur_pressure_correction.hpp b/external_libraries/amgcl/mpi/schur_pressure_correction.hpp index fd3f9706ceaa..afab65dbab86 100644 --- a/external_libraries/amgcl/mpi/schur_pressure_correction.hpp +++ b/external_libraries/amgcl/mpi/schur_pressure_correction.hpp @@ -77,11 +77,17 @@ class schur_pressure_correction { std::vector pmask; - params() {} + // Approximate Kuu^-1 with inverted diagonal of Kuu during + // construction of matrix-less Schur complement. + // When false, USolver is used instead. + bool approx_schur; + + params() : approx_schur(true) {} params(const boost::property_tree::ptree &p) : AMGCL_PARAMS_IMPORT_CHILD(p, usolver), - AMGCL_PARAMS_IMPORT_CHILD(p, psolver) + AMGCL_PARAMS_IMPORT_CHILD(p, psolver), + AMGCL_PARAMS_IMPORT_VALUE(p, approx_schur) { void *pm = 0; size_t n = 0; @@ -100,13 +106,14 @@ class schur_pressure_correction { pmask.assign(static_cast(pm), static_cast(pm) + n); - AMGCL_PARAMS_CHECK(p, (usolver)(psolver)(pmask)(pmask_size)); + AMGCL_PARAMS_CHECK(p, (usolver)(psolver)(pmask)(pmask_size)(approx_schur)); } void get(boost::property_tree::ptree &p, const std::string &path = "") const { AMGCL_PARAMS_EXPORT_CHILD(p, path, usolver); AMGCL_PARAMS_EXPORT_CHILD(p, path, psolver); + AMGCL_PARAMS_EXPORT_VALUE(p, path, approx_schur); } } prm; @@ -233,7 +240,7 @@ class schur_pressure_correction { Kup_loc->set_size(nu, np, true); Kpu_rem->set_size(np, 0, true); - Kup_rem->set_size(np, 0, true); + Kup_rem->set_size(nu, 0, true); #pragma omp parallel for for(ptrdiff_t i = 0; i < n; ++i) { @@ -411,6 +418,9 @@ class schur_pressure_correction { tmp = backend_type::create_vector(nu, bprm); + if (prm.approx_schur) + M = backend_type::copy_vector(diagonal(*Kuu, /*invert = */true), bprm); + // Scatter/Gather matrices boost::shared_ptr x2u = boost::make_shared(); boost::shared_ptr x2p = boost::make_shared(); @@ -495,53 +505,57 @@ class schur_pressure_correction { #endif ) const { - TIC("split variables"); + AMGCL_TIC("split variables"); backend::spmv(1, *x2u, rhs, 0, *rhs_u); backend::spmv(1, *x2p, rhs, 0, *rhs_p); - TOC("split variables"); + AMGCL_TOC("split variables"); // Ai u = rhs_u - TIC("solve U"); + AMGCL_TIC("solve U"); backend::clear(*u); report("U1", (*U)(*rhs_u, *u)); - TOC("solve U"); + AMGCL_TOC("solve U"); // rhs_p -= Kpu u - TIC("solve P"); + AMGCL_TIC("solve P"); backend::spmv(-1, *Kpu, *u, 1, *rhs_p); // S p = rhs_p backend::clear(*p); report("P", (*P)(*this, *rhs_p, *p)); - TOC("solve P"); + AMGCL_TOC("solve P"); // rhs_u -= Kup p - TIC("Update U"); + AMGCL_TIC("Update U"); backend::spmv(-1, *Kup, *p, 1, *rhs_u); // Ai u = rhs_u backend::clear(*u); report("U2", (*U)(*rhs_u, *u)); - TOC("Update U"); + AMGCL_TOC("Update U"); - TIC("merge variables"); + AMGCL_TIC("merge variables"); backend::clear(x); backend::spmv(1, *u2x, *u, 1, x); backend::spmv(1, *p2x, *p, 1, x); - TOC("merge variables"); + AMGCL_TOC("merge variables"); } template void spmv(Alpha alpha, const Vec1 &x, Beta beta, Vec2 &y) const { // y = beta y + alpha S x, where S = Kpp - Kpu Kuu^-1 Kup - TIC("matrix-free spmv"); + AMGCL_TIC("matrix-free spmv"); backend::spmv(alpha, P->system_matrix(), x, beta, y); backend::spmv(1, *Kup, x, 0, *tmp); - backend::clear(*u); - (*U)(*tmp, *u); + if (prm.approx_schur) { + backend::vmul(1, *M, *tmp, 0, *u); + } else { + backend::clear(*u); + (*U)(*tmp, *u); + } backend::spmv(-alpha, *Kpu, *u, 1, y); - TOC("matrix-free spmv"); + AMGCL_TOC("matrix-free spmv"); } private: typedef comm_pattern CommPattern; @@ -553,6 +567,7 @@ class schur_pressure_correction { boost::shared_ptr x2p, x2u, p2x, u2x; boost::shared_ptr K, Kpu, Kup; boost::shared_ptr rhs_u, rhs_p, u, p, tmp; + boost::shared_ptr M; boost::shared_ptr U; boost::shared_ptr P; diff --git a/external_libraries/amgcl/mpi/subdomain_deflation.hpp b/external_libraries/amgcl/mpi/subdomain_deflation.hpp index 97aae2c3e1d1..f9c74f3cc2d8 100644 --- a/external_libraries/amgcl/mpi/subdomain_deflation.hpp +++ b/external_libraries/amgcl/mpi/subdomain_deflation.hpp @@ -145,7 +145,7 @@ class subdomain_deflation { q( backend_type::create_vector(nrows, bprm) ), S(nrows, prm.isolver, bprm, mpi::inner_product(mpi_comm)) { - TIC("setup deflation"); + AMGCL_TIC("setup deflation"); typedef backend::crs build_matrix; typedef typename backend::row_iterator::type row_iterator1; typedef typename backend::row_iterator::type row_iterator2; @@ -170,7 +170,7 @@ class subdomain_deflation { ptrdiff_t loc_end = domain[comm.rank + 1]; // Fill deflation vectors. - TIC("copy deflation vectors"); + AMGCL_TIC("copy deflation vectors"); { std::vector z(nrows); for(int j = 0; j < ndv; ++j) { @@ -179,12 +179,12 @@ class subdomain_deflation { Z[j] = backend_type::copy_vector(z, bprm); } } - TOC("copy deflation vectors"); + AMGCL_TOC("copy deflation vectors"); // Number of nonzeros in local and remote parts of the matrix. ptrdiff_t loc_nnz = 0, rem_nnz = 0; - TIC("first pass"); + AMGCL_TIC("first pass"); // First pass over Astrip rows: // 1. Count local and remote nonzeros, // 3. Build sparsity pattern of matrix AZ. @@ -218,9 +218,9 @@ class subdomain_deflation { } } } - TOC("first pass"); + AMGCL_TOC("first pass"); - TIC("second pass"); + AMGCL_TIC("second pass"); // Second pass over Astrip rows: // 1. Build local and remote matrix parts. // 2. Build local part of AZ matrix. @@ -273,7 +273,7 @@ class subdomain_deflation { aloc->ptr[i+1] = loc_head; arem->ptr[i+1] = rem_head; } - TOC("second pass"); + AMGCL_TOC("second pass"); // Create local preconditioner. P = boost::make_shared( *aloc, prm.local, bprm ); @@ -284,7 +284,7 @@ class subdomain_deflation { Arem = backend_type::copy_matrix(arem, bprm); A = boost::make_shared(*C, P->system_matrix(), *Arem); - TIC("A*Z"); + AMGCL_TIC("A*Z"); /* Finish construction of AZ */ // Exchange deflation vectors std::vector zrecv_ptr(C->recv.nbr.size() + 1, 0); @@ -358,12 +358,12 @@ class subdomain_deflation { std::rotate(az->ptr, az->ptr + nrows, az->ptr + nrows + 1); az->ptr[0] = 0; - TOC("A*Z"); + AMGCL_TOC("A*Z"); MPI_Waitall(C->send.req.size(), &C->send.req[0], MPI_STATUSES_IGNORE); /* Build deflated matrix E. */ - TIC("assemble E"); + AMGCL_TIC("assemble E"); // Who is responsible for solution of coarse problem int nmasters = std::min(comm.size, DirectSolver::comm_size(nz, prm.dsolver)); int nslaves = (comm.size + nmasters - 1) / nmasters; @@ -488,10 +488,10 @@ class subdomain_deflation { dtype, 0, slaves_comm ); } - TOC("assemble E"); + AMGCL_TOC("assemble E"); // Prepare E factorization. - TIC("factorize E"); + AMGCL_TIC("factorize E"); if (comm.rank == master_rank) { E = boost::make_shared( masters_comm, Eptr.size() - 1, Eptr, Ecol, Eval, prm.dsolver @@ -500,9 +500,9 @@ class subdomain_deflation { cf.resize(Eptr.size() - 1); cx.resize(Eptr.size() - 1); } - TOC("factorize E"); + AMGCL_TOC("factorize E"); - TOC("setup deflation"); + AMGCL_TOC("setup deflation"); // Move matrices to backend. AZ = backend_type::copy_matrix(az, bprm); @@ -561,18 +561,18 @@ class subdomain_deflation { template void mul(value_type alpha, const Vec1 &x, value_type beta, Vec2 &y) const { - TIC("top/spmv"); + AMGCL_TIC("top/spmv"); backend::spmv(alpha, *A, x, beta, y); - TOC("top/spmv"); + AMGCL_TOC("top/spmv"); project(y); } template void residual(const Vec1 &f, const Vec2 &x, Vec3 &r) const { - TIC("top/residual"); + AMGCL_TIC("top/residual"); backend::residual(f, *A, x, r); - TOC("top/residual"); + AMGCL_TOC("top/residual"); project(r); } @@ -614,27 +614,27 @@ class subdomain_deflation { template void project(Vector &x) const { - TIC("project"); + AMGCL_TIC("project"); - TIC("local inner product"); + AMGCL_TIC("local inner product"); for(ptrdiff_t j = 0; j < ndv; ++j) df[j] = backend::inner_product(x, *Z[j]); - TOC("local inner product"); + AMGCL_TOC("local inner product"); coarse_solve(df, dx); - TIC("spmv"); + AMGCL_TIC("spmv"); backend::copy_to_backend(dx, *dd); backend::spmv(-1, *AZ, *dd, 1, x); - TOC("spmv"); + AMGCL_TOC("spmv"); - TOC("project"); + AMGCL_TOC("project"); } void coarse_solve(std::vector &f, std::vector &x) const { - TIC("coarse solve"); - TIC("exchange rhs"); + AMGCL_TIC("coarse solve"); + AMGCL_TIC("exchange rhs"); if (comm.rank == master_rank) { MPI_Gatherv( &f[0], f.size(), dtype, &cf[0], @@ -647,41 +647,41 @@ class subdomain_deflation { dtype, 0, slaves_comm ); } - TOC("exchange rhs"); + AMGCL_TOC("exchange rhs"); if (comm.rank == master_rank) { - TIC("call solver"); + AMGCL_TIC("call solver"); (*E)(cf, cx); - TOC("call solver"); + AMGCL_TOC("call solver"); - TIC("gather result"); + AMGCL_TIC("gather result"); MPI_Gatherv( &cx[0], cx.size(), dtype, &x[0], const_cast(&msize[0]), const_cast(&mstart[0]), dtype, 0, masters_comm ); - TOC("gather result"); + AMGCL_TOC("gather result"); } - TIC("broadcast result"); + AMGCL_TIC("broadcast result"); MPI_Bcast(&x[0], x.size(), dtype, 0, comm); - TOC("broadcast result"); - TOC("coarse solve"); + AMGCL_TOC("broadcast result"); + AMGCL_TOC("coarse solve"); } template void postprocess(const Vec1 &rhs, Vec2 &x) const { - TIC("postprocess"); + AMGCL_TIC("postprocess"); // q = Ax backend::spmv(1, *A, x, 0, *q); // df = transp(Z) * (rhs - Ax) - TIC("local inner product"); + AMGCL_TIC("local inner product"); for(ptrdiff_t j = 0; j < ndv; ++j) df[j] = backend::inner_product(rhs, *Z[j]) - backend::inner_product(*q, *Z[j]); - TOC("local inner product"); + AMGCL_TOC("local inner product"); // dx = inv(E) * df coarse_solve(df, dx); @@ -689,7 +689,7 @@ class subdomain_deflation { // x += Z * dx backend::lin_comb(ndv, &dx[dv_start[comm.rank]], Z, 1, x); - TOC("postprocess"); + AMGCL_TOC("postprocess"); } }; diff --git a/external_libraries/amgcl/preconditioner/dummy.hpp b/external_libraries/amgcl/preconditioner/dummy.hpp index 77547276e189..acd3612a543f 100644 --- a/external_libraries/amgcl/preconditioner/dummy.hpp +++ b/external_libraries/amgcl/preconditioner/dummy.hpp @@ -63,7 +63,7 @@ class dummy { dummy( boost::shared_ptr M, - const params &prm = params(), + const params& = params(), const backend_params &bprm = backend_params() ) : A(Backend::copy_matrix(M, bprm)) diff --git a/external_libraries/amgcl/preconditioner/schur_pressure_correction.hpp b/external_libraries/amgcl/preconditioner/schur_pressure_correction.hpp index 1e33193a96eb..6b5a1922f18d 100644 --- a/external_libraries/amgcl/preconditioner/schur_pressure_correction.hpp +++ b/external_libraries/amgcl/preconditioner/schur_pressure_correction.hpp @@ -118,11 +118,17 @@ class schur_pressure_correction { std::vector pmask; - params() {} + // Approximate Kuu^-1 with inverted diagonal of Kuu during + // construction of matrix-less Schur complement. + // When false, USolver is used instead. + bool approx_schur; + + params() : approx_schur(true) {} params(const boost::property_tree::ptree &p) : AMGCL_PARAMS_IMPORT_CHILD(p, usolver), - AMGCL_PARAMS_IMPORT_CHILD(p, psolver) + AMGCL_PARAMS_IMPORT_CHILD(p, psolver), + AMGCL_PARAMS_IMPORT_VALUE(p, approx_schur) { void *pm = 0; size_t n = 0; @@ -141,13 +147,14 @@ class schur_pressure_correction { pmask.assign(static_cast(pm), static_cast(pm) + n); - AMGCL_PARAMS_CHECK(p, (usolver)(psolver)(pmask)(pmask_size)); + AMGCL_PARAMS_CHECK(p, (usolver)(psolver)(pmask)(pmask_size)(approx_schur)); } void get(boost::property_tree::ptree &p, const std::string &path = "") const { AMGCL_PARAMS_EXPORT_CHILD(p, path, usolver); AMGCL_PARAMS_EXPORT_CHILD(p, path, psolver); + AMGCL_PARAMS_EXPORT_VALUE(p, path, approx_schur); } } prm; @@ -218,8 +225,14 @@ class schur_pressure_correction { backend::spmv( alpha, P->system_matrix(), x, beta, y); backend::spmv(1, *Kup, x, 0, *tmp); - backend::clear(*u); - (*U)(*tmp, *u); + + if (prm.approx_schur) { + backend::vmul(1, *M, *tmp, 0, *u); + } else { + backend::clear(*u); + (*U)(*tmp, *u); + } + backend::spmv(-alpha, *Kpu, *u, 1, y); } private: @@ -227,6 +240,7 @@ class schur_pressure_correction { boost::shared_ptr K, Kup, Kpu, x2u, x2p, u2x, p2x; boost::shared_ptr rhs_u, rhs_p, u, p, tmp; + boost::shared_ptr M; boost::shared_ptr U; boost::shared_ptr P; @@ -345,6 +359,9 @@ class schur_pressure_correction { tmp = backend_type::create_vector(nu, bprm); + if (prm.approx_schur) + M = backend_type::copy_vector(diagonal(*Kuu, /*invert = */true), bprm); + // Scatter/Gather matrices boost::shared_ptr x2u = boost::make_shared(); boost::shared_ptr x2p = boost::make_shared(); diff --git a/external_libraries/amgcl/profiler.hpp b/external_libraries/amgcl/profiler.hpp index 3dcc773cf815..56a33eb83931 100644 --- a/external_libraries/amgcl/profiler.hpp +++ b/external_libraries/amgcl/profiler.hpp @@ -55,6 +55,7 @@ template class profiler { public: typedef typename Counter::value_type value_type; + typedef double delta_type; /// Initialization. /** @@ -79,12 +80,12 @@ class profiler { /** * Returns delta in the measured value since the corresponding tic(). */ - value_type toc(const std::string& /*name*/) { + delta_type toc(const std::string& /*name*/) { profile_unit *top = stack.back(); stack.pop_back(); value_type current = counter.current(); - value_type delta = current - top->begin; + delta_type delta = current - top->begin; top->length += delta; root.length = current - root.begin; @@ -105,8 +106,8 @@ class profiler { struct profile_unit { profile_unit() : length(0) {} - value_type children_time() const { - value_type s = value_type(); + delta_type children_time() const { + delta_type s = delta_type(); for(typename std::map::const_iterator c = children.begin(); c != children.end(); c++) s += c->second.length; return s; @@ -120,7 +121,7 @@ class profiler { } void print(std::ostream &out, const std::string &name, - int level, value_type total, size_t width) const + int level, delta_type total, size_t width) const { using namespace std; @@ -128,7 +129,7 @@ class profiler { print_line(out, name, length, 100 * length / total, width - level); if (children.size()) { - value_type val = length - children_time(); + delta_type val = length - children_time(); double perc = 100.0 * val / total; if (perc > 1e-1) { @@ -142,7 +143,7 @@ class profiler { } void print_line(std::ostream &out, const std::string &name, - value_type time, double perc, size_t width) const + delta_type time, double perc, size_t width) const { using namespace std; @@ -155,7 +156,7 @@ class profiler { } value_type begin; - value_type length; + delta_type length; std::map children; }; diff --git a/external_libraries/amgcl/relaxation/chebyshev.hpp b/external_libraries/amgcl/relaxation/chebyshev.hpp index 541176decb24..924d48067a12 100644 --- a/external_libraries/amgcl/relaxation/chebyshev.hpp +++ b/external_libraries/amgcl/relaxation/chebyshev.hpp @@ -185,10 +185,12 @@ class chebyshev { } } + // Uses Gershgorin disc theorem to estimate spectral radius of the + // matrix template static scalar_type spectral_radius(const Matrix &A) { typedef typename backend::row_iterator::type row_iterator; - const size_t n = rows(A); + const ptrdiff_t n = rows(A); scalar_type emax = 0; @@ -196,7 +198,7 @@ class chebyshev { { scalar_type my_emax = 0; #pragma omp for nowait - for(ptrdiff_t i = 0; i < static_cast(n); ++i) { + for(ptrdiff_t i = 0; i < n; ++i) { scalar_type hi = 0; for(row_iterator a = backend::row_begin(A, i); a; ++a) diff --git a/external_libraries/amgcl/relaxation/detail/ilu_solve.hpp b/external_libraries/amgcl/relaxation/detail/ilu_solve.hpp index ebaf27e82d05..b523ea346ae6 100644 --- a/external_libraries/amgcl/relaxation/detail/ilu_solve.hpp +++ b/external_libraries/amgcl/relaxation/detail/ilu_solve.hpp @@ -1,7 +1,39 @@ #ifndef AMGCL_RELAXATION_DETAIL_ILU_SOLVE_HPP #define AMGCL_RELAXATION_DETAIL_ILU_SOLVE_HPP +/* +The MIT License + +Copyright (c) 2012-2017 Denis Demidov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * \file amgcl/relaxation/detail/ilu_solve.hpp + * \author Denis Demidov + * \brief Solver for sparse triangular systems obtained as a result of an + * incomplete LU factorization. + */ + #include +#include namespace amgcl { namespace relaxation { @@ -9,7 +41,8 @@ namespace detail { template class ilu_solve { - private: + public: + typedef typename Backend::params backend_params; typedef typename Backend::value_type value_type; typedef typename Backend::matrix matrix; typedef typename Backend::vector vector; @@ -17,23 +50,37 @@ class ilu_solve { typedef typename backend::builtin::matrix build_matrix; typedef typename math::scalar_of::type scalar_type; - unsigned jacobi_iters; + struct params { + /// Number of Jacobi iterations. + unsigned iters; - boost::shared_ptr L; - boost::shared_ptr U; - boost::shared_ptr D; - boost::shared_ptr t1, t2; + /// Damping factor. + scalar_type damping; + + params() : iters(2), damping(0.72) {} + + params(const boost::property_tree::ptree &p) + : AMGCL_PARAMS_IMPORT_VALUE(p, iters) + , AMGCL_PARAMS_IMPORT_VALUE(p, damping) + { + AMGCL_PARAMS_CHECK(p, (iters)(damping)); + } + + void get(boost::property_tree::ptree &p, const std::string &path) const { + AMGCL_PARAMS_EXPORT_VALUE(p, path, iters); + AMGCL_PARAMS_EXPORT_VALUE(p, path, damping); + } + } prm; public: - template ilu_solve( boost::shared_ptr L, boost::shared_ptr U, boost::shared_ptr > D, - const Params &prm, - const typename Backend::params &bprm + const params &prm = params(), + const backend_params &bprm = backend_params() ) : - jacobi_iters(prm.jacobi_iters), + prm(prm), L(Backend::copy_matrix(L, bprm)), U(Backend::copy_matrix(U, bprm)), D(Backend::copy_vector(D, bprm)), @@ -46,24 +93,31 @@ class ilu_solve { vector *y0 = t1.get(); vector *y1 = t2.get(); - backend::copy(x, *y0); - for(unsigned i = 0; i < jacobi_iters; ++i) { + backend::axpby(prm.damping, x, 0.0, *y0); + for(unsigned i = 0; i < prm.iters; ++i) { backend::residual(x, *L, *y0, *y1); - std::swap(y0, y1); + backend::axpby(prm.damping, *y1, (1-prm.damping), *y0); } - backend::copy(*y0, x); - for(unsigned i = 0; i < jacobi_iters; ++i) { + backend::vmul(prm.damping, *D, *y0, 0.0, x); + for(unsigned i = 0; i < prm.iters; ++i) { backend::residual(*y0, *U, x, *y1); - backend::vmul(math::identity(), *D, *y1, math::zero(), x); + backend::vmul(prm.damping, *D, *y1, (1-prm.damping), x); } } + + private: + boost::shared_ptr L; + boost::shared_ptr U; + boost::shared_ptr D; + boost::shared_ptr t1, t2; }; template class ilu_solve< backend::builtin > { public: typedef backend::builtin Backend; + typedef typename Backend::params backend_params; typedef typename Backend::matrix matrix; typedef typename Backend::vector vector; typedef typename Backend::matrix_diagonal matrix_diagonal; @@ -71,23 +125,40 @@ class ilu_solve< backend::builtin > { typedef typename Backend::rhs_type rhs_type; typedef typename math::scalar_of::type scalar_type; - template + struct params { + /// Use serial version of the algorithm + bool serial; + + params() : serial(num_threads() < 4) {} + + params(const boost::property_tree::ptree &p) + : AMGCL_PARAMS_IMPORT_VALUE(p, serial) + { + AMGCL_PARAMS_CHECK(p, (serial)); + } + + void get(boost::property_tree::ptree &p, const std::string &path) const { + AMGCL_PARAMS_EXPORT_VALUE(p, path, serial); + } + } prm; + ilu_solve( boost::shared_ptr L, boost::shared_ptr U, boost::shared_ptr > D, - const Params &prm, const typename Backend::params &bprm - ) : is_serial(prm.serial || num_threads() < 4) + const params &prm = params(), + const backend_params& = backend_params() + ) : prm(prm) { - if (is_serial) - serial_init(L, U, D, bprm); + if (prm.serial) + serial_init(L, U, D); else - parallel_init(L, U, D, bprm); + parallel_init(L, U, D); } template void solve(Vector &x) { - if (is_serial) + if (prm.serial) serial_solve(x); else parallel_solve(x); @@ -121,8 +192,7 @@ class ilu_solve< backend::builtin > { void serial_init( boost::shared_ptr L, boost::shared_ptr U, - boost::shared_ptr D, - const typename Backend::params& + boost::shared_ptr D ) { this->L = L; @@ -325,8 +395,7 @@ class ilu_solve< backend::builtin > { void parallel_init( boost::shared_ptr L, boost::shared_ptr U, - boost::shared_ptr > D, - const typename Backend::params& + boost::shared_ptr > D ) { lower = boost::make_shared< sptr_solve >(*L, D->data()); diff --git a/external_libraries/amgcl/relaxation/ilu0.hpp b/external_libraries/amgcl/relaxation/ilu0.hpp index b8cb1f99f6a8..ce0623bf4e80 100644 --- a/external_libraries/amgcl/relaxation/ilu0.hpp +++ b/external_libraries/amgcl/relaxation/ilu0.hpp @@ -55,33 +55,28 @@ struct ilu0 { typedef typename Backend::matrix_diagonal matrix_diagonal; typedef typename math::scalar_of::type scalar_type; + typedef detail::ilu_solve ilu_solve; /// Relaxation parameters. struct params { /// Damping factor. scalar_type damping; - /// Use serial version of the algorithm - bool serial; + /// Parameters for sparse triangular system solver + typename ilu_solve::params solve; - /// Number of Jacobi iterations. - /** \note Used for approximate solution of triangular systems on parallel backends */ - unsigned jacobi_iters; - - params() : damping(1), serial(false), jacobi_iters(2) {} + params() : damping(1) {} params(const boost::property_tree::ptree &p) : AMGCL_PARAMS_IMPORT_VALUE(p, damping) - , AMGCL_PARAMS_IMPORT_VALUE(p, serial) - , AMGCL_PARAMS_IMPORT_VALUE(p, jacobi_iters) + , AMGCL_PARAMS_IMPORT_CHILD(p, solve) { - AMGCL_PARAMS_CHECK(p, (damping)(serial)(jacobi_iters)); + AMGCL_PARAMS_CHECK(p, (damping)(solve)); } void get(boost::property_tree::ptree &p, const std::string &path) const { AMGCL_PARAMS_EXPORT_VALUE(p, path, damping); - AMGCL_PARAMS_EXPORT_VALUE(p, path, serial); - AMGCL_PARAMS_EXPORT_VALUE(p, path, jacobi_iters); + AMGCL_PARAMS_EXPORT_CHILD(p, path, solve); } }; @@ -176,7 +171,7 @@ struct ilu0 { work[A.col[j]] = NULL; } - ilu = boost::make_shared(L, U, D, prm, bprm); + ilu = boost::make_shared(L, U, D, prm.solve, bprm); } /// \copydoc amgcl::relaxation::damped_jacobi::apply_pre @@ -212,7 +207,6 @@ struct ilu0 { } private: - typedef detail::ilu_solve ilu_solve; boost::shared_ptr ilu; }; diff --git a/external_libraries/amgcl/relaxation/iluk.hpp b/external_libraries/amgcl/relaxation/iluk.hpp index a493b2b0cbf8..66bddd854410 100644 --- a/external_libraries/amgcl/relaxation/iluk.hpp +++ b/external_libraries/amgcl/relaxation/iluk.hpp @@ -55,6 +55,8 @@ struct iluk { typedef typename math::scalar_of::type scalar_type; + typedef detail::ilu_solve ilu_solve; + /// Relaxation parameters. struct params { /// Level of fill-in. @@ -63,29 +65,23 @@ struct iluk { /// Damping factor. scalar_type damping; - /// Use serial version of the algorithm - bool serial; - - /// Number of Jacobi iterations. - /** \note Used for approximate solution of triangular systems on parallel backends */ - unsigned jacobi_iters; + /// Parameters for sparse triangular system solver + typename ilu_solve::params solve; - params() : k(1), damping(1), serial(false), jacobi_iters(2) {} + params() : k(1), damping(1) {} params(const boost::property_tree::ptree &p) : AMGCL_PARAMS_IMPORT_VALUE(p, k) , AMGCL_PARAMS_IMPORT_VALUE(p, damping) - , AMGCL_PARAMS_IMPORT_VALUE(p, serial) - , AMGCL_PARAMS_IMPORT_VALUE(p, jacobi_iters) + , AMGCL_PARAMS_IMPORT_CHILD(p, solve) { - AMGCL_PARAMS_CHECK(p, (k)(damping)(serial)(jacobi_iters)); + AMGCL_PARAMS_CHECK(p, (k)(damping)(solve)); } void get(boost::property_tree::ptree &p, const std::string &path) const { AMGCL_PARAMS_EXPORT_VALUE(p, path, k); AMGCL_PARAMS_EXPORT_VALUE(p, path, damping); - AMGCL_PARAMS_EXPORT_VALUE(p, path, serial); - AMGCL_PARAMS_EXPORT_VALUE(p, path, jacobi_iters); + AMGCL_PARAMS_EXPORT_CHILD(p, path, solve); } }; @@ -154,7 +150,7 @@ struct iluk { ilu = boost::make_shared( boost::make_shared(n, n, Lptr, Lcol, Lval), boost::make_shared(n, n, Uptr, Ucol, Uval), - D, prm, bprm); + D, prm.solve, bprm); } /// \copydoc amgcl::relaxation::damped_jacobi::apply_pre @@ -189,7 +185,6 @@ struct iluk { } private: - typedef detail::ilu_solve ilu_solve; boost::shared_ptr ilu; struct nonzero { diff --git a/external_libraries/amgcl/relaxation/ilut.hpp b/external_libraries/amgcl/relaxation/ilut.hpp index 9cdbdf04872f..162e97f169b9 100644 --- a/external_libraries/amgcl/relaxation/ilut.hpp +++ b/external_libraries/amgcl/relaxation/ilut.hpp @@ -62,6 +62,8 @@ struct ilut { typedef typename math::scalar_of::type scalar_type; + typedef detail::ilu_solve ilu_solve; + /// Relaxation parameters. struct params { /// Fill factor. @@ -73,31 +75,25 @@ struct ilut { /// Damping factor. scalar_type damping; - /// Use serial version of the algorithm - bool serial; - - /// Number of Jacobi iterations. - /** \note Used for approximate solution of triangular systems on parallel backends */ - unsigned jacobi_iters; + /// Parameters for sparse triangular system solver + typename ilu_solve::params solve; - params() : p(2), tau(1e-2f), damping(1), serial(false), jacobi_iters(2) {} + params() : p(2), tau(1e-2f), damping(1) {} params(const boost::property_tree::ptree &p) : AMGCL_PARAMS_IMPORT_VALUE(p, p) , AMGCL_PARAMS_IMPORT_VALUE(p, tau) , AMGCL_PARAMS_IMPORT_VALUE(p, damping) - , AMGCL_PARAMS_IMPORT_VALUE(p, serial) - , AMGCL_PARAMS_IMPORT_VALUE(p, jacobi_iters) + , AMGCL_PARAMS_IMPORT_CHILD(p, solve) { - AMGCL_PARAMS_CHECK(p, (p)(tau)(damping)(serial)(jacobi_iters)); + AMGCL_PARAMS_CHECK(p, (p)(tau)(damping)(solve)); } void get(boost::property_tree::ptree &p, const std::string &path) const { AMGCL_PARAMS_EXPORT_VALUE(p, path, p); AMGCL_PARAMS_EXPORT_VALUE(p, path, tau); AMGCL_PARAMS_EXPORT_VALUE(p, path, damping); - AMGCL_PARAMS_EXPORT_VALUE(p, path, serial); - AMGCL_PARAMS_EXPORT_VALUE(p, path, jacobi_iters); + AMGCL_PARAMS_EXPORT_CHILD(p, path, solve); } }; @@ -179,7 +175,7 @@ struct ilut { L->nnz = L->ptr[n]; U->nnz = U->ptr[n]; - ilu = boost::make_shared(L, U, D, prm, bprm); + ilu = boost::make_shared(L, U, D, prm.solve, bprm); } /// \copydoc amgcl::relaxation::damped_jacobi::apply_pre @@ -215,7 +211,6 @@ struct ilut { private: typedef typename backend::builtin::matrix build_matrix; - typedef detail::ilu_solve ilu_solve; boost::shared_ptr ilu; struct sparse_vector { diff --git a/external_libraries/amgcl/runtime.hpp b/external_libraries/amgcl/runtime.hpp index c2cdf5300840..c74f6d134fba 100644 --- a/external_libraries/amgcl/runtime.hpp +++ b/external_libraries/amgcl/runtime.hpp @@ -54,6 +54,7 @@ THE SOFTWARE. #include #include #include +#include #include @@ -522,7 +523,8 @@ enum type { bicgstabl, ///< BiCGStab(ell) gmres, ///< GMRES lgmres, ///< LGMRES - fgmres ///< FGMRES + fgmres, ///< FGMRES + idrs ///< IDR(s) }; inline std::ostream& operator<<(std::ostream &os, type s) @@ -540,6 +542,8 @@ inline std::ostream& operator<<(std::ostream &os, type s) return os << "lgmres"; case fgmres: return os << "fgmres"; + case idrs: + return os << "idrs"; default: return os << "???"; } @@ -562,6 +566,8 @@ inline std::istream& operator>>(std::istream &in, type &s) s = lgmres; else if (val == "fgmres") s = fgmres; + else if (val == "idrs") + s = idrs; else throw std::invalid_argument("Invalid solver value"); @@ -619,6 +625,12 @@ inline void process_solver( func.template process(); } break; + case runtime::solver::idrs: + { + typedef amgcl::solver::idrs Solver; + func.template process(); + } + break; } } diff --git a/external_libraries/amgcl/solver/skyline_lu.hpp b/external_libraries/amgcl/solver/skyline_lu.hpp index 5c593983334a..434957a73a36 100644 --- a/external_libraries/amgcl/solver/skyline_lu.hpp +++ b/external_libraries/amgcl/solver/skyline_lu.hpp @@ -70,126 +70,17 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include +#include #include namespace amgcl { namespace solver { -namespace matrix_permutation { - -struct none { - template - static void get(const Matrix&, std::vector &perm) { - for(size_t i = 0; i < perm.size(); ++i) perm[i] = i; - } -}; - -template -struct CuthillMcKee { - template - static void get(const Matrix &A, std::vector &perm) { - typedef typename backend::row_iterator::type row_iterator; - const int n = backend::rows(A); - - /* The data structure used to sort and traverse the level sets: - * - * The current level set is currentLevelSet; - * In this level set, there are nodes with degrees from 0 (not really - * useful) to maxDegreeInCurrentLevelSet. - * firstWithDegree[i] points to a node with degree i, or to -1 if it - * does not exist. nextSameDegree[firstWithDegree[i]] points to the - * second node with that degree, etc. - * While the level set is being traversed, the structure for the next - * level set is generated; nMDICLS will be the next - * maxDegreeInCurrentLevelSet and nFirstWithDegree will be - * firstWithDegree. - */ - int initialNode = 0; // node to start search - int maxDegree = 0; - - std::vector degree(n); - std::vector levelSet(n, 0); - std::vector nextSameDegree(n, -1); - - for(int i = 0; i < n; ++i) { - degree[i] = backend::row_nonzeros(A, i); - maxDegree = std::max(maxDegree, degree[i]); - } - - std::vector firstWithDegree(maxDegree + 1, -1); - std::vector nFirstWithDegree(maxDegree + 1); - - // Initialize the first level set, made up by initialNode alone - perm[0] = initialNode; - int currentLevelSet = 1; - levelSet[initialNode] = currentLevelSet; - int maxDegreeInCurrentLevelSet = degree[initialNode]; - firstWithDegree[maxDegreeInCurrentLevelSet] = initialNode; - - // Main loop - for (int next = 1; next < n; ) { - int nMDICLS = 0; - std::fill(nFirstWithDegree.begin(), nFirstWithDegree.end(), -1); - bool empty = true; // used to detect different connected components - - int firstVal = reverse ? maxDegreeInCurrentLevelSet : 0; - int finalVal = reverse ? -1 : maxDegreeInCurrentLevelSet + 1; - int increment = reverse ? -1 : 1; - - for(int soughtDegree = firstVal; soughtDegree != finalVal; soughtDegree += increment) - { - int node = firstWithDegree[soughtDegree]; - while (node > 0) { - // Visit neighbors - for(row_iterator a = backend::row_begin(A, node); a; ++a) { - int c = a.col(); - if (levelSet[c] == 0) { - levelSet[c] = currentLevelSet + 1; - perm[next] = c; - ++next; - empty = false; // this level set is not empty - nextSameDegree[c] = nFirstWithDegree[degree[c]]; - nFirstWithDegree[degree[c]] = c; - nMDICLS = std::max(nMDICLS, degree[c]); - } - } - node = nextSameDegree[node]; - } - } - - ++currentLevelSet; - maxDegreeInCurrentLevelSet = nMDICLS; - for(int i = 0; i <= nMDICLS; ++i) - firstWithDegree[i] = nFirstWithDegree[i]; - - if (empty) { - // The graph contains another connected component that we - // cannot reach. Search for a node that has not yet been - // included in a level set, and start exploring from it. - bool found = false; - for(int i = 0; i < n; ++i) { - if (levelSet[i] == 0) { - perm[next] = i; - ++next; - levelSet[i] = currentLevelSet; - maxDegreeInCurrentLevelSet = degree[i]; - firstWithDegree[maxDegreeInCurrentLevelSet] = i; - found = true; - break; - } - } - precondition(found, "Internal consistency error at skyline_lu"); - } - } - } -}; - -} - /// Direct solver that uses skyline LU factorization. template < typename ValueType, - class ordering = matrix_permutation::CuthillMcKee + class ordering = reorder::cuthill_mckee > class skyline_lu { public: @@ -345,11 +236,12 @@ class skyline_lu { */ void factorize() { precondition(!math::is_zero(D[0]), "Zero diagonal in skyline_lu"); + D[0] = math::inverse(D[0]); for(int k = 0; k < n - 1; ++k) { // check whether A(1,k+1) lies within the skyline structure if (ptr[k + 1] + k + 1 == ptr[k + 2]) { - U[ptr[k+1]] = math::inverse(D[0]) * U[ptr[k+1]]; + U[ptr[k+1]] = D[0] * U[ptr[k+1]]; } // Compute column k+1 of U @@ -369,7 +261,7 @@ class skyline_lu { for(int j = jBeginMult; j < i; ++j, ++indexL, ++indexU) sum -= L[indexL] * U[indexU]; - U[indexEntry] = math::inverse(D[i]) * sum; + U[indexEntry] = D[i] * sum; } // Compute row k+1 of L @@ -394,18 +286,15 @@ class skyline_lu { } // Find element in diagonal - value_type sum = D[k + 1]; + value_type sum = D[k+1]; for(int j = ptr[k+1]; j < ptr[k+2]; ++j) sum -= L[j] * U[j]; precondition(!math::is_zero(sum), "Zero sum in skyline_lu factorization"); - D[k+1] = sum; + D[k+1] = math::inverse(sum); } - - // Invert diagonal - for(int i = 0; i < n; ++i) D[i] = math::inverse(D[i]); } }; diff --git a/external_libraries/amgcl/util.hpp b/external_libraries/amgcl/util.hpp index 8f2c6f02705d..5657ff2f9637 100644 --- a/external_libraries/amgcl/util.hpp +++ b/external_libraries/amgcl/util.hpp @@ -42,23 +42,27 @@ THE SOFTWARE. /* Performance measurement macros * - * If AMGCL_PROFILING macro is defined at compilation, then TIC(name) and - * TOC(name) macros correspond to prof.tic(name) and prof.toc(name). + * If AMGCL_PROFILING macro is defined at compilation, then AMGCL_TIC(name) and + * AMGCL_TOC(name) macros correspond to prof.tic(name) and prof.toc(name). * amgcl::prof should be an instance of amgcl::profiler<> defined in a user * code similar to: * \code * namespace amgcl { profiler<> prof; } * \endcode - * If AMGCL_PROFILING is undefined, then TIC and TOC are noop macros. + * If AMGCL_PROFILING is undefined, then AMGCL_TIC and AMGCL_TOC are noop macros. */ #ifdef AMGCL_PROFILING # include -# define TIC(name) amgcl::prof.tic(name); -# define TOC(name) amgcl::prof.toc(name); +# define AMGCL_TIC(name) amgcl::prof.tic(name); +# define AMGCL_TOC(name) amgcl::prof.toc(name); namespace amgcl { extern profiler<> prof; } #else -# define TIC(name) -# define TOC(name) +# ifndef AMGCL_TIC +# define AMGCL_TIC(name) +# endif +# ifndef AMGCL_TOC +# define AMGCL_TOC(name) +# endif #endif #define AMGCL_DEBUG_SHOW(x) \ From f205b3ea7b453064d6aa9f12d86f04e02ac40120 Mon Sep 17 00:00:00 2001 From: Riccardo Rossi Date: Thu, 10 Aug 2017 14:40:43 +0200 Subject: [PATCH 08/12] changing name to the files to be more consistent --- .../amgcl_mpi_schur_complement_solver.h | 63 ++- .../{amgcl_solver.h => amgcl_mpi_solver.h} | 0 .../external_includes/mpi_amgcl_ns_solver.h | 391 ------------------ .../tests/test_trilinos_linear_solvers.py | 3 +- 4 files changed, 42 insertions(+), 415 deletions(-) rename applications/trilinos_application/external_includes/{amgcl_solver.h => amgcl_mpi_solver.h} (100%) delete mode 100644 applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h diff --git a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h index 7c45cc4c935c..ad3e7830edc7 100644 --- a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h +++ b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h @@ -138,13 +138,13 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType, mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()); //setting pressure solver options - mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString()); - mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble()); - mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt()); - mprm.put("precond.psolver.precond.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString()); - mprm.put("precond.psolver.precond.coarsening.aggr.eps_strong", 0.0); - mprm.put("precond.psolver.precond.coarsening.aggr.block_size", 1); - mprm.put("precond.psolver.precond.coarse_enough",mCoarseEnough); + mprm.put("precond.psolver.solver.local.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString()); + mprm.put("precond.psolver.solver.local.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble()); + mprm.put("precond.psolver.solver.local.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt()); +// mprm.put("precond.psolver.precond.local.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString()); +// mprm.put("precond.psolver.precond.local.coarsening.aggr.eps_strong", 0.0); +// mprm.put("precond.psolver.precond.local.coarsening.aggr.block_size", 1); +// mprm.put("precond.psolver.precond.local.coarse_enough",mCoarseEnough); } @@ -184,26 +184,45 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType, typedef amgcl::backend::builtin Backend; - typedef amgcl::mpi::make_solver< - amgcl::mpi::schur_pressure_correction< - amgcl::mpi::make_solver< - amgcl::mpi::block_preconditioner< - amgcl::runtime::relaxation::as_preconditioner +// typedef amgcl::mpi::make_solver< +// amgcl::mpi::schur_pressure_correction< +// amgcl::mpi::make_solver< +// amgcl::mpi::block_preconditioner< +// amgcl::runtime::relaxation::as_preconditioner +// >, +// amgcl::runtime::iterative_solver +// >, +// amgcl::mpi::make_solver< +// amgcl::mpi::subdomain_deflation< +// amgcl::runtime::amg, +// amgcl::runtime::iterative_solver, +// amgcl::runtime::mpi::direct_solver +// >, +// amgcl::runtime::iterative_solver +// > +// >, +// amgcl::runtime::iterative_solver +// > SDD; + typedef amgcl::mpi::make_solver< + amgcl::mpi::schur_pressure_correction< + amgcl::mpi::make_solver< + amgcl::mpi::block_preconditioner< + amgcl::runtime::relaxation::as_preconditioner >, - amgcl::runtime::iterative_solver + amgcl::runtime::solver::bicgstab >, - amgcl::mpi::make_solver< - amgcl::mpi::subdomain_deflation< - amgcl::runtime::amg, - amgcl::runtime::iterative_solver, - amgcl::runtime::mpi::direct_solver + amgcl::mpi::make_solver< + amgcl::mpi::subdomain_deflation< + amg, + amgcl::runtime::solver::bicgstab, + amgcl::mpi::skyline_lu >, - amgcl::runtime::iterative_solver + amgcl::runtime::solver::fgmres > >, - amgcl::runtime::iterative_solver - > SDD; - + amgcl::runtime::solver::fgmres + > SDD; + boost::function dv = amgcl::mpi::constant_deflation(1); mprm.put("precond.psolver.precond.num_def_vec", 1); mprm.put("precond.psolver.precond.def_vec", &dv); diff --git a/applications/trilinos_application/external_includes/amgcl_solver.h b/applications/trilinos_application/external_includes/amgcl_mpi_solver.h similarity index 100% rename from applications/trilinos_application/external_includes/amgcl_solver.h rename to applications/trilinos_application/external_includes/amgcl_mpi_solver.h diff --git a/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h b/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h deleted file mode 100644 index 52521a58bd55..000000000000 --- a/applications/trilinos_application/external_includes/mpi_amgcl_ns_solver.h +++ /dev/null @@ -1,391 +0,0 @@ -// | / | -// ' / __| _` | __| _ \ __| -// . \ | ( | | ( |\__ ` -// _|\_\_| \__,_|\__|\___/ ____/ -// Multi-Physics -// -// License: BSD License -// Kratos default license: kratos/license.txt -// -// Main authors: Denis Demidov -// Riccardo Rossi -// - - -#if !defined(KRATOS_AMGCL_NS_MPI_SOLVER_H_INCLUDED ) -#define KRATOS_AMGCL_NS_MPI_SOLVER_H_INCLUDED - - -#ifndef AMGCL_PARAM_UNKNOWN -# define AMGCL_PARAM_UNKNOWN(name) \ - std::cerr << "AMGCL WARNING: unknown parameter " << name << std::endl -#endif - - -// External includes -#include "boost/smart_ptr.hpp" - -// Project includes -#include "includes/define.h" -#include "includes/kratos_parameters.h" -#include "linear_solvers/linear_solver.h" -#include "external_includes/amgcl_solver.h" - -//aztec solver includes -#include "AztecOO.h" -#include "Epetra_LinearProblem.h" -//#include "Teuchos_ParameterList.hpp" - - -#include -#include -#include - -#include -#include -#include -#include -#include -#include //needed to print AMGCL internal settings - - - -namespace Kratos -{ - - - -template< class TSparseSpaceType, class TDenseSpaceType, - class TReordererType = Reorderer > -class AmgclMPI_NS_Solver : public LinearSolver< TSparseSpaceType, - TDenseSpaceType, TReordererType> -{ -public: - /** - * Counted pointer of AmgclMPI_NS_Solver - */ - KRATOS_CLASS_POINTER_DEFINITION ( AmgclMPI_NS_Solver ); - - typedef LinearSolver BaseType; - - typedef typename TSparseSpaceType::MatrixType SparseMatrixType; - - typedef typename TSparseSpaceType::VectorType VectorType; - - typedef typename TDenseSpaceType::MatrixType DenseMatrixType; - - - AmgclMPI_NS_Solver ( Parameters rParameters ) - { -// Parameters default_parameters( R"( -// { -// "solver_type" : "AmgclMPI_NS_Solver", -// "krylov_type" : "fgmres", -// "velocity_block_preconditioner" : -// { -// "krylov_type" : "lgmres", -// "tolerance" : 1e-3, -// "preconditioner_type" : "ilu0", -// "max_iteration": 5 -// }, -// "pressure_block_preconditioner" : -// { -// "krylov_type" : "lgmres", -// "tolerance" : 1e-2, -// "preconditioner_type" : "spai0", -// "max_iteration": 20 -// }, -// "tolerance" : 1e-9, -// "gmres_krylov_space_dimension": 50, -// "coarsening_type": "aggregation", -// "max_iteration": 50, -// "verbosity" : 1, -// "scaling": false, -// "coarse_enough" : 5000 -// } )" ); -// -// -// //now validate agains defaults -- this also ensures no type mismatch -// rParameters.ValidateAndAssignDefaults(default_parameters); -// -// std::stringstream msg; -// -// //validate if values are admissible -// std::set available_preconditioners = {"spai0","ilu0","damped_jacobi","gauss_seidel","chebyshev"}; -// -// //check velocity block settings -// if(available_preconditioners.find(rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()) == available_preconditioners.end()) -// { -// msg << "currently prescribed velocity_block_preconditioner preconditioner_type : " << rParameters["velocity_block_preconditioner"]["smoother_type"].GetString() << std::endl; -// msg << "admissible values are : spai0,ilu0,damped_jacobi,gauss_seidel,chebyshev"<< std::endl; -// KRATOS_THROW_ERROR(std::invalid_argument," smoother_type is invalid: ",msg.str()); -// } -// -// mcoarse_enough = rParameters["coarse_enough"].GetInt(); -// -// mtol = rParameters["tolerance"].GetDouble(); -// mmax_it = rParameters["max_iteration"].GetInt(); -// mverbosity=rParameters["verbosity"].GetInt(); -// mprm.put("solver.type", rParameters["krylov_type"].GetString()); -// -// if(rParameters["krylov_type"].GetString() == "gmres" || rParameters["krylov_type"].GetString() == "lgmres" || rParameters["krylov_type"].GetString() == "fgmres") -// mprm.put("solver.M", rParameters["gmres_krylov_space_dimension"].GetInt()); -// -// //setting velocity solver options -// mprm.put("precond.usolver.solver.type", rParameters["velocity_block_preconditioner"]["krylov_type"].GetString()); -// mprm.put("precond.usolver.solver.tol", rParameters["velocity_block_preconditioner"]["tolerance"].GetDouble()); -// mprm.put("precond.usolver.solver.maxiter", rParameters["velocity_block_preconditioner"]["max_iteration"].GetInt()); -// mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()); -// -// //setting pressure solver options -// mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString()); -// mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble()); -// mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt()); -// mprm.put("precond.psolver.precond.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString()); -// mprm.put("precond.psolver.precond.coarsening.aggr.eps_strong", 0.0); -// mprm.put("precond.psolver.precond.coarsening.aggr.block_size", 1); -// mprm.put("precond.psolver.precond.coarse_enough",mcoarse_enough); - - } - - - /** - * Destructor - */ - virtual ~AmgclMPI_NS_Solver() {} - - /** - * Normal solve method. - * Solves the linear system Ax=b and puts the result on SystemVector& rX. - * rX is also th initial guess for iterative methods. - * @param rA. System matrix - * @param rX. Solution vector. - * @param rB. Right hand side vector. - */ - bool Solve ( SparseMatrixType& rA, VectorType& rX, VectorType& rB ) - { - KRATOS_TRY -KRATOS_WATCH(__LINE__) - using amgcl::prof; - prof.reset(); -KRATOS_WATCH(__LINE__) - - amgcl::mpi::communicator world ( MPI_COMM_WORLD ); - if ( mverbosity >=0 && world.rank == 0 ) { - std::cout << "World size: " << world.size << std::endl; - } -KRATOS_WATCH(__LINE__) - - int chunk = rA.NumMyRows(); - boost::iterator_range xrange ( rX.Values(), rX.Values() + chunk ); - boost::iterator_range frange ( rB.Values(), rB.Values() + chunk ); -KRATOS_WATCH(__LINE__) - - if ( mverbosity > 1 && world.rank == 0 ) { - write_json ( std::cout, mprm ); - } -KRATOS_WATCH(__LINE__) - - typedef amgcl::backend::builtin Backend; - - typedef amgcl::mpi::make_solver< - amgcl::mpi::schur_pressure_correction< - amgcl::mpi::make_solver< - amgcl::mpi::block_preconditioner< - amgcl::runtime::relaxation::as_preconditioner - >, - amgcl::runtime::iterative_solver - >, - amgcl::mpi::make_solver< - amgcl::mpi::subdomain_deflation< - amgcl::runtime::amg, - amgcl::runtime::iterative_solver, - amgcl::runtime::mpi::direct_solver - >, - amgcl::runtime::iterative_solver - > - >, - amgcl::runtime::iterative_solver - > SDD; -KRATOS_WATCH(__LINE__) - - boost::function dv = amgcl::mpi::constant_deflation(1); - mprm.put("precond.psolver.precond.num_def_vec", 1); - mprm.put("precond.psolver.precond.def_vec", &dv); - - mprm.put("precond.pmask", static_cast(&mp[0])); - mprm.put("precond.pmask_size", mp.size()); -KRATOS_WATCH(__LINE__) - - prof.tic ( "setup" ); - SDD solve ( world, amgcl::backend::map ( rA ), mprm ); - double tm_setup = prof.toc ( "setup" ); -KRATOS_WATCH(__LINE__) - - prof.tic ( "Solve" ); - size_t iters; - double resid; - boost::tie ( iters, resid ) = solve ( frange, xrange ); - double solve_tm = prof.toc ( "Solve" ); - - - - if ( rA.Comm().MyPID() == 0 ) { - if ( mverbosity > 0 ) { - std::cout - << "------- AMGCL -------\n" << std::endl - << "Iterations : " << iters << std::endl - << "Error : " << resid << std::endl - << "amgcl setup time: " << tm_setup << std::endl - << "amgcl solve time: " << solve_tm << std::endl; - } - - if ( mverbosity > 1 ) { - std::cout << prof << std::endl; - } - } - - - - return true; - - - - - - KRATOS_CATCH ( "" ); - } - - /** - * Multi solve method for solving a set of linear systems with same coefficient matrix. - * Solves the linear system Ax=b and puts the result on SystemVector& rX. - * rX is also th initial guess for iterative methods. - * @param rA. System matrix - * @param rX. Solution vector. - * @param rB. Right hand side vector. - */ - bool Solve ( SparseMatrixType& rA, DenseMatrixType& rX, DenseMatrixType& rB ) - { - - return false; - } - - /** Some solvers may require a minimum degree of knowledge of the structure of the matrix. To make an example - * when solving a mixed u-p problem, it is important to identify the row associated to v and p. - * another example is the automatic prescription of rotation null-space for smoothed-aggregation solvers - * which require knowledge on the spatial position of the nodes associated to a given dof. - * This function tells if the solver requires such data - */ - virtual bool AdditionalPhysicalDataIsNeeded() - { - return true; - } - - /** Some solvers may require a minimum degree of knowledge of the structure of the matrix. To make an example - * when solving a mixed u-p problem, it is important to identify the row associated to v and p. - * another example is the automatic prescription of rotation null-space for smoothed-aggregation solvers - * which require knowledge on the spatial position of the nodes associated to a given dof. - * This function is the place to eventually provide such data - */ - void ProvideAdditionalData ( - SparseMatrixType& rA, - VectorType& rX, - VectorType& rB, - typename ModelPart::DofsArrayType& rdof_set, - ModelPart& r_model_part - ) - { -KRATOS_WATCH(__LINE__) - int my_pid = rA.Comm().MyPID(); - - //filling the pressure mask - if(mp.size() != static_cast(rA.NumMyRows())) mp.resize( rA.NumMyRows() ); -KRATOS_WATCH(__LINE__) - int counter = 0; - for (ModelPart::DofsArrayType::iterator it = rdof_set.begin(); it!=rdof_set.end(); it++) - { - if( it->GetSolutionStepValue ( PARTITION_INDEX ) == my_pid ) - { - mp[counter] = (it->GetVariable().Key() == PRESSURE); - counter++; - } - } -KRATOS_WATCH(__LINE__) - if(counter != rA.NumMyRows()) - KRATOS_ERROR << "pressure mask as a size " << mp.size() << " which does not correspond with the number of local rows:" << rA.NumMyRows() << std::endl; - - } - - /** - * Print information about this object. - */ - void PrintInfo ( std::ostream& rOStream ) const - { - rOStream << "AMGCL_MPI solver finished."; - } - - /** - * Print object's data. - */ - void PrintData ( std::ostream& rOStream ) const - { - } - -private: - - double mtol; - int mmax_it; - int mverbosity; - unsigned int mcoarse_enough; - bool muse_linear_deflation = false; - bool mprovide_coordinates = false; - bool muse_block_matrices_if_possible = false; - - std::vector< char > mp; //pressure mask - boost::property_tree::ptree mprm; - - - /** - * Assignment operator. - */ - AmgclMPI_NS_Solver& operator= ( const AmgclMPI_NS_Solver& Other ); - - /** - * Copy constructor. - */ - AmgclMPI_NS_Solver ( const AmgclMPI_NS_Solver& Other ); - -}; - - -/** - * input stream function - */ -template -inline std::istream& operator >> ( std::istream& rIStream, AmgclMPI_NS_Solver< TSparseSpaceType, - TDenseSpaceType, TReordererType>& rThis ) -{ - return rIStream; -} - -/** - * output stream function - */ -template -inline std::ostream& operator << ( std::ostream& rOStream, - const AmgclMPI_NS_Solver& rThis ) -{ - rThis.PrintInfo ( rOStream ); - rOStream << std::endl; - rThis.PrintData ( rOStream ); - - return rOStream; -} - - -} // namespace Kratos. - -#endif // KRATOS_AMGCL_NS_MPI_SOLVER_H_INCLUDED defined - - diff --git a/applications/trilinos_application/tests/test_trilinos_linear_solvers.py b/applications/trilinos_application/tests/test_trilinos_linear_solvers.py index 0ecb886b28d7..913651faeba5 100644 --- a/applications/trilinos_application/tests/test_trilinos_linear_solvers.py +++ b/applications/trilinos_application/tests/test_trilinos_linear_solvers.py @@ -28,7 +28,6 @@ def _auxiliary_test_function(self, settings, matrix_name="A.mm", absolute_norm=F space.ReadMatrixMarketMatrix(GetFilePath(matrix_name),pA) n = space.Size1(pA.GetReference()) - pAoriginal = space.CreateEmptyMatrixPointer(comm) #create a copy of A space.ReadMatrixMarketMatrix(GetFilePath(matrix_name),pAoriginal) @@ -52,7 +51,7 @@ def _auxiliary_test_function(self, settings, matrix_name="A.mm", absolute_norm=F #construct the solver import trilinos_linear_solver_factory linear_solver = trilinos_linear_solver_factory.ConstructSolver(settings) - + #solve linear_solver.Solve(pA.GetReference(),px.GetReference(),pb.GetReference()) From f6b16c4323348129fab6a218a1f46dbffa7cd0d7 Mon Sep 17 00:00:00 2001 From: Riccardo Rossi Date: Sat, 2 Sep 2017 22:42:45 +0200 Subject: [PATCH 09/12] solving the segfault - unfortunately still not selecting correctly the solvers --- .../amgcl_mpi_schur_complement_solver.h | 40 ++++++++++++++----- .../amgcl/mpi/schur_pressure_correction.hpp | 1 + 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h index ad3e7830edc7..95e183012666 100644 --- a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h +++ b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -138,9 +139,9 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType, mprm.put("precond.usolver.precond.type", rParameters["velocity_block_preconditioner"]["preconditioner_type"].GetString()); //setting pressure solver options - mprm.put("precond.psolver.solver.local.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString()); - mprm.put("precond.psolver.solver.local.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble()); - mprm.put("precond.psolver.solver.local.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt()); + mprm.put("precond.psolver.solver.type", rParameters["pressure_block_preconditioner"]["krylov_type"].GetString()); + mprm.put("precond.psolver.solver.tol", rParameters["pressure_block_preconditioner"]["tolerance"].GetDouble()); + mprm.put("precond.psolver.solver.maxiter", rParameters["pressure_block_preconditioner"]["max_iteration"].GetInt()); // mprm.put("precond.psolver.precond.local.relax.type", rParameters["pressure_block_preconditioner"]["preconditioner_type"].GetString()); // mprm.put("precond.psolver.precond.local.coarsening.aggr.eps_strong", 0.0); // mprm.put("precond.psolver.precond.local.coarsening.aggr.block_size", 1); @@ -203,24 +204,45 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType, // >, // amgcl::runtime::iterative_solver // > SDD; + +// typedef amgcl::mpi::make_solver< +// amgcl::mpi::schur_pressure_correction< +// amgcl::mpi::make_solver< +// amgcl::mpi::block_preconditioner< +// amgcl::runtime::relaxation::as_preconditioner +// >, +// amgcl::solver::bicgstab +// >, +// amgcl::mpi::make_solver< +// amgcl::mpi::subdomain_deflation< +// amgcl::runtime::amg, +// amgcl::runtime::iterative_solver, +// amgcl::runtime::mpi::direct_solver +// >, +// amgcl::runtime::iterative_solver +// > +// >, +// amgcl::runtime::iterative_solver +// > SDD; + typedef amgcl::mpi::make_solver< amgcl::mpi::schur_pressure_correction< amgcl::mpi::make_solver< amgcl::mpi::block_preconditioner< - amgcl::runtime::relaxation::as_preconditioner + amgcl::runtime::relaxation::as_preconditioner >, - amgcl::runtime::solver::bicgstab + amgcl::runtime::iterative_solver //amgcl::solver::bicgstab >, amgcl::mpi::make_solver< amgcl::mpi::subdomain_deflation< - amg, - amgcl::runtime::solver::bicgstab, + amgcl::runtime::amg, + amgcl::runtime::iterative_solver, //amgcl::solver::bicgstab, amgcl::mpi::skyline_lu >, - amgcl::runtime::solver::fgmres + amgcl::solver::fgmres > >, - amgcl::runtime::solver::fgmres + amgcl::solver::fgmres > SDD; boost::function dv = amgcl::mpi::constant_deflation(1); diff --git a/external_libraries/amgcl/mpi/schur_pressure_correction.hpp b/external_libraries/amgcl/mpi/schur_pressure_correction.hpp index afab65dbab86..f5e0ed3589b6 100644 --- a/external_libraries/amgcl/mpi/schur_pressure_correction.hpp +++ b/external_libraries/amgcl/mpi/schur_pressure_correction.hpp @@ -481,6 +481,7 @@ class schur_pressure_correction { u2x->col[u2x_head] = j; u2x->val[u2x_head] = math::identity(); + ++u2x_head; } } } From f2c5dbd13719665c6e538ea296656cf9b6ae3c7c Mon Sep 17 00:00:00 2001 From: Riccardo Rossi Date: Sun, 3 Sep 2017 11:14:27 +0200 Subject: [PATCH 10/12] adding some files which are present in the latest versions of amgcl --- external_libraries/amgcl/adapter/reorder.hpp | 288 +++++++++++ .../amgcl/reorder/cuthill_mckee.hpp | 193 ++++++++ external_libraries/amgcl/solver/idrs.hpp | 461 ++++++++++++++++++ 3 files changed, 942 insertions(+) create mode 100644 external_libraries/amgcl/adapter/reorder.hpp create mode 100644 external_libraries/amgcl/reorder/cuthill_mckee.hpp create mode 100644 external_libraries/amgcl/solver/idrs.hpp diff --git a/external_libraries/amgcl/adapter/reorder.hpp b/external_libraries/amgcl/adapter/reorder.hpp new file mode 100644 index 000000000000..c858f0e1f866 --- /dev/null +++ b/external_libraries/amgcl/adapter/reorder.hpp @@ -0,0 +1,288 @@ +#ifndef AMGCL_ADAPTER_REORDER_HPP +#define AMGCL_ADAPTER_REORDER_HPP + +/* +The MIT License + +Copyright (c) 2012-2017 Denis Demidov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +\file amgcl/adapter/reorder.hpp +\author Denis Demidov +\brief On-the-fly reodering of matrix and vectors. +\ingroup adapters +*/ + +#include +#include +#include + + +#include +#include +#include + +namespace amgcl { +namespace adapter { + +template +struct reordered_matrix { + typedef typename backend::value_type::type value_type; + typedef typename backend::row_iterator::type base_iterator; + + const Matrix &A; + const ptrdiff_t * perm; + const ptrdiff_t * iperm; + + reordered_matrix(const Matrix &A, const ptrdiff_t *perm, const ptrdiff_t * iperm) + : A(A), perm(perm), iperm(iperm) + {} + + size_t rows() const { + return backend::rows(A); + } + + size_t cols() const { + return backend::cols(A); + } + + size_t nonzeros() const { + return backend::nonzeros(A); + } + + struct row_iterator { + base_iterator base; + const ptrdiff_t * iperm; + + row_iterator(const base_iterator &base, const ptrdiff_t *iperm) + : base(base), iperm(iperm) + {} + + operator bool() const { + return base; + } + + row_iterator& operator++() { + ++base; + return *this; + } + + ptrdiff_t col() const { + return iperm[base.col()]; + } + + value_type value() const { + return base.value(); + } + }; + + row_iterator row_begin(size_t i) const { + return row_iterator(backend::row_begin(A, perm[i]), iperm); + } +}; + +template +struct reordered_vector { + typedef typename backend::value_type::type>::type value_type; + + Vector &x; + const ptrdiff_t *perm; + + reordered_vector(Vector &x, const ptrdiff_t *perm) : x(x), perm(perm) {} + + size_t size() const { + return boost::size(x); + } + + const value_type& operator[](size_t i) const { + return x[perm[i]]; + } + + value_type& operator[](size_t i) { + return x[perm[i]]; + } + + boost::permutation_iterator< + typename boost::decay::type::iterator, + const ptrdiff_t* + > + begin() { + return boost::make_permutation_iterator(boost::begin(x), perm); + } + + boost::permutation_iterator< + typename boost::decay::type::const_iterator, + const ptrdiff_t* + > + begin() const { + return boost::make_permutation_iterator(boost::begin(x), perm); + } + + boost::permutation_iterator< + typename boost::decay::type::iterator, + const ptrdiff_t* + > + end() { + return boost::make_permutation_iterator(boost::end(x), perm + size()); + } + + boost::permutation_iterator< + typename boost::decay::type::const_iterator, + const ptrdiff_t* + > + end() const { + return boost::make_permutation_iterator(boost::end(x), perm + size()); + } +}; + +} // namespace adapter + +namespace backend { + +//--------------------------------------------------------------------------- +// Specialization of matrix interface +//--------------------------------------------------------------------------- +template +struct value_type< adapter::reordered_matrix > + : value_type +{}; + +template +struct rows_impl< adapter::reordered_matrix > +{ + static size_t get(const adapter::reordered_matrix &A) { + return A.rows(); + } +}; + +template +struct cols_impl< adapter::reordered_matrix > +{ + static size_t get(const adapter::reordered_matrix &A) { + return A.cols(); + } +}; + +template +struct nonzeros_impl< adapter::reordered_matrix > +{ + static size_t get(const adapter::reordered_matrix &A) { + return A.nonzeros(); + } +}; + +template +struct row_iterator< adapter::reordered_matrix > +{ + typedef + typename adapter::reordered_matrix::row_iterator + type; +}; + +template +struct row_begin_impl< adapter::reordered_matrix > +{ + typedef adapter::reordered_matrix M; + static typename row_iterator::type + get(const M &matrix, size_t row) { + return matrix.row_begin(row); + } +}; + +namespace detail { + +template +struct use_builtin_matrix_ops< adapter::reordered_matrix > + : boost::true_type +{}; + +} // namespace detail + + +template +struct is_builtin_vector< adapter::reordered_vector > + : is_builtin_vector::type> +{}; + +} // namespace backend + +namespace adapter { + +template > +class reorder { + public: + template + reorder(const Matrix &A) : n(backend::rows(A)), perm(n), iperm(n) + { + ordering::get(A, perm); +#pragma omp parallel for + for(ptrdiff_t i = 0; i < n; ++i) iperm[perm[i]] = i; + } + + template + typename boost::disable_if< + backend::is_builtin_vector, + reordered_matrix + >::type + operator()(const Matrix &A) const { + return reordered_matrix(A, perm.data(), iperm.data()); + } + + template + typename boost::enable_if< + backend::is_builtin_vector, + reordered_vector + >::type + operator()(Vector &x) const { + return reordered_vector(x, perm.data()); + } + + template + typename boost::enable_if< + backend::is_builtin_vector, + reordered_vector + >::type + operator()(const Vector &x) const { + return reordered_vector(x, perm.data()); + } + + template + void forward(const Vector1 &x, Vector2 &y) const { +#pragma omp parallel for + for(ptrdiff_t i = 0; i < n; ++i) y[i] = x[perm[i]]; + } + + template + void inverse(const Vector1 &x, Vector2 &y) const { +#pragma omp parallel for + for(ptrdiff_t i = 0; i < n; ++i) y[perm[i]] = x[i]; + } + + private: + ptrdiff_t n; + backend::numa_vector perm, iperm; +}; + +} // namespace adapter +} // namespace amgcl + +#endif diff --git a/external_libraries/amgcl/reorder/cuthill_mckee.hpp b/external_libraries/amgcl/reorder/cuthill_mckee.hpp new file mode 100644 index 000000000000..aa97926c7346 --- /dev/null +++ b/external_libraries/amgcl/reorder/cuthill_mckee.hpp @@ -0,0 +1,193 @@ +#ifndef AMGCL_REORDER_CUTHILL_MCKEE_HPP +#define AMGCL_REORDER_CUTHILL_MCKEE_HPP + +/* +The MIT License + +Copyright (c) 2012-2017 Denis Demidov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +\file amgcl/reorder/cuthill_mckee.hpp +\author Denis Demidov +\brief (Reverse) Cuthill-McKee matrix reorder algorithm. + +The code is adopted from Kratos project http://www.cimne.com/kratos. The +original code came with the following copyright notice: +\verbatim +Kratos Multi-Physics + +Copyright (c) 2012, Pooyan Dadvand, Riccardo Rossi, CIMNE (International Center for Numerical Methods in Engineering) +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + All advertising materials mentioning features or use of this software must + display the following acknowledgement: + This product includes Kratos Multi-Physics technology. + Neither the name of the CIMNE nor the names of its contributors may be used + to endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED ANDON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT(INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THISSOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +\endverbatim +*/ + +#include +#include + +#include +#include +#include + +namespace amgcl { +namespace reorder { + +template +struct cuthill_mckee { + template + static void get(const Matrix &A, Vector &perm) { + typedef typename backend::row_iterator::type row_iterator; + const ptrdiff_t n = backend::rows(A); + + /* The data structure used to sort and traverse the level sets: + * + * The current level set is currentLevelSet; + * In this level set, there are nodes with degrees from 0 (not really + * useful) to maxDegreeInCurrentLevelSet. + * firstWithDegree[i] points to a node with degree i, or to -1 if it + * does not exist. nextSameDegree[firstWithDegree[i]] points to the + * second node with that degree, etc. + * While the level set is being traversed, the structure for the next + * level set is generated; nMDICLS will be the next + * maxDegreeInCurrentLevelSet and nFirstWithDegree will be + * firstWithDegree. + */ + ptrdiff_t initialNode = 0; // node to start search + ptrdiff_t maxDegree = 0; + + std::vector degree(n); + std::vector levelSet(n, 0); + std::vector nextSameDegree(n, -1); + +#pragma omp parallel + { + ptrdiff_t maxd = 0; +#pragma omp for + for(ptrdiff_t i = 0; i < n; ++i) { + ptrdiff_t row_width = 0; + for(row_iterator a = backend::row_begin(A, i); a; ++a, ++row_width); + degree[i] = row_width; + maxd = std::max(maxd, degree[i]); + } +#pragma omp critical + { + maxDegree = std::max(maxDegree, maxd); + } + } + + std::vector firstWithDegree(maxDegree + 1, -1); + std::vector nFirstWithDegree(maxDegree + 1); + + // Initialize the first level set, made up by initialNode alone + perm[0] = initialNode; + ptrdiff_t currentLevelSet = 1; + levelSet[initialNode] = currentLevelSet; + ptrdiff_t maxDegreeInCurrentLevelSet = degree[initialNode]; + firstWithDegree[maxDegreeInCurrentLevelSet] = initialNode; + + // Main loop + for (ptrdiff_t next = 1; next < n; ) { + ptrdiff_t nMDICLS = 0; + std::fill(nFirstWithDegree.begin(), nFirstWithDegree.end(), -1); + bool empty = true; // used to detect different connected components + + ptrdiff_t firstVal = reverse ? maxDegreeInCurrentLevelSet : 0; + ptrdiff_t finalVal = reverse ? -1 : maxDegreeInCurrentLevelSet + 1; + ptrdiff_t increment = reverse ? -1 : 1; + + for(ptrdiff_t soughtDegree = firstVal; soughtDegree != finalVal; soughtDegree += increment) + { + ptrdiff_t node = firstWithDegree[soughtDegree]; + while (node > 0) { + // Visit neighbors + for(row_iterator a = backend::row_begin(A, node); a; ++a) { + ptrdiff_t c = a.col(); + if (levelSet[c] == 0) { + levelSet[c] = currentLevelSet + 1; + perm[next] = c; + ++next; + empty = false; // this level set is not empty + nextSameDegree[c] = nFirstWithDegree[degree[c]]; + nFirstWithDegree[degree[c]] = c; + nMDICLS = std::max(nMDICLS, degree[c]); + } + } + node = nextSameDegree[node]; + } + } + + ++currentLevelSet; + maxDegreeInCurrentLevelSet = nMDICLS; + for(ptrdiff_t i = 0; i <= nMDICLS; ++i) + firstWithDegree[i] = nFirstWithDegree[i]; + + if (empty) { + // The graph contains another connected component that we + // cannot reach. Search for a node that has not yet been + // included in a level set, and start exploring from it. + bool found = false; + for(ptrdiff_t i = 0; i < n; ++i) { + if (levelSet[i] == 0) { + perm[next] = i; + ++next; + levelSet[i] = currentLevelSet; + maxDegreeInCurrentLevelSet = degree[i]; + firstWithDegree[maxDegreeInCurrentLevelSet] = i; + found = true; + break; + } + } + precondition(found, "Internal consistency error at skyline_lu"); + } + } + } +}; + +} // namespace reorder +} // namespace amgcl + +#endif diff --git a/external_libraries/amgcl/solver/idrs.hpp b/external_libraries/amgcl/solver/idrs.hpp new file mode 100644 index 000000000000..0f6866203f33 --- /dev/null +++ b/external_libraries/amgcl/solver/idrs.hpp @@ -0,0 +1,461 @@ +#ifndef AMGCL_SOLVER_IDRS_HPP +#define AMGCL_SOLVER_IDRS_HPP + +/* +The MIT License + +Copyright (c) 2012-2017 Denis Demidov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +\file idrs.hpp +\author Denis Demidov +\brief IDR(s) method. + +The code is ported from Matlab code published at +http://ta.twi.tudelft.nl/nw/users/gijzen/IDR.html. + +This is a very stable and efficient IDR(s) variant (implemented in the MATLAB +code idrs.m given above) as described in: Martin B. van Gijzen and Peter +Sonneveld, Algorithm 913: An Elegant IDR(s) Variant that Efficiently Exploits +Bi-orthogonality Properties. ACM Transactions on Mathematical Software, Vol. +38, No. 1, pp. 5:1-5:19, 2011 (copyright ACM). +*/ + +#include +#include + +#include +#include +#include + +#include +#include +#include + +#ifdef _OPENMP +# include +#endif + +namespace amgcl { +namespace solver { + +/// IDR(s) method (Induced Dimension Reduction) +template < + class Backend, + class InnerProduct = detail::default_inner_product + > +class idrs { + public: + typedef Backend backend_type; + + typedef typename Backend::vector vector; + typedef typename Backend::value_type value_type; + typedef typename Backend::params backend_params; + + typedef typename math::scalar_of::type scalar_type; + typedef typename math::rhs_of::type rhs_type; + + typedef typename math::inner_product_impl< + typename math::rhs_of::type + >::return_type coef_type; + + /// Solver parameters. + struct params { + /// Dimension of the shadow space in IDR(s). + unsigned s; + + /// Computation of omega. + /** + * If omega = 0: a standard minimum residual step is performed + * If omega > 0: omega is increased if + * the cosine of the angle between Ar and r < omega + * Default: omega = 0.7; + */ + scalar_type omega; + + /// Specifies if residual smoothing must be applied. + bool smoothing; + + /// Residual replacement. + /** + * Determines the residual replacement strategy. + * If |r| > 1E3 |b| TOL/EPS) (EPS is the machine precision) + * the recursively computed residual is replaced by the true residual + * once |r| < |b| (to reduce the effect of large intermediate residuals + * on the final accuracy). + * Default: No residual replacement. + */ + bool replacement; + + /// Maximum number of iterations. + unsigned maxiter; + + /// Target relative residual error. + scalar_type tol; + + /// Target absolute residual error. + scalar_type abstol; + + params( + unsigned s = 4, + scalar_type omega = 0.7, + bool smoothing = false, + bool replacement = false, + unsigned maxiter = 100, + scalar_type tol = 1e-8 + ) : + s(s), omega(omega), smoothing(smoothing), + replacement(replacement), maxiter(maxiter), tol(tol), + abstol(std::numeric_limits::min()) + { } + + params(const boost::property_tree::ptree &p) + : AMGCL_PARAMS_IMPORT_VALUE(p, s), + AMGCL_PARAMS_IMPORT_VALUE(p, omega), + AMGCL_PARAMS_IMPORT_VALUE(p, smoothing), + AMGCL_PARAMS_IMPORT_VALUE(p, replacement), + AMGCL_PARAMS_IMPORT_VALUE(p, maxiter), + AMGCL_PARAMS_IMPORT_VALUE(p, tol), + AMGCL_PARAMS_IMPORT_VALUE(p, abstol) + { + AMGCL_PARAMS_CHECK(p, (s)(omega)(smoothing)(replacement)(maxiter)(tol)(abstol)); + } + + void get(boost::property_tree::ptree &p, const std::string &path) const { + AMGCL_PARAMS_EXPORT_VALUE(p, path, s); + AMGCL_PARAMS_EXPORT_VALUE(p, path, omega); + AMGCL_PARAMS_EXPORT_VALUE(p, path, smoothing); + AMGCL_PARAMS_EXPORT_VALUE(p, path, replacement); + AMGCL_PARAMS_EXPORT_VALUE(p, path, maxiter); + AMGCL_PARAMS_EXPORT_VALUE(p, path, tol); + AMGCL_PARAMS_EXPORT_VALUE(p, path, abstol); + } + } prm; + + /// Preallocates necessary data structures for the system of size \p n. + idrs( + size_t n, + const params &prm = params(), + const backend_params &bprm = backend_params(), + const InnerProduct &inner_product = InnerProduct() + ) + : prm(prm), n(n), inner_product(inner_product), + M(boost::extents[prm.s][prm.s]), + f(prm.s), c(prm.s), + r(Backend::create_vector(n, bprm)), + v(Backend::create_vector(n, bprm)), + t(Backend::create_vector(n, bprm)) + { + static const scalar_type one = math::identity(); + static const scalar_type zero = math::zero(); + + if (prm.smoothing) { + x_s = Backend::create_vector(n, bprm); + r_s = Backend::create_vector(n, bprm); + } + + G.reserve(prm.s); + U.reserve(prm.s); + for(unsigned i = 0; i < prm.s; ++i) { + G.push_back(Backend::create_vector(n, bprm)); + U.push_back(Backend::create_vector(n, bprm)); + } + + // Initialize P. + P.reserve(prm.s); + { + std::vector p(n); + +#pragma omp parallel + { +#ifdef _OPENMP + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + + boost::random::mt19937 rng(tid); + boost::random::normal_distribution rnd; + + for(unsigned j = 0; j < prm.s; ++j) { +#pragma omp for + for(ptrdiff_t i = 0; i < static_cast(n); ++i) + p[i] = math::constant(rnd(rng)); + +#pragma omp single + { + P.push_back(Backend::copy_vector(p, bprm)); + } + } + } + + for(unsigned j = 0; j < prm.s; ++j) { + for(unsigned k = 0; k < j; ++k) { + coef_type alpha = inner_product(*P[k], *P[j]); + backend::axpby(-alpha, *P[k], one, *P[j]); + } + scalar_type norm_pj = norm(*P[j]); + backend::axpby(math::inverse(norm_pj), *P[j], zero, *P[j]); + } + } + } + + /* Computes the solution for the given system matrix \p A and the + * right-hand side \p rhs. Returns the number of iterations made and + * the achieved residual as a ``boost::tuple``. The solution vector + * \p x provides initial approximation in input and holds the computed + * solution on output. + * + * The system matrix may differ from the matrix used during + * initialization. This may be used for the solution of non-stationary + * problems with slowly changing coefficients. There is a strong chance + * that a preconditioner built for a time step will act as a reasonably + * good preconditioner for several subsequent time steps [DeSh12]_. + */ + template + boost::tuple operator()( + Matrix const &A, + Precond const &Prec, + Vec1 const &rhs, + Vec2 &x + ) const + { + static const scalar_type one = math::identity(); + static const scalar_type zero = math::zero(); + + scalar_type norm_rhs = norm(rhs); + if (norm_rhs < amgcl::detail::eps(n)) { + backend::clear(x); + return boost::make_tuple(0, norm_rhs); + } + + scalar_type eps = std::max(prm.tol * norm_rhs, prm.abstol); + + // Compute initial residual: + backend::residual(rhs, A, x, *r); + + scalar_type res_norm = norm(*r); + if (res_norm <= eps) { + // Initial guess is a good enough solution. + return boost::make_tuple(0, res_norm / norm_rhs); + } + + if (prm.smoothing) { + backend::copy( x, *x_s); + backend::copy(*r, *r_s); + } + + // Initialization. + coef_type om = math::identity(); + + for(unsigned i = 0; i < prm.s; ++i) { + backend::clear(*G[i]); + backend::clear(*U[i]); + + for(unsigned j = 0; j < prm.s; ++j) + M[i][j] = (i == j); + } + + scalar_type eps_replace = norm_rhs / ( + // Number close to machine precision: + 1e3 * std::numeric_limits::epsilon()); + + // Main iteration loop, build G-spaces: + size_t iter = 0; + bool trueres = false; + while(iter < prm.maxiter && res_norm > eps) { + // New righ-hand size for small system: + for(unsigned i = 0; i < prm.s; ++i) + f[i] = inner_product(*r, *P[i]); + + for(unsigned k = 0; k < prm.s; ++k) { + // Compute new v + backend::copy(*r, *v); + + // Solve small system (Note: M is lower triangular) + // and make v orthogonal to P: + for(unsigned i = k; i < prm.s; ++i) { + c[i] = f[i]; + for(unsigned j = k; j < i; ++j) + c[i] -= M[i][j] * c[j]; + c[i] = math::inverse(M[i][i]) * c[i]; + + backend::axpby(-c[i], *G[i], one, *v); + } + + Prec.apply(*v, *t); + + // Compute new U[k] + backend::axpby(om, *t, c[k], *U[k]); + for(unsigned i = k+1; i < prm.s; ++i) + backend::axpby(c[i], *U[i], one, *U[k]); + + // Compute new G[k], G[k] is in space G_j + backend::spmv(one, A, *U[k], zero, *G[k]); + + // Bi-Orthogonalise the new basis vectors: + for(unsigned i = 0; i < k; ++i) { + coef_type alpha = inner_product(*G[k], *P[i]) / M[i][i]; + + backend::axpby(-alpha, *G[i], one, *G[k]); + backend::axpby(-alpha, *U[i], one, *U[k]); + } + + // New column of M = P'*G (first k-1 entries are zero) + for(unsigned i = k; i < prm.s; ++i) + M[i][k] = backend::inner_product(*G[k], *P[i]); + + precondition(!math::is_zero(M[k][k]), "IDR(s) breakdown: zero M[k,k]"); + + // Make r orthogonal to q_i, i = [0..k) + coef_type beta = math::inverse(M[k][k]) * f[k]; + backend::axpby(-beta, *G[k], one, *r); + backend::axpby( beta, *U[k], one, x); + + res_norm = norm(*r); + + if (prm.replacement && res_norm > eps_replace) + trueres = true; + + // Smoothing + if (prm.smoothing) { + backend::axpbypcz(one, *r_s, -one, *r, zero, *t); + coef_type gamma = inner_product(*t, *r_s) / inner_product(*t, *t); + backend::axpby(-gamma, *t, one, *r_s); + backend::axpbypcz(-gamma, *x_s, gamma, x, one, *x_s); + res_norm = norm(*r_s); + } + + if (res_norm <= eps || ++iter >= prm.maxiter) break; + + // New f = P'*r (first k components are zero) + for(unsigned i = k + 1; i < prm.s; ++i) + f[i] -= beta * M[i][k]; + } + + if (res_norm <= eps || iter >= prm.maxiter) break; + + // Now we have sufficient vectors in G_j to compute residual in G_j+1 + // Note: r is already perpendicular to P so v = r + + Prec.apply(*r, *v); + backend::spmv(one, A, *v, zero, *t); + + // Computation of a new omega + om = omega(*t, *r); + precondition(!math::is_zero(om), "IDR(s) breakdown: zero omega"); + + backend::axpby(-om, *t, one, *r); + backend::axpby( om, *v, one, x); + + res_norm = norm(*r); + if (prm.replacement && res_norm > eps_replace) + trueres = true; + + // Residual replacement? + if (trueres && res_norm < norm_rhs) { + trueres = 0; + backend::residual(rhs, A, x, *r); + } + + // Smoothing. + if (prm.smoothing) { + backend::axpbypcz(one, *r_s, -one, *r, zero, *t); + coef_type gamma = inner_product(*t, *r_s) / inner_product(*t, *t); + backend::axpby(-gamma, *t, one, *r_s); + backend::axpbypcz(-gamma, *x_s, gamma, x, one, *x_s); + res_norm = norm(*r_s); + } + + ++iter; + } + + if (prm.smoothing) + backend::copy(*x_s, x); + + backend::residual(rhs, A, x, *r); + res_norm = norm(*r); + + return boost::make_tuple(iter, res_norm / norm_rhs); + } + + /* Computes the solution for the given right-hand side \p rhs. The + * system matrix is the same that was used for the setup of the + * preconditioner \p P. Returns the number of iterations made and the + * achieved residual as a ``boost::tuple``. The solution vector \p x + * provides initial approximation in input and holds the computed + * solution on output. + */ + template + boost::tuple operator()( + Precond const &P, + Vec1 const &rhs, + Vec2 &x + ) const + { + return (*this)(P.system_matrix(), P, rhs, x); + } + + friend std::ostream& operator<<(std::ostream &os, const idrs &s) { + return os << "IDR(" << s.prm.s << "): " << s.n << " unknowns"; + } + + private: + size_t n; + + InnerProduct inner_product; + + mutable boost::multi_array M; + mutable std::vector f, c; + + boost::shared_ptr r, v, t; + boost::shared_ptr x_s; + boost::shared_ptr r_s; + + std::vector< boost::shared_ptr > P, G, U; + + + template + scalar_type norm(const Vec &x) const { + return std::abs(sqrt(inner_product(x, x))); + } + + template + coef_type omega(const Vector1 &t, const Vector2 &s) const { + scalar_type norm_t = norm(t); + scalar_type norm_s = norm(s); + + coef_type ts = inner_product(t, s); + scalar_type rho = math::norm(ts / (norm_t * norm_s)); + coef_type om = ts / (norm_t * norm_t); + + if (rho < prm.omega) + om *= prm.omega/rho; + + return om; + } +}; + +} // namespace solver +} // namespace amgcl + +#endif From a9eb953e70a7425a395c39ab34df0bff2ed9ad73 Mon Sep 17 00:00:00 2001 From: Riccardo Rossi Date: Tue, 5 Sep 2017 09:47:07 +0200 Subject: [PATCH 11/12] correction to make the mpi solver to work correctly --- .../amgcl/mpi/schur_pressure_correction.hpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/external_libraries/amgcl/mpi/schur_pressure_correction.hpp b/external_libraries/amgcl/mpi/schur_pressure_correction.hpp index f5e0ed3589b6..60711c9a17e0 100644 --- a/external_libraries/amgcl/mpi/schur_pressure_correction.hpp +++ b/external_libraries/amgcl/mpi/schur_pressure_correction.hpp @@ -418,8 +418,22 @@ class schur_pressure_correction { tmp = backend_type::create_vector(nu, bprm); - if (prm.approx_schur) - M = backend_type::copy_vector(diagonal(*Kuu, /*invert = */true), bprm); + if (prm.approx_schur) { + M = backend_type::create_vector(nu, bprm); + +#pragma omp parallel + for(ptrdiff_t i = 0; i < nu; ++i) { + // Keep in mind Kuu has global column numeration: + ptrdiff_t dia = i + u_beg; + value_type v = math::zero(); + for(ptrdiff_t j = Kuu->ptr[i], e = Kuu->ptr[i+1]; j < e; ++j) { + if (Kuu->col[j] == dia) { + v = math::inverse(Kuu->val[j]); + } + } + (*M)[i] = v; + } + } // Scatter/Gather matrices boost::shared_ptr x2u = boost::make_shared(); From 6615d272628dae7b2907294d3f06c4c22e58a93b Mon Sep 17 00:00:00 2001 From: Riccardo Rossi Date: Tue, 5 Sep 2017 11:23:27 +0200 Subject: [PATCH 12/12] corrections to the solver settings --- .../amgcl_mpi_schur_complement_solver.h | 103 +++++++++++------- 1 file changed, 66 insertions(+), 37 deletions(-) diff --git a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h index 95e183012666..94f97c7368bb 100644 --- a/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h +++ b/applications/trilinos_application/external_includes/amgcl_mpi_schur_complement_solver.h @@ -91,7 +91,7 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType, }, "pressure_block_preconditioner" : { - "krylov_type" : "lgmres", + "krylov_type" : "fgmres", "tolerance" : 1e-2, "preconditioner_type" : "spai0", "max_iteration": 20 @@ -123,6 +123,35 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType, } mCoarseEnough = rParameters["coarse_enough"].GetInt(); + +// { +// "solver": { +// "type" : "fgmres", +// "M": 50 +// }, +// "precond": { +// "usolver": { +// "solver": { +// "type" : "gmres", +// "tol": 0.001, +// "maxiter": 5 +// } +// }, +// "psolver": { +// "solver": { +// "type" : "gmres", +// "tol": 0.01, +// "maxiter": 20 +// }, +// "precond" : { +// "isolver" : { +// "type" : "gmres", +// "maxiter" : 2 +// } +// } +// } +// } +// } mTolerance = rParameters["tolerance"].GetDouble(); mMaxIterations = rParameters["max_iteration"].GetInt(); @@ -185,13 +214,33 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType, typedef amgcl::backend::builtin Backend; + typedef amgcl::mpi::make_solver< + amgcl::mpi::schur_pressure_correction< + amgcl::mpi::make_solver< + amgcl::mpi::block_preconditioner< + amgcl::runtime::relaxation::as_preconditioner + >, + amgcl::runtime::iterative_solver + >, + amgcl::mpi::make_solver< + amgcl::mpi::subdomain_deflation< + amgcl::runtime::amg, + amgcl::runtime::iterative_solver, + amgcl::runtime::mpi::direct_solver + >, + amgcl::runtime::iterative_solver + > + >, + amgcl::runtime::iterative_solver + > SDD; + // typedef amgcl::mpi::make_solver< // amgcl::mpi::schur_pressure_correction< // amgcl::mpi::make_solver< // amgcl::mpi::block_preconditioner< // amgcl::runtime::relaxation::as_preconditioner // >, -// amgcl::runtime::iterative_solver +// amgcl::solver::bicgstab // >, // amgcl::mpi::make_solver< // amgcl::mpi::subdomain_deflation< @@ -204,46 +253,26 @@ class AmgclMPISchurComplementSolver : public LinearSolver< TSparseSpaceType, // >, // amgcl::runtime::iterative_solver // > SDD; - -// typedef amgcl::mpi::make_solver< -// amgcl::mpi::schur_pressure_correction< -// amgcl::mpi::make_solver< -// amgcl::mpi::block_preconditioner< -// amgcl::runtime::relaxation::as_preconditioner + +// typedef amgcl::mpi::make_solver< +// amgcl::mpi::schur_pressure_correction< +// amgcl::mpi::make_solver< +// amgcl::mpi::block_preconditioner< +// amgcl::runtime::relaxation::as_preconditioner // >, -// amgcl::solver::bicgstab +// amgcl::solver::gmres //amgcl::solver::bicgstab // >, -// amgcl::mpi::make_solver< -// amgcl::mpi::subdomain_deflation< -// amgcl::runtime::amg, -// amgcl::runtime::iterative_solver, -// amgcl::runtime::mpi::direct_solver +// amgcl::mpi::make_solver< +// amgcl::mpi::subdomain_deflation< +// amgcl::runtime::amg, +// amgcl::solver::gmres, //amgcl::solver::bicgstab, +// amgcl::mpi::skyline_lu // >, -// amgcl::runtime::iterative_solver +// amgcl::solver::fgmres // > // >, -// amgcl::runtime::iterative_solver -// > SDD; - - typedef amgcl::mpi::make_solver< - amgcl::mpi::schur_pressure_correction< - amgcl::mpi::make_solver< - amgcl::mpi::block_preconditioner< - amgcl::runtime::relaxation::as_preconditioner - >, - amgcl::runtime::iterative_solver //amgcl::solver::bicgstab - >, - amgcl::mpi::make_solver< - amgcl::mpi::subdomain_deflation< - amgcl::runtime::amg, - amgcl::runtime::iterative_solver, //amgcl::solver::bicgstab, - amgcl::mpi::skyline_lu - >, - amgcl::solver::fgmres - > - >, - amgcl::solver::fgmres - > SDD; +// amgcl::solver::fgmres +// > SDD; boost::function dv = amgcl::mpi::constant_deflation(1); mprm.put("precond.psolver.precond.num_def_vec", 1);