47 #ifndef PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
48 #define PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
56 #include <Teuchos_CommHelpers.hpp>
59 #include <Xpetra_Matrix.hpp>
61 #include "MueLu_Utilities.hpp"
63 #include "MueLu_RAPFactory.hpp"
64 #include "MueLu_BlockedRAPFactory.hpp"
65 #include "MueLu_SubBlockAFactory.hpp"
70 #include "MueLu_RepartitionHeuristicFactory.hpp"
// Builds and returns the list of parameters this factory accepts.
// NOTE(review): the signature line and the individual SET_VALID_ENTRY(...) calls are
// not present in this excerpt; presumably this is GetValidParameterList() -- confirm
// against the full file.
74 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Helper macro: copies the entry called `name` from MasterList into validParamList.
78 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
// The macro is only meant to be used locally, so it is removed again here.
87 #undef SET_VALID_ENTRY
// "Node Comm" is registered with a null default for its generating factory.
91 validParamList->
set<
RCP<const FactoryBase> >(
"Node Comm", Teuchos::null,
"Generating factory of the node level communicator");
93 return validParamList;
// Declares which level data this factory requests.
// NOTE(review): the signature line and the if/else structure selecting between the
// "Map" and "A" requests are not present in this excerpt; presumably this is
// DeclareInput(Level&) -- confirm against the full file.
96 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Reads the "repartition: use map" flag from the parameter list.
100 const bool useMap = pL.
get<
bool>(
"repartition: use map");
// Request the "Map" variable (presumably only when useMap is true -- TODO confirm).
102 Input(currentLevel,
"Map");
// Request the matrix "A" (presumably the useMap == false branch -- TODO confirm).
104 Input(currentLevel,
"A");
// Second request for "A" (presumably the fallback when the parameter is absent -- TODO confirm).
106 Input(currentLevel,
"A");
// "Node Comm" is requested only if the node repartition level parameter exists
// and the current level ID equals that level.
107 if(pL.
isParameter(
"repartition: node repartition level")) {
108 const int nodeRepartLevel = pL.
get<
int>(
"repartition: node repartition level");
109 if(currentLevel.
GetLevelID() == nodeRepartLevel) {
110 Input(currentLevel,
"Node Comm");
// Decides whether the current level should be repartitioned and stores the result on
// the level as "number of partitions" (-1 on the visible "NO" paths, 1 when only one
// process has rows, otherwise a computed partition count).
// NOTE(review): the signature line, braces, several declarations and early returns are
// not present in this excerpt; presumably this is Build(Level&) -- confirm against the
// full file.
115 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// --- Read the repartitioning parameters from the parameter list. ---
122 const int startLevel = pL.
get<
int> (
"repartition: start level");
123 const int nodeRepartLevel = pL.
get<
int> (
"repartition: node repartition level");
124 LO minRowsPerProcess = pL.
get<LO> (
"repartition: min rows per proc");
125 LO targetRowsPerProcess = pL.
get<LO> (
"repartition: target rows per proc");
126 LO minRowsPerThread = pL.
get<LO> (
"repartition: min rows per thread");
127 LO targetRowsPerThread = pL.
get<LO> (
"repartition: target rows per thread");
128 const double nonzeroImbalance = pL.
get<
double>(
"repartition: max imbalance");
129 const bool useMap = pL.
get<
bool> (
"repartition: use map");
// Threads per MPI rank defaults to 1; it is replaced by the execution space's
// concurrency only when Kokkos is available and the node's execution space is OpenMP.
131 int thread_per_mpi_rank = 1;
132 #if defined(HAVE_MUELU_KOKKOSCORE) && defined(KOKKOS_ENABLE_OPENMP)
133 using execution_space =
typename Node::device_type::execution_space;
134 if (std::is_same<execution_space, Kokkos::OpenMP>::value)
135 thread_per_mpi_rank = execution_space::concurrency();
// A positive per-thread minimum overrides the per-process minimum.
138 if (minRowsPerThread > 0)
140 minRowsPerProcess = minRowsPerThread*thread_per_mpi_rank;
// An unset (zero) per-thread target falls back to the per-thread minimum.
142 if (targetRowsPerThread == 0)
143 targetRowsPerThread = minRowsPerThread;
// A positive per-thread target overrides the per-process target.
145 if (targetRowsPerThread > 0)
147 targetRowsPerProcess = targetRowsPerThread*thread_per_mpi_rank;
// An unset (zero) per-process target falls back to the per-process minimum.
149 if (targetRowsPerProcess == 0)
150 targetRowsPerProcess = minRowsPerProcess;
// Publish the effective per-process target on the level.
153 Set<LO>(currentLevel,
"repartition: heuristic target rows per process",targetRowsPerProcess);
// The generating factory of "A" must be a RAPFactory, BlockedRAPFactory or
// SubBlockAFactory; the message below is emitted otherwise.
// NOTE(review): the message text names only RAPFactory and SubBlockAFactory although
// BlockedRAPFactory is accepted by the check -- consider aligning the wording.
163 Afact = GetFactory(
"A");
164 if(!Afact.
is_null() && Teuchos::rcp_dynamic_cast<
const RAPFactory>(Afact) == Teuchos::null &&
165 Teuchos::rcp_dynamic_cast<const BlockedRAPFactory>(Afact) == Teuchos::null &&
166 Teuchos::rcp_dynamic_cast<
const SubBlockAFactory>(Afact) == Teuchos::null) {
168 "MueLu::RepartitionHeuristicFactory::Build: The generation factory for A must " \
169 "be a RAPFactory or a SubBlockAFactory providing the non-rebalanced matrix information! " \
170 "It specifically must not be of type Rebalance(Blocked)AcFactory or similar. " \
171 "Please check the input. Make also sure that \"number of partitions\" is provided to " \
172 "the Interface class and the RepartitionFactory instance. Instead, we have a "<<Afact->
description() << std::endl;
// The row map is taken from A ...
175 A = Get< RCP<Matrix> >(currentLevel,
"A");
176 map = A->getRowMap();
// ... or read directly from the level as "Map" (presumably selected by useMap -- TODO confirm).
178 map = Get< RCP<const Map> >(currentLevel,
"Map");
// --- Node-level repartitioning: only on nodeRepartLevel and with more than one rank. ---
188 if (currentLevel.GetLevelID() == nodeRepartLevel && map->getComm()->getSize() > 1) {
// Only applies when the node communicator differs in size from the map's communicator.
193 if(NodeComm()->getSize() != map->getComm()->getSize()) {
194 GetOStream(
Statistics1) <<
"Repartitioning? YES: \n Within node only"<<std::endl;
195 int nodeRank = NodeComm->getRank();
// Summing the "node rank is 0" flag over all ranks yields numNodes, which becomes
// the number of partitions.
198 int isZero = (nodeRank == 0);
200 Teuchos::reduceAll(*map->getComm(),
Teuchos::REDUCE_SUM, isZero, Teuchos::outArg(numNodes));
201 Set(currentLevel,
"number of partitions", numNodes);
// --- Check: levels below startLevel are not repartitioned (-1 is stored). ---
207 if (currentLevel.GetLevelID() < startLevel) {
208 GetOStream(
Statistics1) <<
"Repartitioning? NO:" <<
210 ", first level where repartitioning can happen is " +
Teuchos::toString(startLevel) << std::endl;
213 Set(currentLevel,
"number of partitions", -1);
// --- Check: a single-rank communicator with A coming from a RAPFactory stores -1. ---
227 if (comm->getSize() == 1 && Teuchos::rcp_dynamic_cast<
const RAPFactory>(Afact) != Teuchos::null) {
228 GetOStream(
Statistics1) <<
"Repartitioning? NO:" <<
229 "\n comm size = 1" << std::endl;
231 Set(currentLevel,
"number of partitions", -1);
// --- Check: count the processes that own at least one row; exactly one stores 1. ---
235 int numActiveProcesses = 0;
236 MueLu_sumAll(comm, Teuchos::as<int>((map->getNodeNumElements() > 0) ? 1 : 0), numActiveProcesses);
238 if (numActiveProcesses == 1) {
239 GetOStream(
Statistics1) <<
"Repartitioning? NO:" <<
240 "\n # processes with rows = " <<
Teuchos::toString(numActiveProcesses) << std::endl;
242 Set(currentLevel,
"number of partitions", 1);
// --- Remaining checks: test3 relates to processes with too few rows, test4 to the
// nonzero imbalance (the assignments to both flags are not present in this excerpt). ---
247 bool test3 =
false, test4 =
false;
248 std::string msg3, msg4;
252 if (minRowsPerProcess > 0) {
254 LO haveFewRows = (numMyRows < minRowsPerProcess ? 1 : 0), numWithFewRows = 0;
// Processes without rows contribute LOMAX so they do not determine the minimum.
256 MueLu_minAll(comm, (numMyRows > 0 ? numMyRows : LOMAX), minNumRows);
261 if (numWithFewRows > 0)
272 GO minNnz, maxNnz, numMyNnz = Teuchos::as<GO>(A->getNodeNumEntries());
// Processes without nonzeros contribute maxNnz so they do not determine the minimum.
274 MueLu_minAll(comm, (numMyNnz > 0 ? numMyNnz : maxNnz), minNnz);
// Imbalance is the ratio of the largest to the smallest local nonzero count.
275 double imbalance = Teuchos::as<double>(maxNnz)/minNnz;
277 if (imbalance > nonzeroImbalance)
// Neither check triggered: store -1.
284 if (!test3 && !test4) {
285 GetOStream(
Statistics1) <<
"Repartitioning? NO:" << msg3 + msg4 << std::endl;
288 Set(currentLevel,
"number of partitions", -1);
292 GetOStream(
Statistics1) <<
"Repartitioning? YES:" << msg3 + msg4 << std::endl;
// --- Partition count: global rows divided by the per-process target, at least 1 and
// at most the communicator size. ---
// NOTE(review): if targetRowsPerProcess is still 0 here, the guard below passes and the
// division would divide by zero -- confirm that a nonzero value is enforced upstream.
305 const auto globalNumRows = Teuchos::as<GO>(map->getGlobalNumElements());
306 int numPartitions = 1;
307 if (globalNumRows >= targetRowsPerProcess) {
309 numPartitions = std::max(Teuchos::as<int>(globalNumRows / targetRowsPerProcess), 1);
311 numPartitions = std::min(numPartitions, comm->getSize());
313 Set(currentLevel,
"number of partitions", numPartitions);
315 GetOStream(
Statistics1) <<
"Number of partitions to use = " << numPartitions << std::endl;
319 #endif //ifdef HAVE_MPI
#define MueLu_sumAll(rcpComm, in, out)
#define MueLu_maxAll(rcpComm, in, out)
void Build(Level &currentLevel) const
Build an object with this factory.
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
#define SET_VALID_ENTRY(name)
#define MueLu_minAll(rcpComm, in, out)
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Factory for building a thresholded operator.
void DeclareInput(Level &currentLevel) const
Determines the data that RepartitionHeuristicFactory needs, and the factories that generate that data...
int GetLevelID() const
Return level number.
Exception thrown to report errors in the internal logic of the program.
Print all warning messages.
Factory for building coarse matrices.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
virtual std::string description() const
Return a simple one-line description of this object.
std::string toString(const T &t)