10 #ifndef PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
11 #define PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
19 #include <Teuchos_CommHelpers.hpp>
24 #include "MueLu_RAPFactory.hpp"
25 #include "MueLu_BlockedRAPFactory.hpp"
26 #include "MueLu_SubBlockAFactory.hpp"
35 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
38 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
41 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
45 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
55 #undef SET_VALID_ENTRY
59 validParamList->
set<
RCP<const FactoryBase> >(
"Node Comm", Teuchos::null,
"Generating factory of the node level communicator");
61 return validParamList;
64 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
68 const bool useMap = pL.
get<
bool>(
"repartition: use map");
70 Input(currentLevel,
"Map");
72 Input(currentLevel,
"A");
74 Input(currentLevel,
"A");
75 if (pL.
isParameter(
"repartition: node repartition level")) {
76 const int nodeRepartLevel = pL.
get<
int>(
"repartition: node repartition level");
77 if (currentLevel.
GetLevelID() == nodeRepartLevel) {
78 Input(currentLevel,
"Node Comm");
83 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
90 const int startLevel = pL.
get<
int>(
"repartition: start level");
91 const int nodeRepartLevel = pL.
get<
int>(
"repartition: node repartition level");
92 LO minRowsPerProcess = pL.
get<
LO>(
"repartition: min rows per proc");
93 LO targetRowsPerProcess = pL.
get<
LO>(
"repartition: target rows per proc");
94 LO minRowsPerThread = pL.
get<
LO>(
"repartition: min rows per thread");
95 LO targetRowsPerThread = pL.
get<
LO>(
"repartition: target rows per thread");
96 LO putOnSingleProc = pL.
get<
LO>(
"repartition: put on single proc");
97 const double nonzeroImbalance = pL.
get<
double>(
"repartition: max imbalance");
98 const bool useMap = pL.
get<
bool>(
"repartition: use map");
100 int thread_per_mpi_rank = 1;
101 #if defined(KOKKOS_ENABLE_OPENMP)
102 using execution_space =
typename Node::device_type::execution_space;
103 if (std::is_same<execution_space, Kokkos::OpenMP>::value)
104 thread_per_mpi_rank = execution_space().concurrency();
107 if (minRowsPerThread > 0)
109 minRowsPerProcess = minRowsPerThread * thread_per_mpi_rank;
111 if (targetRowsPerThread == 0)
112 targetRowsPerThread = minRowsPerThread;
114 if (targetRowsPerThread > 0)
116 targetRowsPerProcess = targetRowsPerThread * thread_per_mpi_rank;
118 if (targetRowsPerProcess == 0)
119 targetRowsPerProcess = minRowsPerProcess;
122 Set<LO>(currentLevel,
"repartition: heuristic target rows per process", targetRowsPerProcess);
131 Afact = GetFactory(
"A");
132 if (!Afact.
is_null() && Teuchos::rcp_dynamic_cast<
const RAPFactory>(Afact) == Teuchos::null &&
133 Teuchos::rcp_dynamic_cast<const BlockedRAPFactory>(Afact) == Teuchos::null &&
134 Teuchos::rcp_dynamic_cast<
const SubBlockAFactory>(Afact) == Teuchos::null) {
135 GetOStream(
Warnings) <<
"MueLu::RepartitionHeuristicFactory::Build: The generation factory for A must "
136 "be a RAPFactory or a SubBlockAFactory providing the non-rebalanced matrix information! "
137 "It specifically must not be of type Rebalance(Blocked)AcFactory or similar. "
138 "Please check the input. Make also sure that \"number of partitions\" is provided to "
139 "the Interface class and the RepartitionFactory instance. Instead, we have a "
143 A = Get<RCP<Matrix> >(currentLevel,
"A");
144 map = A->getRowMap();
146 map = Get<RCP<const Map> >(currentLevel,
"Map");
158 if (currentLevel.GetLevelID() == nodeRepartLevel && map->getComm()->getSize() > 1) {
163 if (NodeComm()->getSize() != map->getComm()->getSize()) {
164 GetOStream(
Statistics1) <<
"Repartitioning? YES: \n Within node only" << std::endl;
165 int nodeRank = NodeComm->getRank();
168 int isZero = (nodeRank == 0);
170 Teuchos::reduceAll(*map->getComm(),
Teuchos::REDUCE_SUM, isZero, Teuchos::outArg(numNodes));
171 Set(currentLevel,
"number of partitions", numNodes);
177 if (currentLevel.GetLevelID() < startLevel) {
179 <<
"\n current level = " <<
Teuchos::toString(currentLevel.GetLevelID()) <<
", first level where repartitioning can happen is " +
Teuchos::toString(startLevel) << std::endl;
182 Set(currentLevel,
"number of partitions", -1);
196 if (comm->getSize() == 1 && Teuchos::rcp_dynamic_cast<
const RAPFactory>(Afact) != Teuchos::null) {
198 <<
"\n comm size = 1" << std::endl;
200 Set(currentLevel,
"number of partitions", -1);
204 int numActiveProcesses = 0;
205 MueLu_sumAll(comm, Teuchos::as<int>((map->getLocalNumElements() > 0) ? 1 : 0), numActiveProcesses);
207 if (numActiveProcesses == 1) {
209 <<
"\n # processes with rows = " <<
Teuchos::toString(numActiveProcesses) << std::endl;
211 Set(currentLevel,
"number of partitions", 1);
220 <<
"\n # rows is below the single-proc threshold = " << putOnSingleProc << std::endl;
222 Set(currentLevel,
"number of partitions", 1);
226 bool test3 =
false, test4 =
false;
227 std::string msg3, msg4;
231 if (minRowsPerProcess > 0) {
233 LO haveFewRows = (numMyRows < minRowsPerProcess ? 1 : 0), numWithFewRows = 0;
235 MueLu_minAll(comm, (numMyRows > 0 ? numMyRows : LOMAX), minNumRows);
240 if (numWithFewRows > 0)
251 GO minNnz, maxNnz, numMyNnz = Teuchos::as<GO>(A->getLocalNumEntries());
253 MueLu_minAll(comm, (numMyNnz > 0 ? numMyNnz : maxNnz), minNnz);
254 double imbalance = Teuchos::as<double>(maxNnz) / minNnz;
256 if (imbalance > nonzeroImbalance)
263 if (!test3 && !test4) {
264 GetOStream(
Statistics1) <<
"Repartitioning? NO:" << msg3 + msg4 << std::endl;
267 Set(currentLevel,
"number of partitions", -1);
271 GetOStream(
Statistics1) <<
"Repartitioning? YES:" << msg3 + msg4 << std::endl;
284 const auto globalNumRows = Teuchos::as<GO>(map->getGlobalNumElements());
285 int numPartitions = 1;
286 if (globalNumRows >= targetRowsPerProcess) {
288 numPartitions = std::max(Teuchos::as<int>(globalNumRows / targetRowsPerProcess), 1);
290 numPartitions = std::min(numPartitions, comm->getSize());
292 Set(currentLevel,
"number of partitions", numPartitions);
294 GetOStream(
Statistics1) <<
"Number of partitions to use = " << numPartitions << std::endl;
298 #endif // ifdef HAVE_MPI
#define MueLu_sumAll(rcpComm, in, out)
virtual ~RepartitionHeuristicFactory()
Destructor.
#define MueLu_maxAll(rcpComm, in, out)
void Build(Level &currentLevel) const
Build an object with this factory.
T & get(const std::string &name, T def_value)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
#define SET_VALID_ENTRY(name)
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
#define MueLu_minAll(rcpComm, in, out)
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Factory for building a thresholded operator.
void DeclareInput(Level &currentLevel) const
Determines the data that RepartitionHeuristicFactory needs, and the factories that generate that data...
int GetLevelID() const
Return level number.
Exception thrown to report errors in the internal logic of the program.
Print all warning messages.
Factory for building coarse matrices.
RepartitionHeuristicFactory()
Constructor.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
virtual std::string description() const
Return a simple one-line description of this object.
std::string toString(const T &t)