47 #ifndef PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
48 #define PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
56 #include <Teuchos_CommHelpers.hpp>
61 #include "MueLu_Utilities.hpp"
63 #include "MueLu_RAPFactory.hpp"
64 #include "MueLu_BlockedRAPFactory.hpp"
65 #include "MueLu_SubBlockAFactory.hpp"
70 #include "MueLu_RepartitionHeuristicFactory.hpp"
74 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
78 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
86 #undef SET_VALID_ENTRY
89 validParamList->
set<
RCP<const FactoryBase> >(
"Node Comm", Teuchos::null,
"Generating factory of the node level communicator");
91 return validParamList;
94 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
96 Input(currentLevel,
"A");
98 if(pL.
isParameter(
"repartition: node repartition level")) {
99 const int nodeRepartLevel = pL.
get<
int>(
"repartition: node repartition level");
100 if(currentLevel.
GetLevelID() == nodeRepartLevel) {
101 Input(currentLevel,
"Node Comm");
106 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
113 const int startLevel = pL.
get<
int> (
"repartition: start level");
114 const int nodeRepartLevel = pL.
get<
int> (
"repartition: node repartition level");
115 LO minRowsPerProcess = pL.
get<
LO> (
"repartition: min rows per proc");
116 LO targetRowsPerProcess = pL.
get<
LO> (
"repartition: target rows per proc");
117 LO minRowsPerThread = pL.
get<
LO> (
"repartition: min rows per thread");
118 LO targetRowsPerThread = pL.
get<
LO> (
"repartition: target rows per thread");
119 const double nonzeroImbalance = pL.
get<
double>(
"repartition: max imbalance");
121 int thread_per_mpi_rank = 1;
122 #if defined(HAVE_MUELU_KOKKOSCORE) && defined(KOKKOS_ENABLE_OPENMP)
123 using execution_space =
typename Node::device_type::execution_space;
124 if (std::is_same<execution_space, Kokkos::OpenMP>::value)
125 thread_per_mpi_rank = execution_space::concurrency();
128 if (minRowsPerThread > 0)
130 minRowsPerProcess = minRowsPerThread*thread_per_mpi_rank;
132 if (targetRowsPerThread == 0)
133 targetRowsPerThread = minRowsPerThread;
135 if (targetRowsPerThread > 0)
137 targetRowsPerProcess = targetRowsPerThread*thread_per_mpi_rank;
139 if (targetRowsPerProcess == 0)
140 targetRowsPerProcess = minRowsPerProcess;
143 Set<LO>(currentLevel,
"repartition: heuristic target rows per process",targetRowsPerProcess);
150 if(!Afact.
is_null() && Teuchos::rcp_dynamic_cast<
const RAPFactory>(Afact) == Teuchos::null &&
151 Teuchos::rcp_dynamic_cast<const BlockedRAPFactory>(Afact) == Teuchos::null &&
152 Teuchos::rcp_dynamic_cast<
const SubBlockAFactory>(Afact) == Teuchos::null) {
154 "MueLu::RepartitionHeuristicFactory::Build: The generation factory for A must " \
155 "be a RAPFactory or a SubBlockAFactory providing the non-rebalanced matrix information! " \
156 "It specifically must not be of type Rebalance(Blocked)AcFactory or similar. " \
157 "Please check the input. Make also sure that \"number of partitions\" is provided to " \
158 "the Interface class and the RepartitionFactory instance. Instead, we have a "<<Afact->
description() << std::endl;
161 RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel,
"A");
171 if (currentLevel.GetLevelID() == nodeRepartLevel && A->getMap()->getComm()->getSize() > 1) {
176 if(NodeComm()->getSize() != A->getMap()->getComm()->getSize()) {
177 GetOStream(
Statistics1) <<
"Repartitioning? YES: \n Within node only"<<std::endl;
178 int nodeRank = NodeComm->getRank();
181 int isZero = (nodeRank == 0);
184 Set(currentLevel,
"number of partitions", numNodes);
190 if (currentLevel.GetLevelID() < startLevel) {
191 GetOStream(
Statistics1) <<
"Repartitioning? NO:" <<
193 ", first level where repartitioning can happen is " +
Teuchos::toString(startLevel) << std::endl;
196 Set(currentLevel,
"number of partitions", -1);
212 if (comm->getSize() == 1 && Teuchos::rcp_dynamic_cast<
const RAPFactory>(Afact) != Teuchos::null) {
213 GetOStream(
Statistics1) <<
"Repartitioning? NO:" <<
214 "\n comm size = 1" << std::endl;
216 Set(currentLevel,
"number of partitions", -1);
220 int numActiveProcesses = 0;
221 MueLu_sumAll(comm, Teuchos::as<int>((A->getNodeNumRows() > 0) ? 1 : 0), numActiveProcesses);
223 if (numActiveProcesses == 1) {
224 GetOStream(
Statistics1) <<
"Repartitioning? NO:" <<
225 "\n # processes with rows = " <<
Teuchos::toString(numActiveProcesses) << std::endl;
227 Set(currentLevel,
"number of partitions", 1);
232 bool test3 =
false, test4 =
false;
233 std::string msg3, msg4;
237 if (minRowsPerProcess > 0) {
239 LO haveFewRows = (numMyRows < minRowsPerProcess ? 1 : 0), numWithFewRows = 0;
241 MueLu_minAll(comm, (numMyRows > 0 ? numMyRows : LOMAX), minNumRows);
246 if (numWithFewRows > 0)
254 GO minNnz, maxNnz, numMyNnz = Teuchos::as<GO>(A->getNodeNumEntries());
256 MueLu_minAll(comm, (numMyNnz > 0 ? numMyNnz : maxNnz), minNnz);
257 double imbalance = Teuchos::as<double>(maxNnz)/minNnz;
259 if (imbalance > nonzeroImbalance)
265 if (!test3 && !test4) {
266 GetOStream(
Statistics1) <<
"Repartitioning? NO:" << msg3 + msg4 << std::endl;
269 Set(currentLevel,
"number of partitions", -1);
273 GetOStream(
Statistics1) <<
"Repartitioning? YES:" << msg3 + msg4 << std::endl;
286 const auto globalNumRows = Teuchos::as<GO>(A->getGlobalNumRows());
287 int numPartitions = 1;
288 if (globalNumRows >= targetRowsPerProcess) {
290 numPartitions = std::max(Teuchos::as<int>(globalNumRows / targetRowsPerProcess), 1);
292 numPartitions = std::min(numPartitions, comm->getSize());
294 Set(currentLevel,
"number of partitions", numPartitions);
296 GetOStream(
Statistics1) <<
"Number of partitions to use = " << numPartitions << std::endl;
300 #endif //ifdef HAVE_MPI
#define MueLu_sumAll(rcpComm, in, out)
#define MueLu_maxAll(rcpComm, in, out)
void Build(Level ¤tLevel) const
Build an object with this factory.
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
Timer to be used in factories. Similar to Monitor but with additional timers.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
#define SET_VALID_ENTRY(name)
#define MueLu_minAll(rcpComm, in, out)
bool isParameter(const std::string &name) const
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Class that holds all level-specific information.
Factory for building a thresholded operator.
TEUCHOS_DEPRECATED void reduceAll(const Comm< Ordinal > &comm, const EReductionType reductType, const Packet &send, Packet *globalReduct)
void DeclareInput(Level ¤tLevel) const
Determines the data that RepartitionHeuristicFactory needs, and the factories that generate that data...
int GetLevelID() const
Return level number.
Exception throws to report errors in the internal logical of the program.
Print all warning messages.
Factory for building coarse matrices.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
virtual std::string description() const
Return a simple one-line description of this object.
std::string toString(const T &t)