46 #ifndef MUELU_PERFUTILS_DEF_HPP
47 #define MUELU_PERFUTILS_DEF_HPP
53 #include <Teuchos_CommHelpers.hpp>
56 #include <Xpetra_Export.hpp>
57 #include <Xpetra_Import.hpp>
58 #include <Xpetra_Matrix.hpp>
// Computes cross-process statistics for the per-process value v.
// avgVal and devVal are assigned in the visible lines below; minVal, maxVal,
// minProc and maxProc are non-const references and are presumably outputs too.
// NOTE(review): this listing omits parts of the body (the declarations of w,
// sumVal and sum2Val, and whatever fills them, are not visible here).
67 void calculateStats(Type& minVal, Type& maxVal,
double& avgVal,
double& devVal,
int& minProc,
int& maxProc,
const RCP<
const Teuchos::Comm<int> >& comm,
int numActiveProcs,
const Type& v) {
// w is this process's rank when its value equals the minimum, -1 otherwise.
77 w = (minVal == v) ? comm->getRank() : -1;
// Same rank-or-minus-one selection for the maximum.
79 w = (maxVal == v) ? comm->getRank() : -1;
// The average divides by the number of active processes; it is 0 when there are
// none, which avoids a division by zero.
82 avgVal = (numActiveProcs > 0 ? as<double>(sumVal) / numActiveProcs : 0);
// Deviation uses an (n-1) denominator and is defined as 0 for fewer than two
// active processes. sum2Val is presumably the sum of squared values — TODO confirm.
// NOTE(review): rounding could make the radicand slightly negative; confirm
// whether that case needs clamping.
83 devVal = (numActiveProcs > 1 ? sqrt((sum2Val - sumVal*avgVal)/(numActiveProcs-1)) : 0);
// Body fragment that formats the statistics of v into buf.
// NOTE(review): the enclosing function header and the declarations of buf,
// minVal, maxVal, minProc and maxProc are not visible in this listing.
89 double avgVal, devVal;
91 calculateStats<Type>(minVal, maxVal, avgVal, devVal, minProc, maxProc, comm, numActiveProcs, v);
// The relative (percentage) format is chosen only when the average is nonzero
// and paramList is null, lacks "print abs", or has "print abs" set to false.
// The nonzero check also guards the divisions by avgVal below.
94 if (avgVal && (paramList.is_null() || !paramList->isParameter(
"print abs") || paramList->get<
bool>(
"print abs") ==
false))
// NOTE(review): sprintf does not bound the write. Confirm buf is large enough
// for the longest formatted line, or switch to snprintf.
95 sprintf(buf,
"avg = %.2e, dev = %5.1f%%, min = %+6.1f%% (%8.2f on %4d), max = %+6.1f%% (%8.2f on %4d)", avgVal,
96 (devVal/avgVal)*100, (minVal/avgVal-1)*100, as<double>(minVal), minProc, (maxVal/avgVal-1)*100, as<double>(maxVal), maxProc);
// Absolute format: prints the raw average, deviation, minimum and maximum.
// NOTE(review): the keyword joining this call to the branch above (presumably
// `else`) is not visible in this listing.
98 sprintf(buf,
"avg = %8.2f, dev = %6.2f, min = %6.1f (on %4d), max = %6.1f (on %4d)", avgVal,
99 devVal, as<double>(minVal), minProc, as<double>(maxVal), maxProc);
// Ordering predicate on map entries: compares the mapped values (.second) and
// ignores the keys. It is passed to std::min_element / std::max_element in this file.
// NOTE(review): both parameters are non-const references although the body only
// reads them; const references would also accept entries of a const map.
// NOTE(review): the template header and the closing brace are not visible in
// this listing.
104 bool cmp_less(
typename Map::value_type& v1,
typename Map::value_type& v2) {
105 return v1.second < v2.second;
// Builds a textual report about matrix A in the stream ss, each line prefixed
// with msgTag: global size, optionally the nonzero count, and optional
// load-balancing and communication sections.
// NOTE(review): the function signature line is not visible in this listing
// (presumably PrintMatrixInfo — confirm), and several body lines are missing.
108 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
113 typedef Xpetra::global_size_t global_size_t;
115 std::ostringstream ss;
// First report line: global row and column counts of A.
117 ss << msgTag <<
" size = " << A.getGlobalNumRows() <<
" x " << A.getGlobalNumCols();
// The global nonzero count is appended only when A reports that its global
// constants are available.
118 if(A.haveGlobalConstants())
119 ss <<
", nnz = " << A.getGlobalNumEntries();
// Both optional sections are off unless the matching boolean parameter is
// present in params and true.
// NOTE(review): the declaration listed for PrintMatrixInfo defaults params to
// Teuchos::null, and no null check is visible before the dereferences below.
// Confirm one exists in the missing lines.
125 bool printLoadBalanceInfo =
false, printCommInfo =
false;
126 if (params->
isParameter(
"printLoadBalancingInfo") && params->
get<
bool>(
"printLoadBalancingInfo"))
127 printLoadBalanceInfo =
true;
128 if (params->
isParameter(
"printCommInfo") && params->
get<
bool>(
"printCommInfo"))
129 printCommInfo =
true;
// Neither optional section was requested.
// NOTE(review): the statement guarded by this condition is not visible in
// this listing.
131 if (!printLoadBalanceInfo && !printCommInfo)
// Local (this process) nonzero and row counts of A.
137 size_t numMyNnz = A.getNodeNumEntries(), numMyRows = A.getNodeNumRows();
141 bool activeProc =
true;
142 int numProc = origComm->getSize();
143 int numActiveProcs = 0;
146 MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
// Gather every process's local row count so that each rank holds the full
// per-process distribution in numRowsPerProc.
148 std::vector<size_t> numRowsPerProc(numProc);
149 Teuchos::gatherAll(*origComm, 1, &numMyRows, numProc, &numRowsPerProc[0]);
// Walk the per-process row counts and act on processes that own at least one row.
// NOTE(review): the loop body is not visible in this listing.
152 bool rootFlag =
true;
153 for (
int i = 0; i < numProc; i++) {
154 if (numRowsPerProc[i]) {
// A process without rows is marked inactive and its nonzero count is set to 0.
163 if(numMyRows == 0) {activeProc =
false; numMyNnz = 0;}
// absList sets "print abs" to true. It is passed to stringStats for the
// "# num msgs" line below.
178 absList.
set(
"print abs",
true);
// Load-balancing section: active process count plus rows and nonzeros per process.
180 if (printLoadBalanceInfo) {
181 ss << msgTag <<
" Load balancing info" << std::endl;
182 ss << msgTag <<
" # active processes: " << numActiveProcs <<
"/" << numProc << std::endl;
183 ss << msgTag <<
" # rows per proc : " << stringStats<global_size_t>(origComm, numActiveProcs, numMyRows) << std::endl;
184 ss << msgTag <<
" # nnz per proc : " << stringStats<global_size_t>(origComm, numActiveProcs, numMyNnz) << std::endl;
// The communication section is skipped when exactly one process is active.
187 if (printCommInfo && numActiveProcs != 1) {
188 typedef std::map<int,size_t> map_type;
// neighMap[p] counts how many entries of exportPIDs equal p.
// The size() guard is redundant with the loop condition but harmless.
192 if (exportPIDs.
size())
193 for (
int i = 0; i < exportPIDs.
size(); i++)
194 neighMap[exportPIDs[i]]++;
198 size_t numExportSend = 0;
199 size_t numImportSend = 0;
// A null exporter or importer contributes 0 instead of being dereferenced.
205 numExportSend = (!exporter.
is_null() ? exporter->getNumExportIDs() : 0);
206 numImportSend = (!importer.
is_null() ? importer->getNumExportIDs() : 0);
// The message count is the number of distinct ids recorded in neighMap.
207 numMsgs = neighMap.size();
// Smallest and largest per-id counts. For an empty map the algorithms return
// end(), and 0 is used instead.
208 map_type::const_iterator it = std::min_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
209 minMsg = (it != neighMap.end() ? it->second : 0);
210 it = std::max_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
211 maxMsg = (it != neighMap.end() ? it->second : 0);
214 ss << msgTag <<
" Communication info" << std::endl;
215 ss << msgTag <<
" # num export send : " << stringStats<global_size_t>(origComm, numActiveProcs, numExportSend) << std::endl;
216 ss << msgTag <<
" # num import send : " << stringStats<global_size_t>(origComm, numActiveProcs, numImportSend) << std::endl;
217 ss << msgTag <<
" # num msgs : " << stringStats<global_size_t>(origComm, numActiveProcs, numMsgs, rcpFromRef(absList)) << std::endl;
218 ss << msgTag <<
" # min msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, minMsg) << std::endl;
219 ss << msgTag <<
" # max msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, maxMsg) << std::endl;
// Two-step broadcast of the report string from rank `root`: the length goes
// first so other ranks can resize outstr before receiving the characters.
// NOTE(review): outstr.size() is narrowed to int, and the definitions of outstr
// and root are not visible in this listing.
225 int strLength = outstr.size();
226 MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
227 if (origComm->getRank() != root)
228 outstr.resize(strLength);
229 MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
// Builds a textual "Communication info" report for an importer in the stream
// ss, each line prefixed with msgTag.
// NOTE(review): the function signature line is not visible in this listing
// (presumably PrintImporterInfo — confirm), and several body lines are missing.
235 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
238 typedef Xpetra::global_size_t global_size_t;
240 std::ostringstream ss;
// Every process in origComm is counted as active here.
244 bool activeProc =
true;
245 int numActiveProcs = origComm->getSize();
248 MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
// absList sets "print abs" to true. It is passed to stringStats for the
// "# num msgs" line below.
254 absList.
set(
"print abs",
true);
256 typedef std::map<int,size_t> map_type;
// neighMap[p] counts how many entries of exportPIDs equal p.
// The size() guard is redundant with the loop condition but harmless.
259 if (exportPIDs.
size())
260 for (
int i = 0; i < exportPIDs.
size(); i++)
261 neighMap[exportPIDs[i]]++;
264 size_t numImportSend = 0;
// NOTE(review): importer is dereferenced here with no null check visible in
// this listing. Confirm the missing lines guarantee it is non-null.
270 numImportSend = importer->getNumExportIDs();
// The message count is the number of distinct ids recorded in neighMap.
271 numMsgs = neighMap.size();
// Smallest and largest per-id counts. For an empty map the algorithms return
// end(), and 0 is used instead.
272 map_type::const_iterator it = std::min_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
273 minMsg = (it != neighMap.end() ? it->second : 0);
274 it = std::max_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
275 maxMsg = (it != neighMap.end() ? it->second : 0);
278 ss << msgTag <<
" Communication info" << std::endl;
279 ss << msgTag <<
" # num import send : " << stringStats<global_size_t>(origComm, numActiveProcs, numImportSend) << std::endl;
280 ss << msgTag <<
" # num msgs : " << stringStats<global_size_t>(origComm, numActiveProcs, numMsgs, rcpFromRef(absList)) << std::endl;
281 ss << msgTag <<
" # min msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, minMsg) << std::endl;
282 ss << msgTag <<
" # max msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, maxMsg) << std::endl;
// Two-step broadcast of the report string from rank `root`: the length goes
// first so other ranks can resize outstr before receiving the characters.
// NOTE(review): outstr.size() is narrowed to int, and the definitions of outstr
// and root are not visible in this listing.
288 int strLength = outstr.size();
289 MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
290 if (origComm->getRank() != root)
291 outstr.resize(strLength);
292 MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
// Writes one line to `out` for this process: msgTag, the rank, a colon, then a
// sequence of "neigh(weight)" items derived from exportPIDs.
// NOTE(review): the function signature line is not visible in this listing
// (presumably CommPattern — confirm), and several body lines are missing,
// including the definitions of comm, exportPIDs and weight.
298 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
303 std::ostringstream out;
306 int myRank = comm->getRank();
308 out << msgTag <<
" " << myRank <<
":";
// The importer comes from A's CrsGraph when A has one; otherwise it is null.
310 RCP<const Import> importer = (A.getCrsGraph() != Teuchos::null ? A.getCrsGraph()->getImporter() : Teuchos::null);
// Only a non-empty exportPIDs is processed, so reading exportPIDs[0] is safe.
318 if (exportPIDs.
size()) {
320 int neigh = exportPIDs[0];
// Walk exportPIDs from the second entry and emit the current item each time
// the id changes from the previous entry.
// NOTE(review): the updates to weight are not visible in this listing. This
// presumably relies on equal ids being adjacent in exportPIDs — TODO confirm.
322 for (
int i = 1; i < exportPIDs.
size(); i++) {
323 if (exportPIDs[i] != exportPIDs[i-1]) {
324 out <<
" " << neigh <<
"(" << weight <<
")";
326 neigh = exportPIDs[i];
// Emit the last pending item and end the line.
333 out <<
" " << neigh <<
"(" << weight <<
")" << std::endl;
// NOTE(review): the function signature line and almost all of the body are
// missing from this listing (presumably CheckMatrix — confirm). The only
// visible statement initializes hasCrsGraph to true.
339 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
346 bool hasCrsGraph =
true;
359 #endif // MUELU_PERFUTILS_DEF_HPP
static bool CheckMatrix(const Matrix &A)
std::string stringStats(const RCP< const Teuchos::Comm< int > > &comm, int numActiveProcs, const Type &v, RCP< ParameterList > paramList=Teuchos::null)
#define MueLu_sumAll(rcpComm, in, out)
#define MueLu_maxAll(rcpComm, in, out)
T & get(const std::string &name, T def_value)
ParameterList & set(std::string const &name, T const &value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
#define MueLu_minAll(rcpComm, in, out)
static std::string CommPattern(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
static std::string PrintImporterInfo(RCP< const Import > importer, const std::string &msgTag)
bool isParameter(const std::string &name) const
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void calculateStats(Type &minVal, Type &maxVal, double &avgVal, double &devVal, int &minProc, int &maxProc, const RCP< const Teuchos::Comm< int > > &comm, int numActiveProcs, const Type &v)
bool cmp_less(typename Map::value_type &v1, typename Map::value_type &v2)