MueLu  Version of the Day
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MueLu_PerfUtils_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // *****************************************************************************
3 // MueLu: A package for multigrid based preconditioning
4 //
5 // Copyright 2012 NTESS and the MueLu contributors.
6 // SPDX-License-Identifier: BSD-3-Clause
7 // *****************************************************************************
8 // @HEADER
9 
10 #ifndef MUELU_PERFUTILS_DEF_HPP
11 #define MUELU_PERFUTILS_DEF_HPP
12 
13 #include <algorithm>
14 #include <string>
15 
16 #ifdef HAVE_MPI
17 #include <Teuchos_CommHelpers.hpp>
18 #endif
19 
20 #include <Xpetra_Export.hpp>
21 #include <Xpetra_Import.hpp>
22 #include <Xpetra_Matrix.hpp>
23 #include <Xpetra_CrsMatrixWrap.hpp>
24 
25 #include "MueLu_PerfUtils_decl.hpp"
26 
27 //#include "MueLu_Utilities.hpp"
28 
29 namespace MueLu {
30 
31 template <class Type>
32 void calculateStats(Type& minVal, Type& maxVal, double& avgVal, double& devVal, int& minProc, int& maxProc, const RCP<const Teuchos::Comm<int> >& comm, int numActiveProcs, const Type& v) {
33  Type sumVal, sum2Val, v2 = v * v;
36 
37  MueLu_sumAll(comm, v, sumVal);
38  MueLu_sumAll(comm, v2, sum2Val);
39  MueLu_minAll(comm, v, minVal);
40  MueLu_maxAll(comm, v, maxVal);
41 
42  int w;
43  w = (minVal == v) ? comm->getRank() : -1;
44  MueLu_maxAll(comm, w, maxProc);
45  w = (maxVal == v) ? comm->getRank() : -1;
46  MueLu_maxAll(comm, w, minProc);
47 
48  avgVal = (numActiveProcs > 0 ? (as<double>(Teuchos::ScalarTraits<Type>::real(sumVal)) / numActiveProcs) : zero);
49  MT avgVal_MT = Teuchos::as<MT>(avgVal);
50  devVal = (numActiveProcs > 1 ? sqrt((as<double>(Teuchos::ScalarTraits<Type>::real(sum2Val - sumVal * avgVal_MT))) / (numActiveProcs - 1)) : zero);
51 }
52 
53 template <class Type>
54 std::string stringStats(const RCP<const Teuchos::Comm<int> >& comm, int numActiveProcs, const Type& v, RCP<ParameterList> paramList = Teuchos::null) {
55  Type minVal, maxVal;
56  double avgVal, devVal;
57  int minProc, maxProc;
58  calculateStats<Type>(minVal, maxVal, avgVal, devVal, minProc, maxProc, comm, numActiveProcs, v);
59 
60  const double zero = Teuchos::ScalarTraits<double>::zero();
61  const double one = Teuchos::ScalarTraits<double>::one();
62  std::ostringstream buf;
63  buf << std::fixed;
64  if ((avgVal != zero) && (paramList.is_null() || !paramList->isParameter("print abs") || paramList->get<bool>("print abs") == false)) {
65  double relDev = (devVal / avgVal) * 100;
66  double relMin = (as<double>(Teuchos::ScalarTraits<Type>::real(minVal)) / avgVal - one) * 100;
67  double relMax = (as<double>(Teuchos::ScalarTraits<Type>::real(maxVal)) / avgVal - one) * 100;
68  buf << "avg = " << std::scientific << std::setw(10) << std::setprecision(2) << avgVal << ", "
69  << "dev = " << std::fixed << std::setw(6) << std::setprecision(1) << relDev << "%, "
70  << "min = " << std::fixed << std::setw(7) << std::setprecision(1) << std::setw(7) << relMin << "%"
71  << " (" << std::scientific << std::setw(10) << std::setprecision(2) << minVal << " on " << std::fixed << std::setw(4) << minProc << "), "
72  << "max = " << std::fixed << std::setw(7) << std::setprecision(1) << relMax << "%"
73  << " (" << std::scientific << std::setw(10) << std::setprecision(2) << maxVal << " on " << std::fixed << std::setw(4) << maxProc << ")";
74  } else {
75  double relDev = (avgVal != zero ? (devVal / avgVal) * 100 : zero);
76  buf << "avg = " << std::scientific << std::setw(10) << std::setprecision(2) << avgVal << ", "
77  << "dev = " << std::fixed << std::setw(6) << std::setprecision(1) << relDev << "%, "
78  << "min = " << std::scientific << std::setw(10) << std::setprecision(2) << minVal
79  << " (on " << std::fixed << std::setw(4) << minProc << "), "
80  << "max = " << std::scientific << std::setw(10) << std::setprecision(2) << maxVal
81  << " (on " << std::fixed << std::setw(4) << maxProc << ")";
82  }
83  return buf.str();
84 }
85 
86 template <class Map>
87 bool cmp_less(typename Map::value_type& v1, typename Map::value_type& v2) {
88  return v1.second < v2.second;
89 }
90 
91 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
92 std::string PerfUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::PrintMatrixInfo(const Matrix& A, const std::string& msgTag, RCP<const ParameterList> params) {
93  if (!CheckMatrix(A))
94  return "";
95 
97 
98  std::ostringstream ss;
99 
100  ss << msgTag << " size = " << A.getGlobalNumRows() << " x " << A.getGlobalNumCols();
101  if (A.haveGlobalConstants())
102  ss << ", nnz = " << A.getGlobalNumEntries();
103  ss << std::endl;
104 
105  if (params.is_null())
106  return ss.str();
107 
108  bool printLoadBalanceInfo = false, printCommInfo = false, printEntryStats = false;
109  if (params->isParameter("printLoadBalancingInfo") && params->get<bool>("printLoadBalancingInfo"))
110  printLoadBalanceInfo = true;
111  if (params->isParameter("printCommInfo") && params->get<bool>("printCommInfo"))
112  printCommInfo = true;
113  if (params->isParameter("printEntryStats") && params->get<bool>("printEntryStats"))
114  printEntryStats = true;
115 
116  if (!printLoadBalanceInfo && !printCommInfo && !printEntryStats)
117  return ss.str();
118 
119  RCP<const Import> importer = A.getCrsGraph()->getImporter();
120  RCP<const Export> exporter = A.getCrsGraph()->getExporter();
121 
122  size_t numMyNnz = A.getLocalNumEntries(), numMyRows = A.getLocalNumRows();
123 
124  // Create communicator only for active processes
125  RCP<const Teuchos::Comm<int> > origComm = A.getRowMap()->getComm();
126  bool activeProc = true;
127  int numProc = origComm->getSize();
128  int numActiveProcs = 0;
129 #ifdef HAVE_MPI
130  RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(origComm);
131  MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
132 
133  std::vector<size_t> numRowsPerProc(numProc);
134  Teuchos::gatherAll(*origComm, 1, &numMyRows, numProc, &numRowsPerProc[0]);
135 
136  int root = 0;
137  bool rootFlag = true;
138  for (int i = 0; i < numProc; i++) {
139  if (numRowsPerProc[i]) {
140  ++numActiveProcs;
141  if (rootFlag) {
142  root = i;
143  rootFlag = false;
144  }
145  }
146  }
147 
148  if (numMyRows == 0) {
149  activeProc = false;
150  numMyNnz = 0;
151  } // Reset numMyNnz to avoid adding it up in reduceAll
152 #else
153  if (numMyRows == 0) {
154  // FIXME JJH 10-May-2017 Is there any case in serial where numMyRows would be zero?
155  // Reset numMyNnz to avoid adding it up in reduceAll
156  numActiveProcs = 0;
157  activeProc = false;
158  numMyNnz = 0;
159  } else {
160  numActiveProcs = 1;
161  }
162 #endif
163 
164  std::string outstr;
165  ParameterList absList;
166  absList.set("print abs", true);
167 
168  RCP<const Matrix> rcpA = rcpFromRef(A);
171  if (!crsWrapA.is_null())
172  crsA = crsWrapA->getCrsMatrix();
173  if (printEntryStats && !crsA.is_null()) {
174  typedef Teuchos::ScalarTraits<Scalar> STS;
175  typedef typename STS::magnitudeType magnitudeType;
177  ArrayRCP<const size_t> rowptr_RCP;
178  ArrayRCP<const LocalOrdinal> colind_RCP;
179  ArrayRCP<const Scalar> vals_RCP;
180  ArrayRCP<size_t> offsets_RCP;
183  ArrayView<size_t> offsets;
184 
185  crsA->getAllValues(rowptr_RCP, colind_RCP, vals_RCP);
186  crsA->getLocalDiagOffsets(offsets_RCP);
187  rowptr = rowptr_RCP();
188  vals = vals_RCP();
189  offsets = offsets_RCP();
190 
191  Scalar val, minVal, maxVal;
192  magnitudeType absVal, minAbsVal, maxAbsVal;
193  {
194  minVal = STS::rmax();
195  maxVal = STS::rmin();
196  minAbsVal = MTS::rmax();
197  maxAbsVal = MTS::zero();
198 
199  for (int i = 0; i < offsets.size(); i++) {
200  val = vals[rowptr[i] + offsets[i]];
201  if (STS::real(val) < STS::real(minVal))
202  minVal = val;
203  if (STS::real(val) > STS::real(maxVal))
204  maxVal = val;
205  absVal = STS::magnitude(val);
206  minAbsVal = std::min(minAbsVal, absVal);
207  maxAbsVal = std::max(maxAbsVal, absVal);
208  }
209 
210  ss << msgTag << " diag min : " << stringStats<Scalar>(origComm, numActiveProcs, minVal) << std::endl;
211  ss << msgTag << " diag max : " << stringStats<Scalar>(origComm, numActiveProcs, maxVal) << std::endl;
212  ss << msgTag << " abs(diag) min : " << stringStats<Scalar>(origComm, numActiveProcs, minAbsVal) << std::endl;
213  ss << msgTag << " abs(diag) max : " << stringStats<Scalar>(origComm, numActiveProcs, maxAbsVal) << std::endl;
214  }
215 
216  {
217  minVal = STS::rmax();
218  maxVal = STS::rmin();
219  minAbsVal = MTS::rmax();
220  maxAbsVal = MTS::zero();
221 
222  for (int i = 0; i < vals.size(); i++) {
223  val = vals[i];
224  if (STS::real(val) < STS::real(minVal))
225  minVal = val;
226  if (STS::real(val) > STS::real(maxVal))
227  maxVal = val;
228  absVal = STS::magnitude(val);
229  minAbsVal = std::min(minAbsVal, absVal);
230  maxAbsVal = std::max(maxAbsVal, absVal);
231  }
232 
233  ss << msgTag << " entry min : " << stringStats<Scalar>(origComm, numActiveProcs, minVal) << std::endl;
234  ss << msgTag << " entry max : " << stringStats<Scalar>(origComm, numActiveProcs, maxVal) << std::endl;
235  ss << msgTag << " abs(entry) min : " << stringStats<Scalar>(origComm, numActiveProcs, minAbsVal) << std::endl;
236  ss << msgTag << " abs(entry) max : " << stringStats<Scalar>(origComm, numActiveProcs, maxAbsVal) << std::endl;
237  }
238  }
239 
240  if (printLoadBalanceInfo) {
241  ss << msgTag << " Load balancing info" << std::endl;
242  ss << msgTag << " # active processes: " << numActiveProcs << "/" << numProc << std::endl;
243  ss << msgTag << " # rows per proc : " << stringStats<global_size_t>(origComm, numActiveProcs, numMyRows) << std::endl;
244  ss << msgTag << " # nnz per proc : " << stringStats<global_size_t>(origComm, numActiveProcs, numMyNnz) << std::endl;
245  }
246 
247  if (printCommInfo && numActiveProcs != 1) {
248  typedef std::map<int, size_t> map_type;
249  map_type neighMap;
250  if (!importer.is_null()) {
251  ArrayView<const int> exportPIDs = importer->getExportPIDs();
252  if (exportPIDs.size())
253  for (int i = 0; i < exportPIDs.size(); i++)
254  neighMap[exportPIDs[i]]++;
255  }
256 
257  // Communication volume
258  size_t numExportSend = 0;
259  size_t numImportSend = 0;
260  size_t numMsgs = 0;
261  size_t minMsg = 0;
262  size_t maxMsg = 0;
263 
264  if (activeProc) {
265  numExportSend = (!exporter.is_null() ? exporter->getNumExportIDs() : 0);
266  numImportSend = (!importer.is_null() ? importer->getNumExportIDs() : 0);
267  numMsgs = neighMap.size();
268  map_type::const_iterator it = std::min_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
269  minMsg = (it != neighMap.end() ? it->second : 0);
270  it = std::max_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
271  maxMsg = (it != neighMap.end() ? it->second : 0);
272  }
273 
274  ss << msgTag << " Communication info" << std::endl;
275  ss << msgTag << " # num export send : " << stringStats<global_size_t>(origComm, numActiveProcs, numExportSend) << std::endl;
276  ss << msgTag << " # num import send : " << stringStats<global_size_t>(origComm, numActiveProcs, numImportSend) << std::endl;
277  ss << msgTag << " # num msgs : " << stringStats<global_size_t>(origComm, numActiveProcs, numMsgs, rcpFromRef(absList)) << std::endl;
278  ss << msgTag << " # min msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, minMsg) << std::endl;
279  ss << msgTag << " # max msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, maxMsg) << std::endl;
280  }
281 
282  outstr = ss.str();
283 
284 #ifdef HAVE_MPI
285  int strLength = outstr.size();
286  MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
287  if (origComm->getRank() != root)
288  outstr.resize(strLength);
289  MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
290 #endif
291 
292  return outstr;
293 }
294 
295 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
298 
299  std::ostringstream ss;
300 
301  // Create communicator only for active processes
302  RCP<const Teuchos::Comm<int> > origComm = importer->getSourceMap()->getComm();
303  bool activeProc = true;
304  int numActiveProcs = origComm->getSize();
305 #ifdef HAVE_MPI
306  RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(origComm);
307  MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
308  int root = 0;
309 #endif
310 
311  std::string outstr;
312  ParameterList absList;
313  absList.set("print abs", true);
314 
315  typedef std::map<int, size_t> map_type;
316  map_type neighMap;
317  ArrayView<const int> exportPIDs = importer->getExportPIDs();
318  if (exportPIDs.size())
319  for (int i = 0; i < exportPIDs.size(); i++)
320  neighMap[exportPIDs[i]]++;
321 
322  // Communication volume
323  size_t numImportSend = 0;
324  size_t numMsgs = 0;
325  size_t minMsg = 0;
326  size_t maxMsg = 0;
327 
328  if (activeProc) {
329  numImportSend = importer->getNumExportIDs();
330  numMsgs = neighMap.size();
331  map_type::const_iterator it = std::min_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
332  minMsg = (it != neighMap.end() ? it->second : 0);
333  it = std::max_element(neighMap.begin(), neighMap.end(), cmp_less<map_type>);
334  maxMsg = (it != neighMap.end() ? it->second : 0);
335  }
336 
337  ss << msgTag << " Communication info" << std::endl;
338  ss << msgTag << " # num import send : " << stringStats<global_size_t>(origComm, numActiveProcs, numImportSend) << std::endl;
339  ss << msgTag << " # num msgs : " << stringStats<global_size_t>(origComm, numActiveProcs, numMsgs, rcpFromRef(absList)) << std::endl;
340  ss << msgTag << " # min msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, minMsg) << std::endl;
341  ss << msgTag << " # max msg size : " << stringStats<global_size_t>(origComm, numActiveProcs, maxMsg) << std::endl;
342 
343  outstr = ss.str();
344 
345 #ifdef HAVE_MPI
346  int strLength = outstr.size();
347  MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
348  if (origComm->getRank() != root)
349  outstr.resize(strLength);
350  MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
351 #endif
352 
353  return outstr;
354 }
355 
356 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
357 std::string PerfUtils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::CommPattern(const Matrix& A, const std::string& msgTag, RCP<const ParameterList> /* params */) {
358  if (!CheckMatrix(A))
359  return "";
360 
361  std::ostringstream out;
362 
363  RCP<const Teuchos::Comm<int> > comm = A.getRowMap()->getComm();
364  int myRank = comm->getRank();
365 
366  out << msgTag << " " << myRank << ":";
367 
368  RCP<const Import> importer = (A.getCrsGraph() != Teuchos::null ? A.getCrsGraph()->getImporter() : Teuchos::null);
369  if (importer.is_null()) {
370  out << std::endl;
371  return out.str();
372  }
373 
374  ArrayView<const int> exportPIDs = importer->getExportPIDs();
375 
376  if (exportPIDs.size()) {
377  // NodeTE: exportPIDs is sorted but not unique ( 1 1 1 2 2 3 4 4 4 )
378  int neigh = exportPIDs[0];
379  GO weight = 1;
380  for (int i = 1; i < exportPIDs.size(); i++) {
381  if (exportPIDs[i] != exportPIDs[i - 1]) {
382  out << " " << neigh << "(" << weight << ")";
383 
384  neigh = exportPIDs[i];
385  weight = 1;
386 
387  } else {
388  weight += 1;
389  }
390  }
391  out << " " << neigh << "(" << weight << ")" << std::endl;
392  }
393 
394  return out.str();
395 }
396 
397 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
399  // We can only print statistics for matrices that have a crs graph. A
400  // potential issue is regarding Xpetra::TpetraBlockCrsMatrix which has no
401  // CrsGraph. It is held as a private data member by Xpetra::CrsMatrix,
402  // which itself is an Xpetra::Matrix. So we check directly whether the
403  // request for the graph throws.
404  bool hasCrsGraph = true;
405  try {
406  A.getCrsGraph();
407 
408  } catch (...) {
409  hasCrsGraph = false;
410  }
411 
412  return hasCrsGraph;
413 }
414 
415 } // namespace MueLu
416 
417 #endif // MUELU_PERFUTILS_DEF_HPP
static bool CheckMatrix(const Matrix &A)
std::string stringStats(const RCP< const Teuchos::Comm< int > > &comm, int numActiveProcs, const Type &v, RCP< ParameterList > paramList=Teuchos::null)
#define MueLu_sumAll(rcpComm, in, out)
#define MueLu_maxAll(rcpComm, in, out)
GlobalOrdinal GO
T & get(const std::string &name, T def_value)
static magnitudeType real(T a)
size_type size() const
ParameterList & set(std::string const &name, T &&value, std::string const &docString="", RCP< const ParameterEntryValidator > const &validator=null)
#define MueLu_minAll(rcpComm, in, out)
static std::string CommPattern(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
static std::string PrintImporterInfo(RCP< const Import > importer, const std::string &msgTag)
bool isParameter(const std::string &name) const
MueLu::DefaultScalar Scalar
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void calculateStats(Type &minVal, Type &maxVal, double &avgVal, double &devVal, int &minProc, int &maxProc, const RCP< const Teuchos::Comm< int > > &comm, int numActiveProcs, const Type &v)
size_t global_size_t
bool cmp_less(typename Map::value_type &v1, typename Map::value_type &v2)
bool is_null() const